Convert Microarray Data to Excel Files
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

244 lines
9.1 KiB

import os
import tkinter as tk
import tkinter.ttk as ttk
from tkinter import filedialog
import pandas
import validators
import inout
# Application states as returned by the Application._state property.
# Each value is a human-readable description of the state.

# No validated input files are available.
APP_STATE_1 = 'no valid files selected'
# Combined-file mode is chosen, but nothing is selected in the field listbox.
APP_STATE_2 = 'no valid fields selected'
# Ready to convert: every data file gets its own excel file.
APP_STATE_3 = 'ok, single files'
# Ready to convert: all data files are merged into one excel file.
APP_STATE_4 = 'ok, multiple files'
class StatusPanel(tk.Frame):
    """Status bar docked at the bottom of the window.

    The frame is created as a child of ``parent.master`` and holds a single
    sunken label used to show progress messages.
    """

    def __init__(self, parent):
        """Build the status label and pack the panel at the bottom.

        Args:
            parent: The owning object; its ``master`` attribute is used as
                the tk parent widget of this frame.
        """
        super().__init__(parent.master)
        self._parent = parent

        label_options = {'bd': 1, 'relief': tk.SUNKEN, 'anchor': tk.S}
        self.label = tk.Label(self, **label_options)
        self.label.pack(fill=tk.X)

        self.pack(side=tk.BOTTOM, fill=tk.X)

    def set_text(self, text):
        """Show ``text`` in the status bar and redraw it right away."""
        status_label = self.label
        status_label.configure(text=text)
        # Process pending idle tasks so the new text becomes visible even
        # while the caller keeps working.
        status_label.update_idletasks()

    def clear_text(self):
        """Blank out the status bar."""
        self.set_text('')
class FilePanel(tk.Frame):
    """Top panel with the "Select Files" button and a result label.

    The frame is created as a child of ``parent.master``; a horizontal
    separator closes the panel at its lower edge.
    """

    def __init__(self, parent):
        """Create the button, the label and the separator, then pack the panel.

        Args:
            parent: The owning object. It must provide ``master`` (the tk
                parent widget) and ``select_files`` (the button callback).
        """
        super().__init__(parent.master)
        self._parent = parent

        self.btn_files = tk.Button(
            self, text="Select Files", command=parent.select_files)
        self.btn_files.pack(pady=5, padx=5)

        self.label = tk.Label(self, anchor=tk.SW)
        self.label.pack(fill=tk.X, pady=5, padx=5)

        divider = ttk.Separator(self, orient=tk.HORIZONTAL)
        divider.pack(side=tk.BOTTOM, fill=tk.X)

        self.pack(side=tk.TOP, fill=tk.X)

    def set_text(self, text):
        """Show ``text`` in the panel label and redraw it right away."""
        info_label = self.label
        info_label.configure(text=text)
        info_label.update_idletasks()

    def clear_text(self):
        """Blank out the panel label."""
        self.set_text('')
class FieldPanel(tk.Frame):
    """Panel for choosing the output mode and the data fields to export.

    Two radio buttons switch between "single excel files" and "one combined
    excel file"; a multi-select listbox offers the data fields. The listbox
    is enabled by the "combined" radio button and disabled by the "single"
    one. Every change is reported to ``parent.toggle_run_button``.
    """

    def __init__(self, parent):
        """Create the radio buttons and the listbox, then pack the panel.

        Args:
            parent: The owning object. It must provide ``master`` (the tk
                parent widget) and ``toggle_run_button`` (called with no
                argument by this panel and with an event by the listbox
                binding).
        """
        super().__init__(parent.master)
        self._parent = parent

        # True -> one excel file per data file, False -> one combined file.
        self.is_single = tk.BooleanVar(value=True)

        self.rbtn_single = tk.Radiobutton(
            self,
            text='single excel files',
            variable=self.is_single,
            value=True,
            command=self.disable_listbox,
        )
        self.rbtn_combine = tk.Radiobutton(
            self,
            text='one combined excel file',
            variable=self.is_single,
            value=False,
            command=self.enable_listbox,
        )
        for radio_button in (self.rbtn_single, self.rbtn_combine):
            radio_button.pack(anchor=tk.W)

        self.listbox = tk.Listbox(
            self, selectmode=tk.MULTIPLE, activestyle='none', height=19)
        self.listbox.pack(fill=tk.X, ipady=5, ipadx=5)
        self.listbox.bind('<<ListboxSelect>>', parent.toggle_run_button)

        self.pack(side=tk.TOP, fill=tk.X)

    def disable_radio_btn(self):
        """Grey out both radio buttons."""
        for radio_button in (self.rbtn_single, self.rbtn_combine):
            radio_button.configure(state="disabled")

    def disable_listbox(self):
        """Grey out the listbox and let the parent re-evaluate the run button."""
        self.listbox.configure(state="disabled")
        self._parent.toggle_run_button()

    def disable(self):
        """Grey out the listbox first, then the radio buttons."""
        self.disable_listbox()
        self.disable_radio_btn()

    def enable_radio_btn(self):
        """Activate both radio buttons and switch back to single-file mode."""
        for radio_button in (self.rbtn_single, self.rbtn_combine):
            radio_button.configure(state="normal")
        self.is_single.set(True)

    def enable_listbox(self):
        """Activate the listbox and let the parent re-evaluate the run button."""
        self.listbox.configure(state="normal")
        self._parent.toggle_run_button()

    def enable(self):
        """Activate the listbox first, then the radio buttons."""
        self.enable_listbox()
        self.enable_radio_btn()

    def set_listbox_content(self, choices, selection):
        """Replace the listbox entries and preselect some of them.

        Args:
            choices: Iterable of entries to show, in display order.
            selection: Container of entries that should start out selected.
        """
        # The listbox is enabled while its content is replaced and disabled
        # again afterwards.
        self.enable_listbox()

        box = self.listbox
        old_size = box.size()
        box.selection_clear(0, old_size)
        box.delete(0, old_size)

        for position, choice in enumerate(choices):
            box.insert(tk.END, choice)
            if choice in selection:
                box.selection_set(position)

        self.disable_listbox()
class ActionPanel(tk.Frame):
    """Bottom button row holding the "GO!" and "Quit" buttons.

    The frame is created as a child of ``parent.master``.
    """

    def __init__(self, parent):
        """Create both buttons and pack the panel at the bottom.

        Args:
            parent: The owning object. It must provide ``master`` (the tk
                parent widget), ``convert_files`` (callback of "GO!") and
                ``quit`` (callback of "Quit").
        """
        tk.Frame.__init__(self, parent.master)
        self._parent = parent
        self.btn_go = tk.Button(self, text="GO!", command=self._parent.convert_files)
        self.btn_go.pack(side=tk.LEFT, pady=5, padx=5)
        self.btn_quit = tk.Button(self, text="Quit", command=self._parent.quit)
        self.btn_quit.pack(side=tk.RIGHT, pady=5, padx=5)
        self.pack(side=tk.BOTTOM, fill=tk.X)

    def disable(self):
        """Grey out the "GO!" button so it cannot be clicked."""
        self.btn_go.config(state='disabled')

    def enable(self):
        """Make the "GO!" button clickable again."""
        # 'normal' is the enabled state of a Tk button. 'active' is the state
        # Tk uses while the pointer hovers over the button, so setting it
        # here would draw the button with its hover colours.
        self.btn_go.config(state='normal')
class Application(tk.Frame):
    """Main frame: owns the four panels and runs the conversions.

    Attributes:
        file_validator: Validator returned by ``validators.guess_validator``
            for the current file selection, or None.
        validated_files: List of the files accepted by
            ``validators.validate_files``, or None.
        status_panel, file_panel, field_panel, action_panel: The GUI panels.
    """

    def __init__(self, master):
        """Build the GUI inside ``master`` and put it into its initial state.

        Args:
            master: The tk root (or toplevel) window hosting the application.
        """
        master.minsize(height=330, width=300)
        tk.Frame.__init__(self, master)
        self._master = master
        self.pack(fill=tk.BOTH)
        self.file_validator = None
        self.validated_files = None
        self.status_panel = StatusPanel(self)
        self.file_panel = FilePanel(self)
        self.field_panel = FieldPanel(self)
        self.action_panel = ActionPanel(self)
        self.reset()

    def reset(self):
        """Clear the status text, disable the controls and forget all files."""
        self.status_panel.clear_text()
        self.field_panel.disable()
        self.action_panel.disable()
        self.file_validator = None
        self.validated_files = None

    def select_files(self):
        """Ask the user for data files and validate the selection.

        On success the field panel is filled with the validator's data
        fields and the radio buttons are enabled. If the dialog is
        cancelled, no validator can be guessed, or no file passes
        validation, the application stays in / returns to its reset state.
        """
        self.reset()
        self.file_panel.clear_text()
        opts = {
            'initialdir': '~/_signalyse test',
            'filetypes': [
                ('Signalyse Statistics', '.stx'),
                ('Signalyse Data', '.dat'),
                ('Sensovation Data', '.csv'),
            ],
            'multiple': True,
        }
        file_selection = filedialog.askopenfilename(**opts)
        # Anything but a non-empty tuple means that nothing was selected.
        if not isinstance(file_selection, tuple) or not file_selection:
            return

        self.file_validator = validators.guess_validator(file_selection)
        if self.file_validator is None:
            return

        self.validated_files = list(
            validators.validate_files(file_selection, self.file_validator))
        status = '%d of %d valid %s files found' % (
            len(self.validated_files), len(file_selection), self.file_validator.type)
        self.file_panel.set_text(status)

        if self._state == APP_STATE_1:
            self.reset()
            return

        self.field_panel.enable_radio_btn()
        self.field_panel.set_listbox_content(
            self.file_validator.data_fields, self.file_validator.defaults)
        self.toggle_run_button()

    def toggle_run_button(self, event=None):
        """Enable the run button only while a conversion is possible.

        Args:
            event: Ignored; present so the method can serve as a tk event
                callback.
        """
        if self._state in (APP_STATE_3, APP_STATE_4):
            self.action_panel.enable()
        else:
            self.action_panel.disable()

    def quit(self):
        """Close the application by destroying the master window."""
        self._master.destroy()

    def convert_files(self):
        """Run the conversion that matches the current state.

        Resets the application instead if the state does not allow a
        conversion.
        """
        state = self._state
        if state not in (APP_STATE_3, APP_STATE_4):
            self.reset()
            return
        if state == APP_STATE_3:
            text = self._convert_to_separate_files()
        else:
            text = self._combine_data_files()
        self.status_panel.set_text(text + ' Done')

    @property
    def _state(self):
        """Return the ``APP_STATE_*`` constant describing the current state."""
        # The radio-button state is deliberately not consulted here. The
        # former check compared an entry of ``config()`` with 'disabled',
        # but ``config()`` without arguments returns option tuples, so that
        # comparison was never true. A working check would also break
        # select_files, which reads this property while the buttons are
        # still disabled by the preceding reset().
        if not self.validated_files:
            return APP_STATE_1
        if self.field_panel.is_single.get():
            return APP_STATE_3
        if self.field_panel.listbox.curselection():
            return APP_STATE_4
        return APP_STATE_2

    def _convert_to_separate_files(self):
        """Write one excel file per validated data file.

        The excel file is placed next to the data file; the original
        extension is kept in the name with its dot replaced by an
        underscore, followed by ``.xlsx``.

        Returns:
            The progress message of the last converted file.
        """
        total = len(self.validated_files)
        text = 'Converting data file %d of %d to separate excel file ...'
        all_fields = self.file_validator.id_fields + self.file_validator.data_fields
        for number, data_file in enumerate(self.validated_files, start=1):
            self.status_panel.set_text(text % (number, total))
            data_frame = inout.read_data_file(data_file, all_fields)
            data_frame = data_frame.set_index(data_file.validator.id_fields)
            name, ext = os.path.splitext(data_file.path)
            path = name + ext.replace('.', '_') + '.xlsx'
            inout.write_excel_file(path, data_frame)
        # After the loop the last processed file is number ``total``.
        return text % (total, total)

    def _combine_data_files(self):
        """Merge the selected fields of all validated files into one excel file.

        The result is written as ``combined<_ext>.xlsx`` into the directory
        of the first validated file, where ``<_ext>`` is the validator's
        extension with its dot replaced by an underscore.

        Returns:
            The progress message of the last combined file.
        """
        total = len(self.validated_files)
        text = 'Combining data file %d of %d to single excel file ...'
        data_fields = [
            self.file_validator.data_fields[index]
            for index in self.field_panel.listbox.curselection()
        ]
        selected_fields = self.file_validator.id_fields + data_fields

        data_frame_list = []
        for number, data_file in enumerate(self.validated_files, start=1):
            self.status_panel.set_text(text % (number, total))
            data_frame = inout.read_data_file(data_file, selected_fields)
            data_frame['File Name'] = os.path.basename(data_file.path)
            data_frame_list.append(data_frame)

        master_frame = pandas.concat(data_frame_list, ignore_index=True)
        sorted_df = self._pivot_by_file(
            master_frame, self.file_validator.id_fields[0], inout.natural_sort)

        first_file = self.validated_files[0]
        directory = os.path.dirname(first_file.path)
        pseudo_extension = self.file_validator.extension.replace('.', '_')
        xls_path = os.path.join(directory, 'combined' + pseudo_extension + '.xlsx')
        inout.write_excel_file(xls_path, sorted_df)
        # After the loop the last processed file is number ``total``.
        return text % (total, total)

    @staticmethod
    def _pivot_by_file(master_frame, index_field, sort_key):
        """Pivot stacked file data so that every file gets its own columns.

        Args:
            master_frame: DataFrame holding the rows of all files, with the
                originating file in the 'File Name' column.
            index_field: Name of the column that becomes the row index.
            sort_key: Key function used to order the file names within each
                value column.

        Returns:
            A DataFrame with (value column, file name) column pairs. Value
            columns keep the order produced by the pivot; the files inside
            each value column are ordered by ``sort_key``.
        """
        # Keyword arguments are mandatory for DataFrame.pivot since pandas 2.0.
        pivoted = master_frame.pivot(index=index_field, columns='File Name')

        # Group the file names by value column, keeping first-seen order.
        files_by_value = {}
        for value_col, file_col in pivoted.columns:
            files_by_value.setdefault(value_col, []).append(file_col)

        ordered_columns = []
        for value_col, file_cols in files_by_value.items():
            ordered_columns.extend(
                (value_col, file_col)
                for file_col in sorted(file_cols, key=sort_key))

        # DataFrame.reindex_axis was removed in pandas 1.0; reindex() is its
        # replacement.
        return pivoted.reindex(columns=ordered_columns)