diff --git a/array2xls/gui.py b/array2xls/gui.py index d7d444d..9efb7be 100644 --- a/array2xls/gui.py +++ b/array2xls/gui.py @@ -2,7 +2,7 @@ import os import tkinter as tk import tkinter.ttk as ttk from tkinter import filedialog - +import pandas import validators import inout @@ -180,7 +180,7 @@ class Application(tk.Frame): if state not in (APP_STATE_3, APP_STATE_4): return self.reset() if state == APP_STATE_3: - text = self._convert_to_single_files() + text = self._convert_to_separate_files() else: text = self._combine_data_files() self.status_panel.set_text(text + ' Done') @@ -197,12 +197,12 @@ class Application(tk.Frame): else: return APP_STATE_2 - def _convert_to_single_files(self): + def _convert_to_separate_files(self): total = len(self.validated_files) - text = 'Converting data file %d of %d to single excel file ...' + text = 'Converting data file %d of %d to separate excel file ...' + all_fields = self.file_validator.id_fields + self.file_validator.data_fields for i, data_file in enumerate(self.validated_files): self.status_panel.set_text(text % (i+1, total)) - all_fields = data_file.validator.id_fields + data_file.validator.data_fields data_frame = inout.read_data_file(data_file, all_fields) data_frame = data_frame.set_index(data_file.validator.id_fields) name, ext = os.path.splitext(data_file.path) @@ -211,8 +211,33 @@ class Application(tk.Frame): return text % (i+1, total) def _combine_data_files(self): - pass - - - - + total = len(self.validated_files) + text = 'Combining data file %d of %d to single excel file ...' + data_fields = [self.file_validator.data_fields[i] for i in self.field_panel.listbox.curselection()] + selected_fields = self.file_validator.id_fields + data_fields + data_frame_list = [] + for i, data_file in enumerate(self.validated_files): + self.status_panel.set_text(text % (i + 1, total)) + data_frame = inout.read_data_file(data_file, selected_fields) + data_frame['File Name'] = os.path.basename(data_file.path) + data_frame_list.append(data_frame) + master_frame = pandas.concat(data_frame_list, ignore_index=True) + pivoted_df = master_frame.pivot(self.file_validator.id_fields[0],'File Name') + col_grouper = dict() + value_columns = [] + for value_col, file_col in pivoted_df.columns: + if value_col not in col_grouper: + col_grouper[value_col] = [] + value_columns.append(value_col) + col_grouper[value_col].append(file_col) + sorted_col_names = [] + for value_col in value_columns: + sorted_file_cols = sorted(col_grouper[value_col], key=lambda x: inout.natural_sort(x)) + sorted_col_names.extend([(value_col, file_col) for file_col in sorted_file_cols]) + sorted_df = pivoted_df.reindex_axis(sorted_col_names, axis=1) + first_file = self.validated_files[0] + directory = os.path.dirname(first_file.path) + pseudo_extension = self.file_validator.extension.replace('.', '_') + xls_path = os.path.join(directory, 'combined' + pseudo_extension + '.xlsx') + inout.write_excel_file(xls_path, sorted_df) + return text % (i + 1, total) diff --git a/array2xls/inout.py b/array2xls/inout.py index ef7f00b..1e914ea 100644 --- a/array2xls/inout.py +++ b/array2xls/inout.py @@ -1,8 +1,15 @@ import os +import re import pandas import validators + +RE_NATURAL_SORT = re.compile('([0-9]+)') +convert = lambda text: int(text) if text.isdigit() else text.lower() +natural_sort = lambda item: [convert(c) for c in RE_NATURAL_SORT.split(item)] + + def read_data_file(data_file, usecols): with open(data_file.path, 'r', encoding='UTF-8') as file_handle: return pandas.read_csv( @@ -11,4 +18,4 @@ def read_data_file(data_file, usecols): def write_excel_file(path, data_frame): - data_frame.to_excel(path) \ No newline at end of file + data_frame.to_excel(path) diff --git a/array2xls/validators.py b/array2xls/validators.py index 6bd96a9..ecf2268 100644 --- a/array2xls/validators.py +++ b/array2xls/validators.py @@ -70,7 +70,7 @@ dat_validator = Validator( ['Dot_Number', 'Probe_Name', 'Gene_Name', 'Col', 'Row'], ['X[Pix]', 'Y[Pix]', 'DX[Pix]', 'DY[Pix]', 'Spot_Diameter', 'ROI_Width', 'ROI_Heigth', 'Pixels', 'Bkg', 'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD', 'Acc_Number', 'Proc_Control'], - ['Bkg', 'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD'] ) + ['Bkg', 'Net_Signal', 'Net_Integral'] ) csv_validator = Validator( 'Sensovation Data Files', '.csv', validate_csv,