Browse Source

combining source files works!

master
Holger Frey 8 years ago
parent
commit
09096dfee5
  1. 45
      array2xls/gui.py
  2. 9
      array2xls/inout.py
  3. 2
      array2xls/validators.py

45
array2xls/gui.py

@@ -2,7 +2,7 @@ import os
import tkinter as tk
import tkinter.ttk as ttk
from tkinter import filedialog
import pandas
import validators
import inout
@@ -180,7 +180,7 @@ class Application(tk.Frame):
if state not in (APP_STATE_3, APP_STATE_4):
return self.reset()
if state == APP_STATE_3:
text = self._convert_to_single_files()
text = self._convert_to_separate_files()
else:
text = self._combine_data_files()
self.status_panel.set_text(text + ' Done')
@@ -197,12 +197,12 @@ class Application(tk.Frame):
else:
return APP_STATE_2
def _convert_to_single_files(self):
def _convert_to_separate_files(self):
total = len(self.validated_files)
text = 'Converting data file %d of %d to single excel file ...'
text = 'Converting data file %d of %d to separate excel file ...'
all_fields = self.file_validator.id_fields + self.file_validator.data_fields
for i, data_file in enumerate(self.validated_files):
self.status_panel.set_text(text % (i+1, total))
all_fields = data_file.validator.id_fields + data_file.validator.data_fields
data_frame = inout.read_data_file(data_file, all_fields)
data_frame = data_frame.set_index(data_file.validator.id_fields)
name, ext = os.path.splitext(data_file.path)
@@ -211,8 +211,33 @@ class Application(tk.Frame):
return text % (i+1, total)
def _combine_data_files(self):
    """Combine the selected fields of all validated files into one excel file.

    The data fields currently selected in the field panel's listbox are
    read from every validated data file together with the validator's id
    fields. Each file's rows are tagged with the file's base name, all rows
    are concatenated and then pivoted so that the first id field becomes
    the index and every (value column, file name) pair becomes a column.
    Within each value column the file columns are ordered with
    ``inout.natural_sort``; the value columns keep the order in which the
    pivot produced them.

    The workbook is written next to the first validated file as
    ``combined<extension with '.' replaced by '_'>.xlsx``.

    Returns:
        The status text for the last processed file, for display by the
        caller.
    """
    total = len(self.validated_files)
    text = 'Combining data file %d of %d to single excel file ...'

    data_fields = [
        self.file_validator.data_fields[index]
        for index in self.field_panel.listbox.curselection()
    ]
    selected_fields = self.file_validator.id_fields + data_fields

    data_frame_list = []
    for i, data_file in enumerate(self.validated_files):
        self.status_panel.set_text(text % (i + 1, total))
        data_frame = inout.read_data_file(data_file, selected_fields)
        data_frame['File Name'] = os.path.basename(data_file.path)
        data_frame_list.append(data_frame)

    master_frame = pandas.concat(data_frame_list, ignore_index=True)
    # Keyword arguments: recent pandas releases no longer accept the
    # index/columns arguments of pivot() positionally.
    pivoted_df = master_frame.pivot(
        index=self.file_validator.id_fields[0],
        columns='File Name',
    )

    # Group the file columns by value column while remembering the order
    # in which the value columns first appear.
    col_grouper = dict()
    value_columns = []
    for value_col, file_col in pivoted_df.columns:
        if value_col not in col_grouper:
            col_grouper[value_col] = []
            value_columns.append(value_col)
        col_grouper[value_col].append(file_col)

    sorted_col_names = []
    for value_col in value_columns:
        sorted_file_cols = sorted(col_grouper[value_col], key=inout.natural_sort)
        sorted_col_names.extend(
            (value_col, file_col) for file_col in sorted_file_cols
        )

    # reindex(columns=...) is the replacement for the removed reindex_axis().
    sorted_df = pivoted_df.reindex(columns=sorted_col_names)

    first_file = self.validated_files[0]
    directory = os.path.dirname(first_file.path)
    pseudo_extension = self.file_validator.extension.replace('.', '_')
    xls_path = os.path.join(directory, 'combined' + pseudo_extension + '.xlsx')
    inout.write_excel_file(xls_path, sorted_df)

    # After the loop the last processed file is number `total`.
    return text % (total, total)

9
array2xls/inout.py

@@ -1,8 +1,15 @@
import os
import re
import pandas
import validators
# Splits a string into alternating text / ASCII-digit chunks; the capturing
# group keeps the digit runs in the result of split().
RE_NATURAL_SORT = re.compile('([0-9]+)')


def convert(text):
    """Return one chunk of a natural-sort key.

    A chunk consisting solely of ASCII digits is returned as ``int``; any
    other chunk is returned lower-cased.
    """
    # str.isdigit() is not used here: it is also true for characters such
    # as superscript digits, which int() rejects with ValueError, and for
    # non-ASCII decimal digits, which would put an int where the other
    # keys hold text and make the keys incomparable.
    if RE_NATURAL_SORT.fullmatch(text):
        return int(text)
    return text.lower()


def natural_sort(item):
    """Return a sort key that orders embedded numbers by value.

    The string is split on runs of ASCII digits, so the key alternates
    between lower-cased text chunks and integers, e.g. ``'file10.csv'``
    becomes ``['file', 10, '.csv']``.
    """
    return [convert(chunk) for chunk in RE_NATURAL_SORT.split(item)]
def read_data_file(data_file, usecols):
with open(data_file.path, 'r', encoding='UTF-8') as file_handle:
return pandas.read_csv(
@@ -11,4 +18,4 @@ def read_data_file(data_file, usecols):
def write_excel_file(path, data_frame):
    """Write ``data_frame`` to an excel file at ``path``.

    Delegates to the frame's ``to_excel`` method, called exactly once.
    """
    data_frame.to_excel(path)

2
array2xls/validators.py

@@ -70,7 +70,7 @@ dat_validator = Validator(
['Dot_Number', 'Probe_Name', 'Gene_Name', 'Col', 'Row'],
['X[Pix]', 'Y[Pix]', 'DX[Pix]', 'DY[Pix]', 'Spot_Diameter', 'ROI_Width', 'ROI_Heigth', 'Pixels', 'Bkg',
'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD', 'Acc_Number', 'Proc_Control'],
['Bkg', 'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD'] )
['Bkg', 'Net_Signal', 'Net_Integral'] )
csv_validator = Validator(
'Sensovation Data Files', '.csv', validate_csv,

Loading…
Cancel
Save