Browse Source

combining source files works!

master
Holger Frey 8 years ago
parent
commit
09096dfee5
  1. 45
      array2xls/gui.py
  2. 9
      array2xls/inout.py
  3. 2
      array2xls/validators.py

45
array2xls/gui.py

@ -2,7 +2,7 @@ import os
import tkinter as tk import tkinter as tk
import tkinter.ttk as ttk import tkinter.ttk as ttk
from tkinter import filedialog from tkinter import filedialog
import pandas
import validators import validators
import inout import inout
@ -180,7 +180,7 @@ class Application(tk.Frame):
if state not in (APP_STATE_3, APP_STATE_4): if state not in (APP_STATE_3, APP_STATE_4):
return self.reset() return self.reset()
if state == APP_STATE_3: if state == APP_STATE_3:
text = self._convert_to_single_files() text = self._convert_to_separate_files()
else: else:
text = self._combine_data_files() text = self._combine_data_files()
self.status_panel.set_text(text + ' Done') self.status_panel.set_text(text + ' Done')
@ -197,12 +197,12 @@ class Application(tk.Frame):
else: else:
return APP_STATE_2 return APP_STATE_2
def _convert_to_single_files(self): def _convert_to_separate_files(self):
total = len(self.validated_files) total = len(self.validated_files)
text = 'Converting data file %d of %d to single excel file ...' text = 'Converting data file %d of %d to separate excel file ...'
all_fields = self.file_validator.id_fields + self.file_validator.data_fields
for i, data_file in enumerate(self.validated_files): for i, data_file in enumerate(self.validated_files):
self.status_panel.set_text(text % (i+1, total)) self.status_panel.set_text(text % (i+1, total))
all_fields = data_file.validator.id_fields + data_file.validator.data_fields
data_frame = inout.read_data_file(data_file, all_fields) data_frame = inout.read_data_file(data_file, all_fields)
data_frame = data_frame.set_index(data_file.validator.id_fields) data_frame = data_frame.set_index(data_file.validator.id_fields)
name, ext = os.path.splitext(data_file.path) name, ext = os.path.splitext(data_file.path)
@ -211,8 +211,33 @@ class Application(tk.Frame):
return text % (i+1, total) return text % (i+1, total)
def _combine_data_files(self):
    """Combine all validated data files into one pivoted excel file.

    Every validated file is read with the id fields plus the data fields
    currently selected in the field panel's listbox.  Each row is tagged
    with the base name of the file it came from, all rows are
    concatenated, and the result is pivoted so that the first id field
    becomes the index and every remaining column is spread over one
    column per file name.

    The output columns are grouped by value column (in order of first
    appearance); within a group the file columns are in natural sort
    order.  The workbook is written next to the first validated file as
    ``combined<_ext>.xlsx``.

    Returns:
        The status text of the last processed file.
    """
    total = len(self.validated_files)
    text = 'Combining data file %d of %d to single excel file ...'
    data_fields = [
        self.file_validator.data_fields[i]
        for i in self.field_panel.listbox.curselection()
    ]
    selected_fields = self.file_validator.id_fields + data_fields

    data_frame_list = []
    for i, data_file in enumerate(self.validated_files):
        self.status_panel.set_text(text % (i + 1, total))
        data_frame = inout.read_data_file(data_file, selected_fields)
        # remember the origin of each row, this becomes the pivot column
        data_frame['File Name'] = os.path.basename(data_file.path)
        data_frame_list.append(data_frame)
    master_frame = pandas.concat(data_frame_list, ignore_index=True)

    # keyword arguments: positional use of pivot() is rejected by
    # recent pandas versions
    pivoted_df = master_frame.pivot(
        index=self.file_validator.id_fields[0],
        columns='File Name',
    )

    # group the (value column, file name) pairs by value column while
    # keeping the order in which the value columns first appear
    col_grouper = {}
    value_columns = []
    for value_col, file_col in pivoted_df.columns:
        if value_col not in col_grouper:
            col_grouper[value_col] = []
            value_columns.append(value_col)
        col_grouper[value_col].append(file_col)

    sorted_col_names = []
    for value_col in value_columns:
        sorted_file_cols = sorted(
            col_grouper[value_col], key=inout.natural_sort)
        sorted_col_names.extend(
            (value_col, file_col) for file_col in sorted_file_cols)

    # reindex(columns=...) replaces reindex_axis(..., axis=1), which is
    # no longer available in current pandas
    sorted_df = pivoted_df.reindex(columns=sorted_col_names)

    first_file = self.validated_files[0]
    directory = os.path.dirname(first_file.path)
    pseudo_extension = self.file_validator.extension.replace('.', '_')
    xls_path = os.path.join(
        directory, 'combined' + pseudo_extension + '.xlsx')
    inout.write_excel_file(xls_path, sorted_df)
    return text % (i + 1, total)

9
array2xls/inout.py

@ -1,8 +1,15 @@
import os import os
import re
import pandas import pandas
import validators import validators
# splits a string into alternating text and ASCII digit chunks; the
# capturing group keeps the digit runs in the result of split()
RE_NATURAL_SORT = re.compile('([0-9]+)')


def convert(text):
    """Return *text* as int if it is a run of ASCII digits, else lowercased.

    The check uses the same pattern that is used for splitting instead of
    ``str.isdigit()``: ``isdigit()`` also accepts characters such as
    superscript digits that ``int()`` cannot parse.
    """
    if RE_NATURAL_SORT.fullmatch(text):
        return int(text)
    return text.lower()


def natural_sort(item):
    """Return a sort key that orders embedded numbers by numeric value.

    The key is a list of alternating lowercased text chunks (even
    positions) and integers (odd positions), so keys of different strings
    can always be compared position by position.
    """
    return [convert(chunk) for chunk in RE_NATURAL_SORT.split(item)]
def read_data_file(data_file, usecols): def read_data_file(data_file, usecols):
with open(data_file.path, 'r', encoding='UTF-8') as file_handle: with open(data_file.path, 'r', encoding='UTF-8') as file_handle:
return pandas.read_csv( return pandas.read_csv(
@ -11,4 +18,4 @@ def read_data_file(data_file, usecols):
def write_excel_file(path, data_frame): def write_excel_file(path, data_frame):
data_frame.to_excel(path) data_frame.to_excel(path)

2
array2xls/validators.py

@ -70,7 +70,7 @@ dat_validator = Validator(
['Dot_Number', 'Probe_Name', 'Gene_Name', 'Col', 'Row'], ['Dot_Number', 'Probe_Name', 'Gene_Name', 'Col', 'Row'],
['X[Pix]', 'Y[Pix]', 'DX[Pix]', 'DY[Pix]', 'Spot_Diameter', 'ROI_Width', 'ROI_Heigth', 'Pixels', 'Bkg', ['X[Pix]', 'Y[Pix]', 'DX[Pix]', 'DY[Pix]', 'Spot_Diameter', 'ROI_Width', 'ROI_Heigth', 'Pixels', 'Bkg',
'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD', 'Acc_Number', 'Proc_Control'], 'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD', 'Acc_Number', 'Proc_Control'],
['Bkg', 'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD'] ) ['Bkg', 'Net_Signal', 'Net_Integral'] )
csv_validator = Validator( csv_validator = Validator(
'Sensovation Data Files', '.csv', validate_csv, 'Sensovation Data Files', '.csv', validate_csv,

Loading…
Cancel
Save