From 7ee6ae13f50dae1af55b713976856bd1be2c210b Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Tue, 19 Jul 2016 18:02:58 +0200 Subject: [PATCH] single file conversion works --- array2xls/gui.py | 24 ++++++++++++++++-------- array2xls/inout.py | 14 ++++++++++++++ array2xls/validators.py | 29 +++++++++++++++-------------- 3 files changed, 45 insertions(+), 22 deletions(-) create mode 100644 array2xls/inout.py diff --git a/array2xls/gui.py b/array2xls/gui.py index c2e0bb5..d7d444d 100644 --- a/array2xls/gui.py +++ b/array2xls/gui.py @@ -1,8 +1,11 @@ +import os import tkinter as tk import tkinter.ttk as ttk from tkinter import filedialog + import validators +import inout APP_STATE_1 = 'no valid files selected' APP_STATE_2 = 'no valid fields selected' @@ -176,15 +179,10 @@ class Application(tk.Frame): state = self._state if state not in (APP_STATE_3, APP_STATE_4): return self.reset() - i = len(self.validated_files) if state == APP_STATE_3: - text = 'Converting %d data files to %d excel files ...' % (i, i) - self.status_panel.set_text(text) - self._convert_to_single_files() + text = self._convert_to_single_files() else: - text = 'Combining %d data files into one excel file ...' % i - self.status_panel.set_text(text) - self._combine_data_files() + text = self._combine_data_files() self.status_panel.set_text(text + ' Done') @property @@ -200,7 +198,17 @@ class Application(tk.Frame): return APP_STATE_2 def _convert_to_single_files(self): - pass + total = len(self.validated_files) + text = 'Converting data file %d of %d to single excel file ...' + for i, data_file in enumerate(self.validated_files): + self.status_panel.set_text(text % (i+1, total)) + all_fields = data_file.validator.id_fields + data_file.validator.data_fields + data_frame = inout.read_data_file(data_file, all_fields) + data_frame = data_frame.set_index(data_file.validator.id_fields) + name, ext = os.path.splitext(data_file.path) + path = name + ext.replace('.', '_') + '.xlsx' + inout.write_excel_file(path, data_frame) + return text % (i+1, total) def _combine_data_files(self): pass diff --git a/array2xls/inout.py b/array2xls/inout.py new file mode 100644 index 0000000..ef7f00b --- /dev/null +++ b/array2xls/inout.py @@ -0,0 +1,14 @@ +import os +import pandas + +import validators + +def read_data_file(data_file, usecols): + with open(data_file.path, 'r', encoding='UTF-8') as file_handle: + return pandas.read_csv( + file_handle, sep='\t', decimal=data_file.separator, header=0, + index_col=False, skiprows=data_file.skip, usecols=usecols) + + +def write_excel_file(path, data_frame): + data_frame.to_excel(path) \ No newline at end of file diff --git a/array2xls/validators.py b/array2xls/validators.py index 807046a..6bd96a9 100644 --- a/array2xls/validators.py +++ b/array2xls/validators.py @@ -6,43 +6,44 @@ class ValidationError(ValueError): pass Validator = namedtuple('Validator', 'type extension validate id_fields data_fields defaults') -DataFile = namedtuple('DataFile', 'path separator') +DataFile = namedtuple('DataFile', 'path separator skip validator') def validate_stx(lines): - iterator = iter(lines) - line = next(iterator) + iterator = enumerate(lines) + _, line = next(iterator) if not line.startswith('Report_Format:\t2'): raise ValidationError('1 Unsupported File;' + line) - for line in iterator: + for i, line in iterator: if line.startswith('Probe_Name\t'): break else: raise ValidationError('1 Unsupported File') try: - line = next(iterator) + _, line = next(iterator) _, numeric_data = line.split('\t', 1) except (ValueError, StopIteration): raise ValidationError('No Data Present') separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.' - return separator + return separator, i def validate_dat(lines): - iterator = iter(lines) - if not next(iterator).startswith('Report_Format:\t2'): + iterator = enumerate(lines) + _, line = next(iterator) + if not line.startswith('Report_Format:\t2'): raise ValidationError('Unsupported File') - for line in iterator: + for i, line in iterator: if line.startswith('Dot_Number\t'): break else: raise ValidationError('Unsupported File') try: - line = next(iterator) + _, line = next(iterator) _, _, _, numeric_data = line.split('\t', 3) except (ValueError, StopIteration): raise ValidationError('No Data Present') separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.' - return separator + return separator, i def validate_csv(lines): @@ -55,7 +56,7 @@ def validate_csv(lines): except (ValueError, StopIteration): raise ValidationError('No Data Present') separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.' - return separator + return separator, 0 stx_validator = Validator( @@ -96,7 +97,7 @@ def validate_files(unvalidated, selected_validator): for file_path in unvalidated: try: with open(file_path, mode='r', encoding='utf-8') as file_handle: - separator = selected_validator.validate(file_handle) - yield DataFile(file_path, separator) + separator, skip = selected_validator.validate(file_handle) + yield DataFile(file_path, separator, skip, selected_validator) except (IOError, UnicodeError, ValidationError) as e: print(e)