Browse Source

single file conversion works

master
Holger Frey 8 years ago
parent
commit
7ee6ae13f5
  1. 24
      array2xls/gui.py
  2. 14
      array2xls/inout.py
  3. 29
      array2xls/validators.py

24
array2xls/gui.py

@ -1,8 +1,11 @@
import os
import tkinter as tk import tkinter as tk
import tkinter.ttk as ttk import tkinter.ttk as ttk
from tkinter import filedialog from tkinter import filedialog
import validators import validators
import inout
APP_STATE_1 = 'no valid files selected' APP_STATE_1 = 'no valid files selected'
APP_STATE_2 = 'no valid fields selected' APP_STATE_2 = 'no valid fields selected'
@ -176,15 +179,10 @@ class Application(tk.Frame):
state = self._state state = self._state
if state not in (APP_STATE_3, APP_STATE_4): if state not in (APP_STATE_3, APP_STATE_4):
return self.reset() return self.reset()
i = len(self.validated_files)
if state == APP_STATE_3: if state == APP_STATE_3:
text = 'Converting %d data files to %d excel files ...' % (i, i) text = self._convert_to_single_files()
self.status_panel.set_text(text)
self._convert_to_single_files()
else: else:
text = 'Combining %d data files into one excel file ...' % i text = self._combine_data_files()
self.status_panel.set_text(text)
self._combine_data_files()
self.status_panel.set_text(text + ' Done') self.status_panel.set_text(text + ' Done')
@property @property
@ -200,7 +198,17 @@ class Application(tk.Frame):
return APP_STATE_2 return APP_STATE_2
def _convert_to_single_files(self): def _convert_to_single_files(self):
pass total = len(self.validated_files)
text = 'Converting data file %d of %d to single excel file ...'
for i, data_file in enumerate(self.validated_files):
self.status_panel.set_text(text % (i+1, total))
all_fields = data_file.validator.id_fields + data_file.validator.data_fields
data_frame = inout.read_data_file(data_file, all_fields)
data_frame = data_frame.set_index(data_file.validator.id_fields)
name, ext = os.path.splitext(data_file.path)
path = name + ext.replace('.', '_') + '.xlsx'
inout.write_excel_file(path, data_frame)
return text % (i+1, total)
def _combine_data_files(self): def _combine_data_files(self):
pass pass

14
array2xls/inout.py

@ -0,0 +1,14 @@
import os
import pandas
import validators
def read_data_file(data_file, usecols):
    """Load the selected columns of a tab separated data file.

    Args:
        data_file: object providing ``path`` (file to open as UTF-8),
            ``separator`` (decimal mark used for numbers) and ``skip``
            (value handed to pandas as ``skiprows``).
        usecols: column names to keep.

    Returns:
        The ``pandas.DataFrame`` parsed from the file; the first line
        that is not skipped is used as the header row.
    """
    with open(data_file.path, 'r', encoding='UTF-8') as source:
        frame = pandas.read_csv(
            source,
            sep='\t',
            decimal=data_file.separator,
            header=0,
            index_col=False,
            skiprows=data_file.skip,
            usecols=usecols,
        )
    return frame
def write_excel_file(path, data_frame):
    """Write *data_frame* to *path* by delegating to its ``to_excel`` method.

    Args:
        path: destination handed to ``to_excel``.
        data_frame: object offering ``to_excel`` (a pandas DataFrame in
            the visible caller).
    """
    data_frame.to_excel(excel_writer=path)

29
array2xls/validators.py

@ -6,43 +6,44 @@ class ValidationError(ValueError):
pass pass
# Description of one supported input format.  ``validate`` is called with an
# open file handle and returns ``(separator, skip)``; ``id_fields`` and
# ``data_fields`` are concatenated to select the columns that get exported.
Validator = namedtuple('Validator', 'type extension validate id_fields data_fields defaults')
# One input file that passed validation: its path, the decimal separator
# detected in its data, the value later passed to pandas as ``skiprows``,
# and the Validator that accepted it.
DataFile = namedtuple('DataFile', 'path separator skip validator')
def validate_stx(lines):
    """Validate the lines of a report and locate its data table.

    The first line must start with ``Report_Format:`` followed by a tab
    and ``2``.  Later lines are scanned for the table header, i.e. the
    first line starting with ``Probe_Name`` and a tab.  The line after
    the header is used to guess the decimal separator.

    Args:
        lines: iterable of text lines, e.g. an open file handle.

    Returns:
        A tuple ``(separator, i)``: ``separator`` is ``','`` when the
        first data line holds more commas than dots, otherwise ``'.'``;
        ``i`` is the zero-based index of the header line.

    Raises:
        ValidationError: if the input is empty, the first line or the
            table header does not match, or no data line follows the
            header.
    """
    iterator = enumerate(lines)
    # Fall back to an empty first line so that empty input is reported as
    # a ValidationError; a bare StopIteration would slip past callers that
    # only handle ValidationError.
    _, line = next(iterator, (0, ''))
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('1 Unsupported File;' + line)
    for i, line in iterator:
        if line.startswith('Probe_Name\t'):
            break
    else:
        # The loop ran out of lines without finding the table header.
        raise ValidationError('1 Unsupported File')
    try:
        _, line = next(iterator)
        # Everything after the first column is treated as numeric data.
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration) as error:
        raise ValidationError('No Data Present') from error
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, i
def validate_dat(lines):
    """Validate the lines of a report and locate its data table.

    The first line must start with ``Report_Format:`` followed by a tab
    and ``2``.  Later lines are scanned for the table header, i.e. the
    first line starting with ``Dot_Number`` and a tab.  The line after
    the header is used to guess the decimal separator.

    Args:
        lines: iterable of text lines, e.g. an open file handle.

    Returns:
        A tuple ``(separator, i)``: ``separator`` is ``','`` when the
        first data line holds more commas than dots, otherwise ``'.'``;
        ``i`` is the zero-based index of the header line.

    Raises:
        ValidationError: if the input is empty, the first line or the
            table header does not match, or the line after the header
            has fewer than four tab separated columns.
    """
    iterator = enumerate(lines)
    # Fall back to an empty first line so that empty input is reported as
    # a ValidationError; a bare StopIteration would slip past callers that
    # only handle ValidationError.
    _, line = next(iterator, (0, ''))
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('Unsupported File')
    for i, line in iterator:
        if line.startswith('Dot_Number\t'):
            break
    else:
        # The loop ran out of lines without finding the table header.
        raise ValidationError('Unsupported File')
    try:
        _, line = next(iterator)
        # Everything after the first three columns is treated as numeric data.
        _, _, _, numeric_data = line.split('\t', 3)
    except (ValueError, StopIteration) as error:
        raise ValidationError('No Data Present') from error
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, i
def validate_csv(lines): def validate_csv(lines):
@ -55,7 +56,7 @@ def validate_csv(lines):
except (ValueError, StopIteration): except (ValueError, StopIteration):
raise ValidationError('No Data Present') raise ValidationError('No Data Present')
separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.' separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
return separator return separator, 0
stx_validator = Validator( stx_validator = Validator(
@ -96,7 +97,7 @@ def validate_files(unvalidated, selected_validator):
for file_path in unvalidated: for file_path in unvalidated:
try: try:
with open(file_path, mode='r', encoding='utf-8') as file_handle: with open(file_path, mode='r', encoding='utf-8') as file_handle:
separator = selected_validator.validate(file_handle) separator, skip = selected_validator.validate(file_handle)
yield DataFile(file_path, separator) yield DataFile(file_path, separator, skip, selected_validator)
except (IOError, UnicodeError, ValidationError) as e: except (IOError, UnicodeError, ValidationError) as e:
print(e) print(e)

Loading…
Cancel
Save