Browse Source

single file conversion works

master
Holger Frey 8 years ago
parent
commit
7ee6ae13f5
  1. 24
      array2xls/gui.py
  2. 14
      array2xls/inout.py
  3. 29
      array2xls/validators.py

24
array2xls/gui.py

@ -1,8 +1,11 @@ @@ -1,8 +1,11 @@
import os
import tkinter as tk
import tkinter.ttk as ttk
from tkinter import filedialog
import validators
import inout
# Human-readable labels for application states; the GUI code compares its
# current state against APP_STATE_* constants like these.
APP_STATE_1 = 'no valid files selected'
APP_STATE_2 = 'no valid fields selected'
@ -176,15 +179,10 @@ class Application(tk.Frame): @@ -176,15 +179,10 @@ class Application(tk.Frame):
state = self._state
if state not in (APP_STATE_3, APP_STATE_4):
return self.reset()
i = len(self.validated_files)
if state == APP_STATE_3:
text = 'Converting %d data files to %d excel files ...' % (i, i)
self.status_panel.set_text(text)
self._convert_to_single_files()
text = self._convert_to_single_files()
else:
text = 'Combining %d data files into one excel file ...' % i
self.status_panel.set_text(text)
self._combine_data_files()
text = self._combine_data_files()
self.status_panel.set_text(text + ' Done')
@property
@ -200,7 +198,17 @@ class Application(tk.Frame): @@ -200,7 +198,17 @@ class Application(tk.Frame):
return APP_STATE_2
def _convert_to_single_files(self):
    """Convert every validated data file into its own Excel file.

    For each entry of ``self.validated_files`` the id and data fields named
    by the entry's validator are read, the id fields become the index, and
    the result is written next to the source file.  The target name is the
    source path with the dot of its extension replaced by an underscore
    and ``.xlsx`` appended (``data.txt`` -> ``data_txt.xlsx``).

    Returns:
        The last progress message that was shown in the status panel.
    """
    total = len(self.validated_files)
    text = 'Converting data file %d of %d to single excel file ...'
    # Pre-set the counter so the final message can still be built when
    # there are no validated files and the loop body never runs.
    current = 0
    for current, data_file in enumerate(self.validated_files, start=1):
        self.status_panel.set_text(text % (current, total))
        validator = data_file.validator
        all_fields = validator.id_fields + validator.data_fields
        data_frame = inout.read_data_file(data_file, all_fields)
        data_frame = data_frame.set_index(validator.id_fields)
        name, ext = os.path.splitext(data_file.path)
        path = name + ext.replace('.', '_') + '.xlsx'
        inout.write_excel_file(path, data_frame)
    return text % (current, total)
def _combine_data_files(self):
pass

14
array2xls/inout.py

@ -0,0 +1,14 @@ @@ -0,0 +1,14 @@
import os
import pandas
import validators
def read_data_file(data_file, usecols):
    """Load the selected columns of a tab separated data file.

    Args:
        data_file: object providing ``path`` (file to read), ``separator``
            (decimal mark used in the file) and ``skip`` (value handed to
            pandas as ``skiprows``; the header is the first line after it).
        usecols: the columns to load, passed through to pandas.

    Returns:
        The ``pandas.DataFrame`` produced by ``pandas.read_csv``.
    """
    with open(data_file.path, 'r', encoding='UTF-8') as source:
        read_options = {
            'sep': '\t',
            'decimal': data_file.separator,
            'header': 0,
            'index_col': False,
            'skiprows': data_file.skip,
            'usecols': usecols,
        }
        table = pandas.read_csv(source, **read_options)
    return table
def write_excel_file(path, data_frame):
    """Write ``data_frame`` to ``path`` using its ``to_excel`` method."""
    data_frame.to_excel(path)

29
array2xls/validators.py

@ -6,43 +6,44 @@ class ValidationError(ValueError): @@ -6,43 +6,44 @@ class ValidationError(ValueError):
pass
# Record describing one supported input format: its type and file extension,
# the validation callable, the id and data field names and the defaults.
Validator = namedtuple(
    'Validator', 'type extension validate id_fields data_fields defaults')

# Record for a successfully validated file: where it lives, which decimal
# separator it uses, how many lines to skip and the validator that accepted it.
DataFile = namedtuple('DataFile', 'path separator skip validator')
def validate_stx(lines):
    """Check that ``lines`` look like a supported stx report.

    The first line must start with ``Report_Format:<TAB>2``, a later line
    must start with ``Probe_Name<TAB>`` (the column header) and at least one
    tab separated line must follow it.

    Args:
        lines: iterable of text lines, e.g. an open text file.

    Returns:
        A tuple ``(separator, header_index)``: the decimal separator guessed
        from the first line after the header (``','`` if it holds more
        commas than dots, else ``'.'``) and the zero-based index of the
        header line, i.e. the number of lines preceding it.

    Raises:
        ValidationError: if the input is empty, has an unexpected first
            line, has no header line or has no data after the header.
    """
    iterator = enumerate(lines)
    # An empty input is treated like an unsupported first line; a bare
    # next() would leak StopIteration (a RuntimeError inside generators).
    _, line = next(iterator, (0, ''))
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('1 Unsupported File;' + line)
    for header_index, line in iterator:
        if line.startswith('Probe_Name\t'):
            break
    else:
        raise ValidationError('1 Unsupported File')
    try:
        _, line = next(iterator)
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration) as error:
        raise ValidationError('No Data Present') from error
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, header_index
def validate_dat(lines):
    """Check that ``lines`` look like a supported dat report.

    The first line must start with ``Report_Format:<TAB>2``, a later line
    must start with ``Dot_Number<TAB>`` (the column header) and it must be
    followed by a line with at least four tab separated fields.

    Args:
        lines: iterable of text lines, e.g. an open text file.

    Returns:
        A tuple ``(separator, header_index)``: the decimal separator guessed
        from everything after the third tab of the first line following the
        header (``','`` if it holds more commas than dots, else ``'.'``) and
        the zero-based index of the header line, i.e. the number of lines
        preceding it.

    Raises:
        ValidationError: if the input is empty, has an unexpected first
            line, has no header line or has no data after the header.
    """
    iterator = enumerate(lines)
    # An empty input is treated like an unsupported first line; a bare
    # next() would leak StopIteration (a RuntimeError inside generators).
    _, line = next(iterator, (0, ''))
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('Unsupported File')
    for header_index, line in iterator:
        if line.startswith('Dot_Number\t'):
            break
    else:
        raise ValidationError('Unsupported File')
    try:
        _, line = next(iterator)
        _, _, _, numeric_data = line.split('\t', 3)
    except (ValueError, StopIteration) as error:
        raise ValidationError('No Data Present') from error
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, header_index
def validate_csv(lines):
@ -55,7 +56,7 @@ def validate_csv(lines): @@ -55,7 +56,7 @@ def validate_csv(lines):
except (ValueError, StopIteration):
raise ValidationError('No Data Present')
separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
return separator
return separator, 0
stx_validator = Validator(
@ -96,7 +97,7 @@ def validate_files(unvalidated, selected_validator): @@ -96,7 +97,7 @@ def validate_files(unvalidated, selected_validator):
for file_path in unvalidated:
try:
with open(file_path, mode='r', encoding='utf-8') as file_handle:
separator = selected_validator.validate(file_handle)
yield DataFile(file_path, separator)
separator, skip = selected_validator.validate(file_handle)
yield DataFile(file_path, separator, skip, selected_validator)
except (IOError, UnicodeError, ValidationError) as e:
print(e)

Loading…
Cancel
Save