|
|
|
@@ -6,43 +6,44 @@ class ValidationError(ValueError):
|
|
|
|
|
pass |
|
|
|
|
|
|
|
|
|
# Describes one supported input format. ``validate`` is called with an open
# file handle and returns a ``(separator, skip)`` pair.
# NOTE(review): the meaning of type/extension/id_fields/data_fields/defaults
# is not visible from here - confirm against the validator instances.
Validator = namedtuple('Validator', 'type extension validate id_fields data_fields defaults')

# One successfully validated file: its path, the detected decimal separator,
# the ``skip`` value reported by the validator, and the validator that
# accepted the file.
DataFile = namedtuple('DataFile', 'path separator skip validator')
|
|
|
|
|
|
|
|
|
def validate_stx(lines):
    """Validate STX report lines and detect the decimal separator.

    The first line must start with ``Report_Format:`` followed by a tab and
    ``2``. A later line must start with ``Probe_Name`` followed by a tab
    (the header row), and the line after it must contain at least one tab
    (the first data row).

    Args:
        lines: Iterable of text lines, e.g. an open text file handle.

    Returns:
        A ``(separator, i)`` tuple. ``separator`` is ``','`` when the first
        data row holds more commas than dots, otherwise ``'.'``. ``i`` is
        the zero-based index of the ``Probe_Name`` header line.

    Raises:
        ValidationError: If the input is empty, the format marker or header
            row is missing, or no data row follows the header.
    """
    iterator = enumerate(lines)
    # Fall back to an empty first line so that empty input is rejected as an
    # unsupported file instead of leaking StopIteration to the caller.
    _, line = next(iterator, (0, ''))
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('1 Unsupported File;' + line)

    # Scan forward to the header row; ``i`` keeps the index where it was found.
    for i, line in iterator:
        if line.startswith('Probe_Name\t'):
            break
    else:
        raise ValidationError('1 Unsupported File')

    try:
        _, line = next(iterator)
        # Drop the first column; everything after it is numeric data.
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration):
        raise ValidationError('No Data Present')

    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, i
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def validate_dat(lines):
    """Validate DAT report lines and detect the decimal separator.

    The first line must start with ``Report_Format:`` followed by a tab and
    ``2``. A later line must start with ``Dot_Number`` followed by a tab
    (the header row), and the line after it must contain at least three
    tabs (the first data row).

    Args:
        lines: Iterable of text lines, e.g. an open text file handle.

    Returns:
        A ``(separator, i)`` tuple. ``separator`` is ``','`` when the first
        data row holds more commas than dots after its third tab, otherwise
        ``'.'``. ``i`` is the zero-based index of the ``Dot_Number`` header
        line.

    Raises:
        ValidationError: If the input is empty, the format marker or header
            row is missing, or no data row follows the header.
    """
    iterator = enumerate(lines)
    # Fall back to an empty first line so that empty input is rejected as an
    # unsupported file instead of leaking StopIteration to the caller.
    _, line = next(iterator, (0, ''))
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('Unsupported File')

    # Scan forward to the header row; ``i`` keeps the index where it was found.
    for i, line in iterator:
        if line.startswith('Dot_Number\t'):
            break
    else:
        raise ValidationError('Unsupported File')

    try:
        _, line = next(iterator)
        # Drop the first three columns; the remainder is numeric data.
        _, _, _, numeric_data = line.split('\t', 3)
    except (ValueError, StopIteration):
        raise ValidationError('No Data Present')

    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, i
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def validate_csv(lines): |
|
|
|
@@ -55,7 +56,7 @@ def validate_csv(lines):
|
|
|
|
|
except (ValueError, StopIteration): |
|
|
|
|
raise ValidationError('No Data Present') |
|
|
|
|
separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.' |
|
|
|
|
return separator |
|
|
|
|
return separator, 0 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
stx_validator = Validator( |
|
|
|
@@ -96,7 +97,7 @@ def validate_files(unvalidated, selected_validator):
|
|
|
|
|
for file_path in unvalidated: |
|
|
|
|
try: |
|
|
|
|
with open(file_path, mode='r', encoding='utf-8') as file_handle: |
|
|
|
|
separator = selected_validator.validate(file_handle) |
|
|
|
|
yield DataFile(file_path, separator) |
|
|
|
|
separator, skip = selected_validator.validate(file_handle) |
|
|
|
|
yield DataFile(file_path, separator, skip, selected_validator) |
|
|
|
|
except (IOError, UnicodeError, ValidationError) as e: |
|
|
|
|
print(e) |
|
|
|
|