Convert Microarray Data to Excel Files
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.2 KiB

import os
from collections import namedtuple
class ValidationError(ValueError):
    """Raised by the validators when a file does not have the expected layout.

    Derives from ``ValueError``, so ``except ValueError`` handlers also see it.
    """
# One supported file format: a human-readable label (``type``), the
# callable that checks an open file (``validate``) and the file
# extension the format is registered under (``extension``).
Validator = namedtuple('Validator', ['type', 'validate', 'extension'])

# One accepted input file: its path and the separator the validator
# reported for it.
DataFile = namedtuple('DataFile', ['path', 'separator'])
def validate_stx(lines):
    """Check that ``lines`` looks like a Signalyse statistic (.stx) report.

    The input must start with a ``Report_Format\\t2`` line, contain a
    header line starting with ``Probe_Name\\t`` and have at least one
    tab-separated row after that header.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Returns:
        ``','`` if the first row after the header contains more commas
        than periods (ignoring its first column), otherwise ``'.'``.
        Presumably this is the decimal mark used in the file; confirm
        against the code that consumes the value.

    Raises:
        ValidationError: If the input is empty, the format line or the
            header line is missing, or no usable row follows the header.
    """
    iterator = iter(lines)

    # Default to '' so an empty input is reported as unsupported instead
    # of leaking StopIteration to the caller.
    line = next(iterator, '')
    if not line.startswith('Report_Format\t2'):
        raise ValidationError('1 Unsupported File;' + line)

    # Skip forward to the column header; the rows follow directly after it.
    for line in iterator:
        if line.startswith('Probe_Name\t'):
            break
    else:
        raise ValidationError('1 Unsupported File')

    first_row = next(iterator, None)
    if first_row is None:
        raise ValidationError('No Data Present')

    # partition never raises; a row without any tab has no value columns
    # and is reported as a validation failure instead of a bare ValueError.
    _, tab, rest = first_row.partition('\t')
    if not tab:
        raise ValidationError('No Data Present')

    return ',' if rest.count(',') > rest.count('.') else '.'
def validate_dat(lines):
    """Check that ``lines`` looks like a Signalyse data (.dat) report.

    The input must start with a ``Report_Format\\t2`` line, contain a
    header line starting with ``Dot_Number\\t`` and have at least one
    tab-separated row after that header.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Returns:
        ``','`` if the inspected part of the first row after the header
        contains more commas than periods, otherwise ``'.'``. Presumably
        this is the decimal mark used in the file; confirm against the
        code that consumes the value.

    Raises:
        ValidationError: If the input is empty, the format line or the
            header line is missing, or no usable row follows the header.
    """
    iterator = iter(lines)

    # Default to '' so an empty input is reported as unsupported instead
    # of leaking StopIteration to the caller.
    if not next(iterator, '').startswith('Report_Format\t2'):
        raise ValidationError('Unsupported File')

    # Skip forward to the column header; the rows follow directly after it.
    for line in iterator:
        if line.startswith('Dot_Number\t'):
            break
    else:
        raise ValidationError('Unsupported File')

    first_row = next(iterator, None)
    if first_row is None or '\t' not in first_row:
        raise ValidationError('No Data Present')

    # split(..., 3) yields up to four fields, so unpacking it into two
    # names failed for every row with more than one tab. Only the trailing
    # remainder is inspected: everything after the third tab when there are
    # that many columns (presumably the leading three are identifiers --
    # TODO confirm), otherwise the last field.
    rest = first_row.split('\t', 3)[-1]
    return ',' if rest.count(',') > rest.count('.') else '.'
def validate_csv(lines):
    """Placeholder validator for the '.csv' format; performs no checks.

    Args:
        lines: Iterable of text lines; it is not read.

    Returns:
        Always ``None``: no separator is detected for this format.
    """
    return None
# Registry of the supported formats. Each entry is keyed by the
# validator's own extension (leading dot included), so the key and the
# ``extension`` field cannot drift apart.
validation_map = {
    validator.extension: validator
    for validator in (
        Validator('Signalyse Statistic Files', validate_stx, '.stx'),
        Validator('Signalyse Data Files', validate_dat, '.dat'),
        Validator('Sensovation Data Files', validate_csv, '.csv'),
    )
}
def guess_validator(unvalidated):
    """Pick the validator matching the extension of the first file.

    Only the first path is examined; the remaining paths are not checked
    for a matching extension.

    Args:
        unvalidated: Sequence of file paths.

    Returns:
        The ``validation_map`` entry registered for the first path's
        extension (compared case-insensitively), or ``None`` when the
        sequence is empty or the extension is not registered.
    """
    # An empty selection has no first file; report "no validator" the same
    # way as an unknown extension instead of raising IndexError.
    if not unvalidated:
        return None
    _, extension = os.path.splitext(unvalidated[0])
    # The registry keys are lower-case, so normalise e.g. '.STX'.
    return validation_map.get(extension.lower())
def validate_files(unvalidated, selected_validator):
    """Yield a ``DataFile`` for every path that passes validation.

    Each path is opened as UTF-8 text and handed to
    ``selected_validator.validate``. Files that cannot be read or are
    rejected by the validator are reported with ``print`` and skipped, so
    one bad file does not stop the remaining ones from being processed.

    Args:
        unvalidated: Iterable of file paths.
        selected_validator: Object with a ``validate`` callable that takes
            the open file and returns the separator for it.

    Yields:
        ``DataFile(path, separator)`` for each accepted file, in input
        order.
    """
    for file_path in unvalidated:
        try:
            with open(file_path, mode='r', encoding='utf-8') as file_handle:
                separator = selected_validator.validate(file_handle)
        except StopIteration:
            # A validator that calls next() on an empty or truncated file
            # raises StopIteration; letting it escape a generator body
            # turns it into RuntimeError (PEP 479) and aborts the batch.
            print('Unsupported File')
            continue
        except (OSError, ValueError) as error:
            # OSError covers IOError; ValueError covers UnicodeError,
            # ValidationError and the plain ValueError raised when a row
            # cannot be split as expected.
            print(error)
            continue
        # Yield only after the file is closed, so no handle stays open
        # while the consumer works on the result.
        yield DataFile(file_path, separator)