|
|
@ -5,52 +5,87 @@ from collections import namedtuple |
|
|
|
class ValidationError(ValueError):
    """Raised when a report file does not match the expected format."""
|
|
|
|
|
|
|
|
|
|
|
# One record per supported file format: display name, file extension,
# validation callable, and the column names used by downstream parsing.
Validator = namedtuple(
    'Validator',
    ['type', 'extension', 'validate', 'id_fields', 'data_fields', 'defaults'],
)
|
|
|
# A data file on disk together with its detected decimal separator.
DataFile = namedtuple('DataFile', ['path', 'separator'])
|
|
|
|
|
|
|
|
|
|
|
def validate_stx(lines):
    """Validate a Signalyse statistic (.stx) report and detect its decimal mark.

    lines: iterable of text lines from the file.
    Returns ',' when the first data row uses commas more often than dots
    (i.e. comma is the decimal separator), otherwise '.'.
    Raises ValidationError when the format header, the 'Probe_Name'
    column row, or the first data row is missing or malformed.
    """
    iterator = iter(lines)
    # Guard the first read: an empty input would otherwise leak a bare
    # StopIteration to the caller instead of a ValidationError.
    try:
        line = next(iterator)
    except StopIteration:
        raise ValidationError('1 Unsupported File')
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('1 Unsupported File;' + line)
    # Skip metadata until the column-header row appears.
    for line in iterator:
        if line.startswith('Probe_Name\t'):
            break
    else:  # header row never found
        raise ValidationError('1 Unsupported File')
    # The first data row must contain at least one tab-separated value column.
    try:
        line = next(iterator)
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration):
        raise ValidationError('No Data Present')
    # Heuristic: whichever character dominates the numeric columns is the
    # decimal separator; a tie falls back to '.'.
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def validate_dat(lines):
    """Validate a Signalyse data (.dat) report and detect its decimal mark.

    lines: iterable of text lines from the file.
    Returns ',' when the first data row uses commas more often than dots
    (i.e. comma is the decimal separator), otherwise '.'.
    Raises ValidationError when the format header, the 'Dot_Number'
    column row, or the first data row is missing or malformed.
    """
    iterator = iter(lines)
    # Guard the first read: an empty input would otherwise leak a bare
    # StopIteration to the caller instead of a ValidationError.
    try:
        line = next(iterator)
    except StopIteration:
        raise ValidationError('Unsupported File')
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('Unsupported File')
    # Skip metadata until the column-header row appears.
    for line in iterator:
        if line.startswith('Dot_Number\t'):
            break
    else:  # header row never found
        raise ValidationError('Unsupported File')
    # The first data row must carry values beyond the three id columns.
    try:
        line = next(iterator)
        _, _, _, numeric_data = line.split('\t', 3)
    except (ValueError, StopIteration):
        raise ValidationError('No Data Present')
    # Heuristic: whichever character dominates the numeric columns is the
    # decimal separator; a tie falls back to '.'.
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def validate_csv(lines):
    """Validate a Sensovation (.csv) report and detect its decimal mark.

    lines: iterable of text lines from the file.
    Returns ',' when the first data row uses commas more often than dots
    (i.e. comma is the decimal separator), otherwise '.'.
    Raises ValidationError when the ' ID ' header or the first data row
    is missing or malformed.
    """
    iterator = iter(lines)
    # Guard the first read: an empty input would otherwise leak a bare
    # StopIteration to the caller instead of a ValidationError.
    try:
        header = next(iterator)
    except StopIteration:
        raise ValidationError('Unsupported File')
    if not header.startswith(' ID '):
        raise ValidationError('Unsupported File')
    # The first data row must contain at least one tab-separated value column.
    try:
        line = next(iterator)
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration):
        raise ValidationError('No Data Present')
    # Heuristic: whichever character dominates the numeric columns is the
    # decimal separator; a tie falls back to '.'.
    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Format definition for Signalyse statistic exports (*.stx).
stx_validator = Validator(
    type='Signalyse Statistic Files',
    extension='.stx',
    validate=validate_stx,
    id_fields=['Probe_Name'],
    data_fields=[
        'Count', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral',
        'Net_Integral_SD', 'Proc_Control',
    ],
    defaults=['Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD'],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Format definition for Signalyse data exports (*.dat).
dat_validator = Validator(
    type='Signalyse Data Files',
    extension='.dat',
    validate=validate_dat,
    id_fields=['Dot_Number', 'Probe_Name', 'Gene_Name', 'Col', 'Row'],
    data_fields=[
        'X[Pix]', 'Y[Pix]', 'DX[Pix]', 'DY[Pix]', 'Spot_Diameter',
        'ROI_Width', 'ROI_Heigth', 'Pixels', 'Bkg', 'Bkg_SD', 'Net_Signal',
        'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD', 'Acc_Number',
        'Proc_Control',
    ],
    defaults=['Bkg', 'Bkg_SD', 'Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD'],
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Format definition for Sensovation exports (*.csv).
csv_validator = Validator(
    type='Sensovation Data Files',
    extension='.csv',
    validate=validate_csv,
    id_fields=[' ID '],
    data_fields=[
        'Pos.X', 'Pos.Y', 'Bkg.Mean', 'Spot.Mean', 'Bkg.Median',
        'Spot.Median', 'Bkg.StdDev', 'Spot.StdDev', 'Bkg.Sum', 'Spot.Sum',
        'Bkg.Area', 'Spot.Area', 'Spot.Sat. (%)', 'Found', 'Pos.Nom.X',
        'Pos.Nom.Y', 'Dia.', 'Rect.', 'Contour',
    ],
    defaults=[
        'Bkg.Mean', 'Spot.Mean', 'Bkg.Median', 'Spot.Median',
        'Bkg.StdDev', 'Spot.StdDev', 'Bkg.Sum', 'Spot.Sum',
    ],
)
|
|
|
|
|
|
|
|
|
|
|
# Dispatch table: file extension -> Validator definition for that format.
validation_map = {
    validator.extension: validator
    for validator in (stx_validator, dat_validator, csv_validator)
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def guess_validator(unvalidated): |
|
|
|
def guess_validator(unvalidated): |
|
|
|
# get the validation method by examining the first file |
|
|
|
# get the validation method by examining the first file |
|
|
|
_, extension = os.path.splitext(unvalidated[0]) |
|
|
|
_, extension = os.path.splitext(unvalidated[0]) |
|
|
|