"""Validators for instrument report files and decimal-separator detection.

Each validator receives an iterable of text lines, checks that the content
looks like the expected report format and returns the decimal separator
(',' or '.') guessed from the first data row.
"""

import os
from collections import namedtuple


class ValidationError(ValueError):
    """Raised when the content of a file does not match the expected format."""


# type: human readable description, validate: callable taking an iterable of
# lines, extension: file extension (including the leading dot).
Validator = namedtuple('Validator', 'type validate extension')
# path: path of a validated file, separator: value returned by the validator.
DataFile = namedtuple('DataFile', 'path separator')


def _decimal_separator(text):
    """Return ',' if *text* holds more commas than dots, otherwise '.'."""
    return ',' if text.count(',') > text.count('.') else '.'


def _first_data_row(iterator):
    """Return the next line from *iterator* or raise if it is exhausted."""
    row = next(iterator, None)
    if row is None:
        raise ValidationError('No Data Present')
    return row


def validate_stx(lines):
    """Validate the lines of a '.stx' statistic file.

    The first line must start with 'Report_Format<TAB>2' and some later line
    must start with 'Probe_Name<TAB>'. The line following that header row is
    used to guess the decimal separator.

    Args:
        lines: iterable of text lines (e.g. an open file handle).

    Returns:
        ',' or '.', the decimal separator guessed from the first data row.

    Raises:
        ValidationError: if the report header or the table header is missing,
            or if there is no usable data row after the table header.
    """
    iterator = iter(lines)
    # A default avoids leaking StopIteration on empty input; inside a
    # generator such as validate_files it would turn into a RuntimeError.
    line = next(iterator, '')
    if not line.startswith('Report_Format\t2'):
        raise ValidationError('1 Unsupported File;' + line)

    # any() stops consuming right after the table header row.
    if not any(row.startswith('Probe_Name\t') for row in iterator):
        raise ValidationError('1 Unsupported File')

    _probe_name, tab, rest = _first_data_row(iterator).partition('\t')
    if not tab:
        # A row without any tab carries no data columns.
        raise ValidationError('No Data Present')
    return _decimal_separator(rest)


def validate_dat(lines):
    """Validate the lines of a '.dat' data file.

    The first line must start with 'Report_Format<TAB>2' and some later line
    must start with 'Dot_Number<TAB>'. The line following that header row is
    used to guess the decimal separator.

    Args:
        lines: iterable of text lines (e.g. an open file handle).

    Returns:
        ',' or '.', the decimal separator guessed from the first data row.

    Raises:
        ValidationError: if the report header or the table header is missing,
            or if there is no usable data row after the table header.
    """
    iterator = iter(lines)
    if not next(iterator, '').startswith('Report_Format\t2'):
        raise ValidationError('Unsupported File')

    if not any(row.startswith('Dot_Number\t') for row in iterator):
        raise ValidationError('Unsupported File')

    # Up to three leading columns are skipped; only the remainder of the row
    # is inspected. Unpacking the split result into exactly two names (as was
    # done before) failed for every row with more than one tab.
    fields = _first_data_row(iterator).split('\t', 3)
    if len(fields) < 2:
        raise ValidationError('No Data Present')
    return _decimal_separator(fields[-1])


def validate_csv(lines):
    """Placeholder validator for '.csv' files.

    Performs no checks and returns None, so no separator is detected.
    """
    return None


validation_map = {
    '.stx': Validator('Signalyse Statistic Files', validate_stx, '.stx'),
    '.dat': Validator('Signalyse Data Files', validate_dat, '.dat'),
    '.csv': Validator('Sensovation Data Files', validate_csv, '.csv'),
}


def guess_validator(unvalidated):
    """Pick a validator based on the extension of the first file path.

    Args:
        unvalidated: sequence of file paths.

    Returns:
        The matching Validator from validation_map, or None if the sequence
        is empty or the extension is not registered. The extension is matched
        case-insensitively.
    """
    if not unvalidated:
        return None
    _, extension = os.path.splitext(unvalidated[0])
    return validation_map.get(extension.lower())


def validate_files(unvalidated, selected_validator):
    """Validate every file and yield a DataFile for each one that passes.

    Files that cannot be opened, decoded as UTF-8 or validated are reported
    with print() and skipped; processing continues with the next file.

    Args:
        unvalidated: iterable of file paths.
        selected_validator: Validator whose validate callable is applied to
            each opened file.

    Yields:
        DataFile(path, separator) for every successfully validated file.
    """
    for file_path in unvalidated:
        try:
            with open(file_path, mode='r', encoding='utf-8') as file_handle:
                separator = selected_validator.validate(file_handle)
                yield DataFile(file_path, separator)
        except (OSError, UnicodeError, ValidationError) as e:
            print(e)