You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							103 lines
						
					
					
						
							3.8 KiB
						
					
					
				
			
		
		
	
	
							103 lines
						
					
					
						
							3.8 KiB
						
					
					
				| import os | |
| 
 | |
| from collections import namedtuple | |
| 
 | |
class ValidationError(ValueError):
    """Raised by the validators in this module when a file is not acceptable.

    The message says why the file was rejected, for example because the
    expected header is missing or because no data row follows the header.
    """
| 
 | |
# Description of one supported file format: a display name (``type``), the
# file ``extension`` it is registered under, the ``validate`` callable and
# three lists of column names.
Validator = namedtuple(
    'Validator',
    ['type', 'extension', 'validate', 'id_fields', 'data_fields', 'defaults'],
)

# Result of a successful validation: the file path, the detected decimal
# separator, the value the validator returned as ``skip`` and the validator
# that accepted the file.
DataFile = namedtuple(
    'DataFile',
    ['path', 'separator', 'skip', 'validator'],
)
| 
 | |
def validate_stx(lines):
    """Validate the lines of a ``.stx`` file and detect its decimal separator.

    The first line must start with ``Report_Format:``, a tab and ``2``.
    The remaining lines are scanned for the column header, i.e. the first
    line starting with ``Probe_Name`` and a tab. The line directly after
    that header is used to guess the decimal separator.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Returns:
        A tuple ``(separator, header_index)``. ``separator`` is ``','`` if
        the first data row contains more commas than dots, otherwise
        ``'.'``. ``header_index`` is the zero-based index of the header line.

    Raises:
        ValidationError: If the input is empty, the report format line or
            the header line is missing, or no usable data row follows the
            header.
    """
    iterator = enumerate(lines)

    # Empty input must be reported as a validation failure. A bare
    # ``next(iterator)`` would leak StopIteration, which turns into a
    # RuntimeError when this function is called from inside a generator.
    first = next(iterator, None)
    if first is None:
        raise ValidationError('1 Unsupported File')
    _, line = first

    # NOTE(review): the "1 " prefix in the messages below looks like a
    # debugging leftover; it is kept unchanged so the output stays the same.
    if not line.startswith('Report_Format:\t2'):
        raise ValidationError('1 Unsupported File;' + line)

    for i, line in iterator:
        if line.startswith('Probe_Name\t'):
            break
    else:
        # The loop ended without finding the column header.
        raise ValidationError('1 Unsupported File')

    try:
        _, line = next(iterator)
        # Drop the first column; everything after it is inspected.
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration) as err:
        raise ValidationError('No Data Present') from err

    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, i
| 
 | |
| 
 | |
def validate_dat(lines):
    """Validate the lines of a ``.dat`` file and detect its decimal separator.

    The first line must start with ``Report_Format:``, a tab and ``2``.
    The remaining lines are scanned for the column header, i.e. the first
    line starting with ``Dot_Number`` and a tab. The line directly after
    that header is used to guess the decimal separator.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Returns:
        A tuple ``(separator, header_index)``. ``separator`` is ``','`` if
        the first data row contains more commas than dots, otherwise
        ``'.'``. ``header_index`` is the zero-based index of the header line.

    Raises:
        ValidationError: If the input is empty, the report format line or
            the header line is missing, or the line after the header has
            fewer than four tab-separated columns.
    """
    iterator = enumerate(lines)

    # Empty input must be reported as a validation failure. A bare
    # ``next(iterator)`` would leak StopIteration, which turns into a
    # RuntimeError when this function is called from inside a generator.
    first = next(iterator, None)
    if first is None or not first[1].startswith('Report_Format:\t2'):
        raise ValidationError('Unsupported File')

    for i, line in iterator:
        if line.startswith('Dot_Number\t'):
            break
    else:
        # The loop ended without finding the column header.
        raise ValidationError('Unsupported File')

    try:
        _, line = next(iterator)
        # Drop the first three columns; everything after them is inspected.
        _, _, _, numeric_data = line.split('\t', 3)
    except (ValueError, StopIteration) as err:
        raise ValidationError('No Data Present') from err

    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, i
| 
 | |
| 
 | |
def validate_csv(lines):
    """Validate the lines of a ``.csv`` file and detect its decimal separator.

    The first line is the column header and must start with ``' ID '``.
    The second line is used to guess the decimal separator.

    Args:
        lines: Iterable of text lines, e.g. an open text file.

    Returns:
        A tuple ``(separator, 0)``. ``separator`` is ``','`` if the first
        data row contains more commas than dots, otherwise ``'.'``. The
        second element is always ``0`` because the header is the first line.

    Raises:
        ValidationError: If the input is empty, the header is missing, or
            the second line is absent or contains no tab.
    """
    iterator = iter(lines)

    # Empty input must be reported as a validation failure. A bare
    # ``next(iterator)`` would leak StopIteration, which turns into a
    # RuntimeError when this function is called from inside a generator.
    header = next(iterator, None)
    if header is None or not header.startswith(' ID '):
        raise ValidationError('Unsupported File')

    try:
        line = next(iterator)
        # Drop the first column; everything after it is inspected.
        _, numeric_data = line.split('\t', 1)
    except (ValueError, StopIteration) as err:
        raise ValidationError('No Data Present') from err

    separator = ',' if numeric_data.count(',') > numeric_data.count('.') else '.'
    return separator, 0
| 
 | |
| 
 | |
# Format description for '.stx' files, checked with validate_stx.
# NOTE(review): 'defaults' is presumably the subset of data columns that is
# pre-selected somewhere downstream - confirm against the consumer.
stx_validator = Validator(
    type='Signalyse Statistic Files',
    extension='.stx',
    validate=validate_stx,
    id_fields=['Probe_Name'],
    data_fields=[
        'Count',
        'Net_Signal',
        'Net_Signal_SD',
        'Net_Integral',
        'Net_Integral_SD',
        'Proc_Control',
    ],
    defaults=['Net_Signal', 'Net_Signal_SD', 'Net_Integral', 'Net_Integral_SD'],
)
| 
 | |
# Format description for '.dat' files, checked with validate_dat.
# NOTE(review): 'defaults' is presumably the subset of data columns that is
# pre-selected somewhere downstream - confirm against the consumer.
dat_validator = Validator(
    type='Signalyse Data Files',
    extension='.dat',
    validate=validate_dat,
    id_fields=['Dot_Number', 'Probe_Name', 'Gene_Name', 'Col', 'Row'],
    data_fields=[
        'X[Pix]',
        'Y[Pix]',
        'DX[Pix]',
        'DY[Pix]',
        'Spot_Diameter',
        'ROI_Width',
        'ROI_Heigth',
        'Pixels',
        'Bkg',
        'Bkg_SD',
        'Net_Signal',
        'Net_Signal_SD',
        'Net_Integral',
        'Net_Integral_SD',
        'Acc_Number',
        'Proc_Control',
    ],
    defaults=['Bkg', 'Net_Signal', 'Net_Integral'],
)
| 
 | |
# Format description for '.csv' files, checked with validate_csv.
# NOTE(review): 'defaults' is presumably the subset of data columns that is
# pre-selected somewhere downstream - confirm against the consumer.
csv_validator = Validator(
    type='Sensovation Data Files',
    extension='.csv',
    validate=validate_csv,
    id_fields=[' ID '],
    data_fields=[
        'Pos.X',
        'Pos.Y',
        'Bkg.Mean',
        'Spot.Mean',
        'Bkg.Median',
        'Spot.Median',
        'Bkg.StdDev',
        'Spot.StdDev',
        'Bkg.Sum',
        'Spot.Sum',
        'Bkg.Area',
        'Spot.Area',
        'Spot.Sat. (%)',
        'Found',
        'Pos.Nom.X',
        'Pos.Nom.Y',
        'Dia.',
        'Rect.',
        'Contour',
    ],
    defaults=[
        'Bkg.Mean',
        'Spot.Mean',
        'Bkg.Median',
        'Spot.Median',
        'Bkg.StdDev',
        'Spot.StdDev',
        'Bkg.Sum',
        'Spot.Sum',
    ],
)
| 
 | |
# Lookup table from file extension (including the leading dot) to the
# validator registered for that extension.
validation_map = {
    validator.extension: validator
    for validator in (stx_validator, dat_validator, csv_validator)
}
| 
 | |
| 
 | |
def guess_validator(unvalidated):
    """Pick a validator based on the extension of the first file path.

    Only the first entry of *unvalidated* is examined.

    Args:
        unvalidated: Sequence of file paths.

    Returns:
        The validator registered in ``validation_map`` for the extension of
        the first path, or ``None`` if the sequence is empty or the
        extension is not registered.
    """
    # An empty selection has no first file to inspect. Return the same
    # "no validator" result as for an unknown extension instead of raising
    # IndexError.
    if not unvalidated:
        return None
    _, extension = os.path.splitext(unvalidated[0])
    return validation_map.get(extension)
| 
 | |
def validate_files(unvalidated, selected_validator):
    """Validate each file and yield a ``DataFile`` for every accepted one.

    Each path is opened as UTF-8 text and handed to
    ``selected_validator.validate``. Files that cannot be opened, cannot be
    decoded, or are rejected by the validator are reported with ``print``
    and skipped; they do not stop the iteration.

    Args:
        unvalidated: Iterable of file paths.
        selected_validator: Object whose ``validate`` attribute takes an
            iterable of lines and returns ``(separator, skip)``.

    Yields:
        ``DataFile(path, separator, skip, selected_validator)`` for every
        file that passed validation.
    """
    for file_path in unvalidated:
        try:
            with open(file_path, mode='r', encoding='utf-8') as file_handle:
                try:
                    separator, skip = selected_validator.validate(file_handle)
                except StopIteration:
                    # A validator that exhausts the file with a bare next()
                    # leaks StopIteration. Inside this generator that would
                    # become a RuntimeError and abort the whole run, so it
                    # is treated as a rejected file instead.
                    raise ValidationError('Unsupported File') from None
        except (IOError, UnicodeError, ValidationError) as e:
            print(e)
            continue
        # Yield only after the file is closed, so the handle is not kept
        # open while the consumer works with the result.
        yield DataFile(file_path, separator, skip, selected_validator)
 | |
| 
 |