|  |  |  | @ -17,6 +17,7 @@ from .columns import (@@ -17,6 +17,7 @@ from .columns import ( | 
			
		
	
		
			
				
					|  |  |  |  |     COL_NAME_EXPOSURE_ID, | 
			
		
	
		
			
				
					|  |  |  |  |     COL_NAME_WELL_COLUMN, | 
			
		
	
		
			
				
					|  |  |  |  |     COL_NAME_SPOT_DIAMETER, | 
			
		
	
		
			
				
					|  |  |  |  |     COLUMNS_RENAME_MAP | 
			
		
	
		
			
				
					|  |  |  |  | ) | 
			
		
	
		
			
				
					|  |  |  |  | from .parameters import add_optional_measurement_parameters | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
	
		
			
				
					|  |  |  | @ -28,25 +29,15 @@ REGEX_WELL = re.compile(@@ -28,25 +29,15 @@ REGEX_WELL = re.compile( | 
			
		
	
		
			
				
					|  |  |  |  |     re.VERBOSE | re.IGNORECASE, | 
			
		
	
		
			
				
					|  |  |  |  | ) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
# Raw CSV columns that carry no measurement data and are removed during
# cleanup (see _cleanup_data_columns).
# NOTE(review): "Foo" does not look like a vendor column name — confirm it
# really occurs in the raw export before keeping it here.
COLUMNS_TO_DROP = ["Rect.", "Contour", "Id", "Name", "Foo"]

# Maps raw CSV header strings (including their odd whitespace, e.g. " ID ")
# to the canonical column-name constants.
# NOTE(review): COLUMNS_RENAME_MAP is also imported from .columns at the top
# of the file; this module-level definition would shadow that import —
# confirm which definition is intended to win.
COLUMNS_RENAME_MAP = {
    " ID ": COL_NAME_POS_ID,
    "Found": COL_NAME_SPOT_FOUND,
    "Dia.": COL_NAME_SPOT_DIAMETER,
}

# Name of the per-folder HDF5 cache file written by process_folder().
CACHE_FILE_NAME = "raw_data.h5"

# Metadata extracted from a data file's name: well row, well column and
# exposure id (see _extract_measurement_info).
FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | def _get_cache_table_name(): | 
			
		
	
		
			
				
					|  |  |  |  |     """ automatic hdf5 table name, avoids a circular import """ | 
			
		
	
		
			
				
					|  |  |  |  |     from . import VERSION_TABLE_NAME | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  |     return VERSION_TABLE_NAME | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | def _guess_decimal_separator(file_handle): | 
			
		
	
		
			
				
					|  |  |  |  |     """ guesses the decimal spearator of a opened data file """ | 
			
		
	
		
			
				
					|  |  |  |  |     file_handle.seek(0) | 
			
		
	
	
		
			
				
					|  |  |  | @ -85,12 +76,15 @@ def _cleanup_data_columns(data_frame):@@ -85,12 +76,15 @@ def _cleanup_data_columns(data_frame): | 
			
		
	
		
			
				
					|  |  |  |  |     return renamed.drop(columns=surplus_columns) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | def parse_file(data_file): | 
			
		
	
		
			
				
					|  |  |  |  | def parse_file(data_file, silent=False): | 
			
		
	
		
			
				
					|  |  |  |  |     """ parses one data file and adds metadata to result """ | 
			
		
	
		
			
				
					|  |  |  |  |     try: | 
			
		
	
		
			
				
					|  |  |  |  |         measurement_info = _extract_measurement_info(Path(data_file)) | 
			
		
	
		
			
				
					|  |  |  |  |     except ValueError as e: | 
			
		
	
		
			
				
					|  |  |  |  |         return None | 
			
		
	
		
			
				
					|  |  |  |  |         if silent: | 
			
		
	
		
			
				
					|  |  |  |  |             return None | 
			
		
	
		
			
				
					|  |  |  |  |         else: | 
			
		
	
		
			
				
					|  |  |  |  |             raise e | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame = _parse_csv(data_file) | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame[COL_NAME_WELL_ROW] = measurement_info.row | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame[COL_NAME_WELL_COLUMN] = measurement_info.column | 
			
		
	
	
		
			
				
					|  |  |  | @ -102,7 +96,7 @@ def parse_multiple_files(file_list):@@ -102,7 +96,7 @@ def parse_multiple_files(file_list): | 
			
		
	
		
			
				
					|  |  |  |  |     """ parses a list of file paths to one combined dataframe """ | 
			
		
	
		
			
				
					|  |  |  |  |     if not file_list: | 
			
		
	
		
			
				
					|  |  |  |  |         raise ValueError("Empty file list provided") | 
			
		
	
		
			
				
					|  |  |  |  |     collection = (parse_file(path) for path in file_list) | 
			
		
	
		
			
				
					|  |  |  |  |     collection = (parse_file(path, silent=True) for path in file_list) | 
			
		
	
		
			
				
					|  |  |  |  |     filtered = (frame for frame in collection if frame is not None) | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame = next(filtered) | 
			
		
	
		
			
				
					|  |  |  |  |     for next_frame in filtered: | 
			
		
	
	
		
			
				
					|  |  |  | @ -113,7 +107,7 @@ def parse_multiple_files(file_list):@@ -113,7 +107,7 @@ def parse_multiple_files(file_list): | 
			
		
	
		
			
				
					|  |  |  |  |     return data_frame | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | def _list_csv_files(folder): | 
			
		
	
		
			
				
					|  |  |  |  | def list_csv_files(folder): | 
			
		
	
		
			
				
					|  |  |  |  |     """ returns all csv files in a folder """ | 
			
		
	
		
			
				
					|  |  |  |  |     folder_path = Path(folder) | 
			
		
	
		
			
				
					|  |  |  |  |     files = (item for item in folder_path.iterdir() if item.is_file()) | 
			
		
	
	
		
			
				
					|  |  |  | @ -135,29 +129,7 @@ def _sanity_check(data_frame):@@ -135,29 +129,7 @@ def _sanity_check(data_frame): | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | def parse_folder(folder): | 
			
		
	
		
			
				
					|  |  |  |  |     """ parses all csv files in a folder to one large dataframe """ | 
			
		
	
		
			
				
					|  |  |  |  |     file_list = _list_csv_files(Path(folder)) | 
			
		
	
		
			
				
					|  |  |  |  |     file = list_csv_files(Path(folder)) | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame = parse_multiple_files(file_list) | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame = add_optional_measurement_parameters(data_frame, folder) | 
			
		
	
		
			
				
					|  |  |  |  |     return _sanity_check(data_frame) | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  |  | def process_folder(folder, use_cache=True): | 
			
		
	
		
			
				
					|  |  |  |  |     """ parses all csv files in a folder, adds some checks and more data """ | 
			
		
	
		
			
				
					|  |  |  |  |     hdf5_path = Path(folder) / CACHE_FILE_NAME | 
			
		
	
		
			
				
					|  |  |  |  |     if use_cache: | 
			
		
	
		
			
				
					|  |  |  |  |         try: | 
			
		
	
		
			
				
					|  |  |  |  |             return pandas.read_hdf(hdf5_path, _get_cache_table_name()) | 
			
		
	
		
			
				
					|  |  |  |  |         except (FileNotFoundError, KeyError): | 
			
		
	
		
			
				
					|  |  |  |  |             # either file or table doesn't exist | 
			
		
	
		
			
				
					|  |  |  |  |             pass | 
			
		
	
		
			
				
					|  |  |  |  |     data_frame = parse_folder(folder) | 
			
		
	
		
			
				
					|  |  |  |  |     if use_cache: | 
			
		
	
		
			
				
					|  |  |  |  |         try: | 
			
		
	
		
			
				
					|  |  |  |  |             data_frame.to_hdf( | 
			
		
	
		
			
				
					|  |  |  |  |                 hdf5_path, _get_cache_table_name(), format="table" | 
			
		
	
		
			
				
					|  |  |  |  |             ) | 
			
		
	
		
			
				
					|  |  |  |  |         except OSError: | 
			
		
	
		
			
				
					|  |  |  |  |             # capturing high level OSError | 
			
		
	
		
			
				
					|  |  |  |  |             # read only filesystems don't throw a more specific exception | 
			
		
	
		
			
				
					|  |  |  |  |             pass | 
			
		
	
		
			
				
					|  |  |  |  |     return data_frame | 
			
		
	
	
		
			
				
					|  |  |  | 
 |