diff --git a/sensospot_data/columns.py b/sensospot_data/columns.py
index e72d136..b2f5a29 100644
--- a/sensospot_data/columns.py
+++ b/sensospot_data/columns.py
@@ -30,7 +30,8 @@ RAW_DATA_COLUMNS_RENAME_MAP = {
     "Spot.Sat. (%)": RAW_DATA_SPOT_SAT,
 }
 
-# meta data extracted from filename
+# meta data extracted from filename and path
+META_DATA_ANALYSIS_NAME = "Analysis.Name"
 META_DATA_WELL_NAME = "Well.Name"
 META_DATA_WELL_ROW = "Well.Row"
 META_DATA_WELL_COLUMN = "Well.Column"
@@ -59,6 +60,7 @@ PARSED_DATA_COLUMN_SET = {
     RAW_DATA_POS_ID,
     RAW_DATA_SPOT_FOUND,
     RAW_DATA_SPOT_DIAMETER,
+    META_DATA_ANALYSIS_NAME,
     META_DATA_WELL_NAME,
     META_DATA_WELL_ROW,
     META_DATA_WELL_COLUMN,
@@ -66,3 +68,15 @@ PARSED_DATA_COLUMN_SET = {
     META_DATA_PARAMETERS_CHANNEL,
     META_DATA_PARAMETERS_TIME,
 }
+
+# list of columns to ensure a pandas numeric type
+RAW_DATA_NUMERIC_COLUMNS = {
+    RAW_DATA_BKG_MEAN,
+    RAW_DATA_SPOT_MEAN,
+    RAW_DATA_BKG_MEDIAN,
+    RAW_DATA_SPOT_MEDIAN,
+    RAW_DATA_BKG_STDDEV,
+    RAW_DATA_SPOT_STDDEV,
+    RAW_DATA_BKG_SUM,
+    RAW_DATA_SPOT_SUM,
+}
diff --git a/sensospot_data/parser.py b/sensospot_data/parser.py
index a280a2b..0ded549 100755
--- a/sensospot_data/parser.py
+++ b/sensospot_data/parser.py
@@ -16,7 +16,8 @@ from .columns import (
     META_DATA_EXPOSURE_ID,
     META_DATA_WELL_COLUMN,
     PARSED_DATA_COLUMN_SET,
-    RAW_DATA_NORMALIZATION_MAP,
+    META_DATA_ANALYSIS_NAME,
+    RAW_DATA_NUMERIC_COLUMNS,
     RAW_DATA_COLUMNS_RENAME_MAP,
 )
 from .parameters import add_optional_measurement_parameters
@@ -75,8 +76,9 @@ def parse_file(data_file):
 
     will race a ValueError, if metadata could not be extracted
     """
-    measurement_info = _extract_measurement_info(Path(data_file))
-    data_frame = _parse_csv(data_file)
+    data_path = Path(data_file).resolve()
+    measurement_info = _extract_measurement_info(data_path)
+    data_frame = _parse_csv(data_path)
     # normalized well name
     data_frame[
         META_DATA_WELL_NAME
@@ -84,6 +86,7 @@
     data_frame[META_DATA_WELL_ROW] = measurement_info.row
     data_frame[META_DATA_WELL_COLUMN] = measurement_info.column
     data_frame[META_DATA_EXPOSURE_ID] = measurement_info.exposure
+    data_frame[META_DATA_ANALYSIS_NAME] = data_path.parent.name
 
     return _cleanup_data_columns(data_frame)
 
@@ -133,16 +136,17 @@ def _sanity_check(data_frame):
             f"Measurements are missing: {expected_rows} != {len(data_frame)}"
         )
     # set the right data type for measurement columns
-    for raw_column in RAW_DATA_NORMALIZATION_MAP:
+    for raw_column in RAW_DATA_NUMERIC_COLUMNS:
         data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column])
     return data_frame
 
 
 def parse_folder(folder, quiet=False):
     """parses all csv files in a folder to one large dataframe"""
-    file_list = list_csv_files(Path(folder))
+    folder_path = Path(folder)
+    file_list = list_csv_files(folder_path)
     data_frame = parse_multiple_files(file_list)
-    data_frame = add_optional_measurement_parameters(data_frame, folder)
+    data_frame = add_optional_measurement_parameters(data_frame, folder_path)
     if quiet:
         return data_frame
     return _sanity_check(data_frame)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 93fbced..209de7c 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -172,6 +172,7 @@ def test_parse_file(example_file):
         "Well.Row",
         "Well.Column",
         "Exposure.Id",
+        "Analysis.Name",
     }
 
     assert set(result.columns) == columns
@@ -179,6 +180,7 @@
     assert result["Well.Row"][0] == "A"
     assert result["Well.Column"][0] == 1
     assert result["Exposure.Id"][0] == 1
+    assert result["Analysis.Name"][0] == "mtp_wo_parameters"
 
 
 def test_parse_file_raises_error(example_dir):
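
A minimal usage sketch of the behaviour this change introduces: parse_file() now resolves the input path and records the name of the folder containing the CSV file in the new "Analysis.Name" column. The file path and file name below are hypothetical and only stand in for a valid Sensospot CSV export; the imported names are the ones defined or added in sensospot_data/columns.py and sensospot_data/parser.py.

# Sketch only: the path is hypothetical and must point to a parseable
# Sensospot CSV export; its parent folder is named "mtp_wo_parameters".
from pathlib import Path

from sensospot_data.columns import META_DATA_ANALYSIS_NAME
from sensospot_data.parser import parse_file

data_file = Path("example_data/mtp_wo_parameters/some_measurement_A1_1.csv")
result = parse_file(data_file)

# every row of the parsed frame carries the name of the enclosing folder
assert (result[META_DATA_ANALYSIS_NAME] == "mtp_wo_parameters").all()

The test added above relies on the same mechanism: the example file used in the test suite lives in a folder called "mtp_wo_parameters", so that string is expected in the new column.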