From 8644a25d780cd2f312d2e1c9e2f48abb1a21f0ca Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Tue, 3 Jan 2023 16:27:27 +0100 Subject: [PATCH] moved _cleanup_data_columns() function to "columns" module by moving this function it can be easily reused in the upcoming "xml_parser" module --- src/sensospot_parser/columns.py | 18 ++++++++++++++++++ src/sensospot_parser/csv_parser.py | 18 ++---------------- tests/test_columns.py | 15 +++++++++++++++ tests/test_csv_parser.py | 17 ----------------- 4 files changed, 35 insertions(+), 33 deletions(-) create mode 100644 tests/test_columns.py diff --git a/src/sensospot_parser/columns.py b/src/sensospot_parser/columns.py index 9897d32..ad63ed8 100644 --- a/src/sensospot_parser/columns.py +++ b/src/sensospot_parser/columns.py @@ -1,5 +1,7 @@ """ Column name definitions """ +import pandas + # original, unmodified column names POS_X = "Pos.X" POS_Y = "Pos.Y" @@ -121,3 +123,19 @@ INDEX_COLUMNS_POS = ( WELL_ROW, POS_ID, ) + + +def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame: + """renames some data columns for consistency and drops unused columns + + Args: + data_frame: pandas DataFrame with parsed measurement data + + Returns: + pandas DataFrame, column names cleaned up + """ + renamed = data_frame.rename(columns=CSV_RENAME_MAP) + surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET + x = renamed.drop(columns=surplus_columns) + print(PARSED_DATA_COLUMN_SET - set(x.columns)) + return x diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py index d59518c..24b29c2 100644 --- a/src/sensospot_parser/csv_parser.py +++ b/src/sensospot_parser/csv_parser.py @@ -1,6 +1,6 @@ """ Sensospot Data Parser -Parsing the numerical output from Sensovations Sensospot image analysis. +Parsing the csv result files from Sensovations Sensospot image analysis. """ import re @@ -84,20 +84,6 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo: return FileInfo(row, column, exposure) -def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame: - """renames some data columns for consistency and drops unused columns - - Args: - data_frame: pandas DataFrame with parsed measurement data - - Returns: - pandas DataFrame, column names cleaned up - """ - renamed = data_frame.rename(columns=columns.CSV_RENAME_MAP) - surplus_columns = set(renamed.columns) - columns.PARSED_DATA_COLUMN_SET - return renamed.drop(columns=surplus_columns) - - def parse_file(data_file: PathLike) -> pandas.DataFrame: """parses one data file and adds metadata to result @@ -124,7 +110,7 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame: data_frame[columns.EXPOSURE_ID] = measurement_info.exposure data_frame[columns.ANALYSIS_NAME] = data_path.parent.name data_frame[columns.ANALYSIS_IMAGE] = data_path.with_suffix(".tif").name - return _cleanup_data_columns(data_frame) + return columns._cleanup_data_columns(data_frame) def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]: diff --git a/tests/test_columns.py b/tests/test_columns.py new file mode 100644 index 0000000..499c221 --- /dev/null +++ b/tests/test_columns.py @@ -0,0 +1,15 @@ +def test_cleanup_data_columns(): + from pandas import DataFrame + + from sensospot_parser.columns import _cleanup_data_columns + + columns = ["Rect.", "Contour", " ID ", "Found", "Dia."] + data = {col: [i] for i, col in enumerate(columns)} + data_frame = DataFrame(data=data) + + result = _cleanup_data_columns(data_frame) + + assert set(result.columns) == {"Pos.Id", "Spot.Found", "Spot.Diameter"} + assert result["Pos.Id"][0] == 2 + assert result["Spot.Found"][0] == 3 + assert result["Spot.Diameter"][0] == 4 diff --git a/tests/test_csv_parser.py b/tests/test_csv_parser.py index e5f3cf6..c8678d3 100644 --- a/tests/test_csv_parser.py +++ b/tests/test_csv_parser.py @@ -127,23 +127,6 @@ def test_extract_measurement_info_raises_error(filename): _extract_measurement_info(filename) -def test_cleanup_data_columns(): - from pandas import DataFrame - - from sensospot_parser.csv_parser import _cleanup_data_columns - - columns = ["Rect.", "Contour", " ID ", "Found", "Dia."] - data = {col: [i] for i, col in enumerate(columns)} - data_frame = DataFrame(data=data) - - result = _cleanup_data_columns(data_frame) - - assert set(result.columns) == {"Pos.Id", "Spot.Found", "Spot.Diameter"} - assert result["Pos.Id"][0] == 2 - assert result["Spot.Found"][0] == 3 - assert result["Spot.Diameter"][0] == 4 - - def test_parse_file(example_file): from sensospot_parser.csv_parser import parse_file