Browse Source

moved _cleanup_data_columns() function to "columns" module

Moving this function makes it easy to reuse in the upcoming "xml_parser" module.
xmlparsing
Holger Frey 2 years ago
parent
commit
8644a25d78
  1. 18
      src/sensospot_parser/columns.py
  2. 18
      src/sensospot_parser/csv_parser.py
  3. 15
      tests/test_columns.py
  4. 17
      tests/test_csv_parser.py

18
src/sensospot_parser/columns.py

@ -1,5 +1,7 @@
""" Column name definitions """ """ Column name definitions """
import pandas
# original, unmodified column names # original, unmodified column names
POS_X = "Pos.X" POS_X = "Pos.X"
POS_Y = "Pos.Y" POS_Y = "Pos.Y"
@ -121,3 +123,19 @@ INDEX_COLUMNS_POS = (
WELL_ROW, WELL_ROW,
POS_ID, POS_ID,
) )
def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame:
    """Rename some data columns for consistency and drop unused columns.

    The columns are first renamed according to ``CSV_RENAME_MAP``. After
    that, every column whose name is not part of ``PARSED_DATA_COLUMN_SET``
    is removed from the result.

    Args:
        data_frame: pandas DataFrame with parsed measurement data

    Returns:
        pandas DataFrame, column names cleaned up
    """
    renamed = data_frame.rename(columns=CSV_RENAME_MAP)
    surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET
    # This helper is shared by several parsers, so it must stay silent:
    # no debugging output on stdout, just the cleaned-up data frame.
    return renamed.drop(columns=list(surplus_columns))

18
src/sensospot_parser/csv_parser.py

@ -1,6 +1,6 @@
""" Sensospot Data Parser """ Sensospot Data Parser
Parsing the numerical output from Sensovations Sensospot image analysis. Parsing the csv result files from Sensovations Sensospot image analysis.
""" """
import re import re
@ -84,20 +84,6 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo:
return FileInfo(row, column, exposure) return FileInfo(row, column, exposure)
def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame:
    """Give the data columns consistent names and discard the unused ones.

    Args:
        data_frame: pandas DataFrame with parsed measurement data

    Returns:
        pandas DataFrame, column names cleaned up
    """
    with_new_names = data_frame.rename(columns=columns.CSV_RENAME_MAP)
    # everything that is not a known parsed data column is surplus
    known_names = columns.PARSED_DATA_COLUMN_SET
    unwanted_names = set(with_new_names.columns) - known_names
    cleaned = with_new_names.drop(columns=unwanted_names)
    return cleaned
def parse_file(data_file: PathLike) -> pandas.DataFrame: def parse_file(data_file: PathLike) -> pandas.DataFrame:
"""parses one data file and adds metadata to result """parses one data file and adds metadata to result
@ -124,7 +110,7 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame:
data_frame[columns.EXPOSURE_ID] = measurement_info.exposure data_frame[columns.EXPOSURE_ID] = measurement_info.exposure
data_frame[columns.ANALYSIS_NAME] = data_path.parent.name data_frame[columns.ANALYSIS_NAME] = data_path.parent.name
data_frame[columns.ANALYSIS_IMAGE] = data_path.with_suffix(".tif").name data_frame[columns.ANALYSIS_IMAGE] = data_path.with_suffix(".tif").name
return _cleanup_data_columns(data_frame) return columns._cleanup_data_columns(data_frame)
def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]: def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:

15
tests/test_columns.py

@ -0,0 +1,15 @@
def test_cleanup_data_columns():
    """Known columns are renamed, all other columns are removed."""
    from pandas import DataFrame

    from sensospot_parser.columns import _cleanup_data_columns

    # every column holds exactly one value: its own position in this list
    raw_names = ["Rect.", "Contour", " ID ", "Found", "Dia."]
    data_frame = DataFrame(
        data={name: [position] for position, name in enumerate(raw_names)}
    )

    result = _cleanup_data_columns(data_frame)

    assert set(result.columns) == {"Pos.Id", "Spot.Found", "Spot.Diameter"}
    expected_first_values = (
        ("Pos.Id", 2),
        ("Spot.Found", 3),
        ("Spot.Diameter", 4),
    )
    for column_name, value in expected_first_values:
        assert result[column_name][0] == value

17
tests/test_csv_parser.py

@ -127,23 +127,6 @@ def test_extract_measurement_info_raises_error(filename):
_extract_measurement_info(filename) _extract_measurement_info(filename)
def test_cleanup_data_columns():
    """Known columns are renamed, all other columns are removed."""
    from pandas import DataFrame

    from sensospot_parser.csv_parser import _cleanup_data_columns

    # every column holds exactly one value: its own position in this list
    raw_names = ["Rect.", "Contour", " ID ", "Found", "Dia."]
    single_row = {}
    for position, name in enumerate(raw_names):
        single_row[name] = [position]
    data_frame = DataFrame(data=single_row)

    result = _cleanup_data_columns(data_frame)

    assert set(result.columns) == {"Pos.Id", "Spot.Found", "Spot.Diameter"}
    pos_id = result["Pos.Id"]
    spot_found = result["Spot.Found"]
    spot_diameter = result["Spot.Diameter"]
    assert pos_id[0] == 2
    assert spot_found[0] == 3
    assert spot_diameter[0] == 4
def test_parse_file(example_file): def test_parse_file(example_file):
from sensospot_parser.csv_parser import parse_file from sensospot_parser.csv_parser import parse_file

Loading…
Cancel
Save