Browse Source

moved _cleanup_data_columns() function to "columns" module

Moving this function makes it easy to reuse in the upcoming "xml_parser" module.
xmlparsing
Holger Frey 2 years ago
parent
commit
8644a25d78
  1. 18
      src/sensospot_parser/columns.py
  2. 18
      src/sensospot_parser/csv_parser.py
  3. 15
      tests/test_columns.py
  4. 17
      tests/test_csv_parser.py

18
src/sensospot_parser/columns.py

@ -1,5 +1,7 @@ @@ -1,5 +1,7 @@
""" Column name definitions """
import pandas
# original, unmodified column names
POS_X = "Pos.X"
POS_Y = "Pos.Y"
@ -121,3 +123,19 @@ INDEX_COLUMNS_POS = ( @@ -121,3 +123,19 @@ INDEX_COLUMNS_POS = (
WELL_ROW,
POS_ID,
)
def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame:
    """renames some data columns for consistency and drops unused columns

    The columns are first renamed according to CSV_RENAME_MAP; afterwards
    every column whose name is not part of PARSED_DATA_COLUMN_SET is dropped.

    Args:
        data_frame: pandas DataFrame with parsed measurement data

    Returns:
        pandas DataFrame, column names cleaned up
    """
    renamed = data_frame.rename(columns=CSV_RENAME_MAP)
    # everything that is not an expected data column after renaming is surplus
    surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET
    return renamed.drop(columns=surplus_columns)

18
src/sensospot_parser/csv_parser.py

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
""" Sensospot Data Parser
Parsing the numerical output from Sensovations Sensospot image analysis.
Parsing the csv result files from Sensovations Sensospot image analysis.
"""
import re
@ -84,20 +84,6 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo: @@ -84,20 +84,6 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo:
return FileInfo(row, column, exposure)
def _cleanup_data_columns(data_frame: pandas.DataFrame) -> pandas.DataFrame:
    """normalizes the column names and removes columns that are not needed

    Args:
        data_frame: pandas DataFrame with parsed measurement data

    Returns:
        pandas DataFrame with renamed columns and without surplus columns
    """
    with_new_names = data_frame.rename(columns=columns.CSV_RENAME_MAP)
    # collect every column name that is not an expected data column
    unwanted = {
        name
        for name in with_new_names.columns
        if name not in columns.PARSED_DATA_COLUMN_SET
    }
    return with_new_names.drop(columns=unwanted)
def parse_file(data_file: PathLike) -> pandas.DataFrame:
"""parses one data file and adds metadata to result
@ -124,7 +110,7 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame: @@ -124,7 +110,7 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame:
data_frame[columns.EXPOSURE_ID] = measurement_info.exposure
data_frame[columns.ANALYSIS_NAME] = data_path.parent.name
data_frame[columns.ANALYSIS_IMAGE] = data_path.with_suffix(".tif").name
return _cleanup_data_columns(data_frame)
return columns._cleanup_data_columns(data_frame)
def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:

15
tests/test_columns.py

@ -0,0 +1,15 @@ @@ -0,0 +1,15 @@
def test_cleanup_data_columns():
    """checks the renaming and the removal of surplus columns"""
    from pandas import DataFrame

    from sensospot_parser.columns import _cleanup_data_columns

    # every raw column holds its own position as the single value
    raw_names = ["Rect.", "Contour", " ID ", "Found", "Dia."]
    frame = DataFrame(data={name: [pos] for pos, name in enumerate(raw_names)})

    cleaned = _cleanup_data_columns(frame)

    assert set(cleaned.columns) == {"Pos.Id", "Spot.Found", "Spot.Diameter"}
    expected_values = {"Pos.Id": 2, "Spot.Found": 3, "Spot.Diameter": 4}
    for name, value in expected_values.items():
        assert cleaned[name][0] == value

17
tests/test_csv_parser.py

@ -127,23 +127,6 @@ def test_extract_measurement_info_raises_error(filename): @@ -127,23 +127,6 @@ def test_extract_measurement_info_raises_error(filename):
_extract_measurement_info(filename)
def test_cleanup_data_columns():
    """verifies that columns get renamed and unused ones are dropped"""
    from pandas import DataFrame

    from sensospot_parser.csv_parser import _cleanup_data_columns

    # the value stored in each column is the index of its original name
    source_columns = ["Rect.", "Contour", " ID ", "Found", "Dia."]
    content = {}
    for index, column_name in enumerate(source_columns):
        content[column_name] = [index]

    outcome = _cleanup_data_columns(DataFrame(data=content))

    assert set(outcome.columns) == {"Pos.Id", "Spot.Found", "Spot.Diameter"}
    assert outcome["Pos.Id"][0] == 2
    assert outcome["Spot.Found"][0] == 3
    assert outcome["Spot.Diameter"][0] == 4
def test_parse_file(example_file):
from sensospot_parser.csv_parser import parse_file

Loading…
Cancel
Save