Browse Source

fixed sanity check

xmlparsing
Holger Frey 3 years ago
parent
commit
75ac4740a5
  1. 16
      sensospot_data/columns.py
  2. 16
      sensospot_data/parser.py
  3. 2
      tests/test_parser.py

16
sensospot_data/columns.py

@@ -30,7 +30,8 @@ RAW_DATA_COLUMNS_RENAME_MAP = {
"Spot.Sat. (%)": RAW_DATA_SPOT_SAT,
}
# meta data extracted from filename
# meta data extracted from filename and path
META_DATA_ANALYSIS_NAME = "Analysis.Name"
META_DATA_WELL_NAME = "Well.Name"
META_DATA_WELL_ROW = "Well.Row"
META_DATA_WELL_COLUMN = "Well.Column"
@@ -59,6 +60,7 @@ PARSED_DATA_COLUMN_SET = {
RAW_DATA_POS_ID,
RAW_DATA_SPOT_FOUND,
RAW_DATA_SPOT_DIAMETER,
META_DATA_ANALYSIS_NAME,
META_DATA_WELL_NAME,
META_DATA_WELL_ROW,
META_DATA_WELL_COLUMN,
@@ -66,3 +68,15 @@ PARSED_DATA_COLUMN_SET = {
META_DATA_PARAMETERS_CHANNEL,
META_DATA_PARAMETERS_TIME,
}
# list of columns to ensure a pandas numeric type
RAW_DATA_NUMERIC_COLUMNS = {
RAW_DATA_BKG_MEAN,
RAW_DATA_SPOT_MEAN,
RAW_DATA_BKG_MEDIAN,
RAW_DATA_SPOT_MEDIAN,
RAW_DATA_BKG_STDDEV,
RAW_DATA_SPOT_STDDEV,
RAW_DATA_BKG_SUM,
RAW_DATA_SPOT_SUM,
}

16
sensospot_data/parser.py

@@ -16,7 +16,8 @@ from .columns import (
META_DATA_EXPOSURE_ID,
META_DATA_WELL_COLUMN,
PARSED_DATA_COLUMN_SET,
RAW_DATA_NORMALIZATION_MAP,
META_DATA_ANALYSIS_NAME,
RAW_DATA_NUMERIC_COLUMNS,
RAW_DATA_COLUMNS_RENAME_MAP,
)
from .parameters import add_optional_measurement_parameters
@@ -75,8 +76,9 @@ def parse_file(data_file):
will race a ValueError, if metadata could not be extracted
"""
measurement_info = _extract_measurement_info(Path(data_file))
data_frame = _parse_csv(data_file)
data_path = Path(data_file).resolve()
measurement_info = _extract_measurement_info(data_path)
data_frame = _parse_csv(data_path)
# normalized well name
data_frame[
META_DATA_WELL_NAME
@@ -84,6 +86,7 @@ def parse_file(data_file):
data_frame[META_DATA_WELL_ROW] = measurement_info.row
data_frame[META_DATA_WELL_COLUMN] = measurement_info.column
data_frame[META_DATA_EXPOSURE_ID] = measurement_info.exposure
data_frame[META_DATA_ANALYSIS_NAME] = data_path.parent.name
return _cleanup_data_columns(data_frame)
@@ -133,16 +136,17 @@ def _sanity_check(data_frame):
f"Measurements are missing: {expected_rows} != {len(data_frame)}"
)
# set the right data type for measurement columns
for raw_column in RAW_DATA_NORMALIZATION_MAP:
for raw_column in RAW_DATA_NUMERIC_COLUMNS:
data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column])
return data_frame
def parse_folder(folder, quiet=False):
"""parses all csv files in a folder to one large dataframe"""
file_list = list_csv_files(Path(folder))
folder_path = Path(folder)
file_list = list_csv_files(folder_path)
data_frame = parse_multiple_files(file_list)
data_frame = add_optional_measurement_parameters(data_frame, folder)
data_frame = add_optional_measurement_parameters(data_frame, folder_path)
if quiet:
return data_frame
return _sanity_check(data_frame)

2
tests/test_parser.py

@@ -172,6 +172,7 @@ def test_parse_file(example_file):
"Well.Row",
"Well.Column",
"Exposure.Id",
"Analysis.Name",
}
assert set(result.columns) == columns
@@ -179,6 +180,7 @@ def test_parse_file(example_file):
assert result["Well.Row"][0] == "A"
assert result["Well.Column"][0] == 1
assert result["Exposure.Id"][0] == 1
assert result["Analysis.Name"][0] == "mtp_wo_parameters"
def test_parse_file_raises_error(example_dir):

Loading…
Cancel
Save