
fixed sanity check

Branch: xmlparsing
Author: Holger Frey, 3 years ago
Commit: 75ac4740a5

Changed files:
  1. sensospot_data/columns.py  (16 changes)
  2. sensospot_data/parser.py   (16 changes)
  3. tests/test_parser.py       (2 changes)

sensospot_data/columns.py

@@ -30,7 +30,8 @@ RAW_DATA_COLUMNS_RENAME_MAP = {
     "Spot.Sat. (%)": RAW_DATA_SPOT_SAT,
 }
 
-# meta data extracted from filename
+# meta data extracted from filename and path
+META_DATA_ANALYSIS_NAME = "Analysis.Name"
 META_DATA_WELL_NAME = "Well.Name"
 META_DATA_WELL_ROW = "Well.Row"
 META_DATA_WELL_COLUMN = "Well.Column"
@@ -59,6 +60,7 @@ PARSED_DATA_COLUMN_SET = {
     RAW_DATA_POS_ID,
     RAW_DATA_SPOT_FOUND,
     RAW_DATA_SPOT_DIAMETER,
+    META_DATA_ANALYSIS_NAME,
     META_DATA_WELL_NAME,
     META_DATA_WELL_ROW,
     META_DATA_WELL_COLUMN,
@@ -66,3 +68,15 @@ PARSED_DATA_COLUMN_SET = {
     META_DATA_PARAMETERS_CHANNEL,
     META_DATA_PARAMETERS_TIME,
 }
+
+# list of columns to ensure a pandas numeric type
+RAW_DATA_NUMERIC_COLUMNS = {
+    RAW_DATA_BKG_MEAN,
+    RAW_DATA_SPOT_MEAN,
+    RAW_DATA_BKG_MEDIAN,
+    RAW_DATA_SPOT_MEDIAN,
+    RAW_DATA_BKG_STDDEV,
+    RAW_DATA_SPOT_STDDEV,
+    RAW_DATA_BKG_SUM,
+    RAW_DATA_SPOT_SUM,
+}
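For context, the new RAW_DATA_NUMERIC_COLUMNS set is consumed by _sanity_check in parser.py (see the next file): every listed column is passed through pandas.to_numeric. A minimal standalone sketch of that coercion behaviour, using made-up column names instead of the module's constants:

import pandas

# made-up stand-ins for the RAW_DATA_* constants, for illustration only
NUMERIC_COLUMNS = {"Bkg.Mean", "Spot.Mean"}

def coerce_numeric_columns(data_frame):
    """Cast measurement columns to numeric dtypes.

    With the default errors="raise", pandas.to_numeric raises a ValueError
    on values that cannot be parsed as numbers, so a broken csv export
    fails loudly instead of silently keeping object columns.
    """
    for column in NUMERIC_COLUMNS:
        data_frame[column] = pandas.to_numeric(data_frame[column])
    return data_frame

example = pandas.DataFrame({"Bkg.Mean": ["1.5", "2.0"], "Spot.Mean": ["3", "4"]})
print(coerce_numeric_columns(example).dtypes)  # both columns become numeric dtypes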

sensospot_data/parser.py

@@ -16,7 +16,8 @@ from .columns import (
     META_DATA_EXPOSURE_ID,
     META_DATA_WELL_COLUMN,
     PARSED_DATA_COLUMN_SET,
-    RAW_DATA_NORMALIZATION_MAP,
+    META_DATA_ANALYSIS_NAME,
+    RAW_DATA_NUMERIC_COLUMNS,
     RAW_DATA_COLUMNS_RENAME_MAP,
 )
 from .parameters import add_optional_measurement_parameters
@@ -75,8 +76,9 @@ def parse_file(data_file):
     will race a ValueError, if metadata could not be extracted
     """
-    measurement_info = _extract_measurement_info(Path(data_file))
-    data_frame = _parse_csv(data_file)
+    data_path = Path(data_file).resolve()
+    measurement_info = _extract_measurement_info(data_path)
+    data_frame = _parse_csv(data_path)
     # normalized well name
     data_frame[
         META_DATA_WELL_NAME
@@ -84,6 +86,7 @@ def parse_file(data_file):
     data_frame[META_DATA_WELL_ROW] = measurement_info.row
     data_frame[META_DATA_WELL_COLUMN] = measurement_info.column
     data_frame[META_DATA_EXPOSURE_ID] = measurement_info.exposure
+    data_frame[META_DATA_ANALYSIS_NAME] = data_path.parent.name
     return _cleanup_data_columns(data_frame)
@@ -133,16 +136,17 @@ def _sanity_check(data_frame):
             f"Measurements are missing: {expected_rows} != {len(data_frame)}"
         )
     # set the right data type for measurement columns
-    for raw_column in RAW_DATA_NORMALIZATION_MAP:
+    for raw_column in RAW_DATA_NUMERIC_COLUMNS:
         data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column])
     return data_frame
 
 
 def parse_folder(folder, quiet=False):
     """parses all csv files in a folder to one large dataframe"""
-    file_list = list_csv_files(Path(folder))
+    folder_path = Path(folder)
+    file_list = list_csv_files(folder_path)
     data_frame = parse_multiple_files(file_list)
-    data_frame = add_optional_measurement_parameters(data_frame, folder)
+    data_frame = add_optional_measurement_parameters(data_frame, folder_path)
     if quiet:
         return data_frame
     return _sanity_check(data_frame)
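The other half of the change derives the new Analysis.Name metadata from the folder that contains the csv file, which is why the path is resolved once up front. A small sketch of that idea in isolation (the file name below is invented; only the .parent.name lookup mirrors the commit):

from pathlib import Path

def analysis_name(data_file):
    """Return the name of the directory holding the measurement file."""
    return Path(data_file).resolve().parent.name

# hypothetical layout: <analysis folder>/<csv export>
print(analysis_name("example_data/mtp_wo_parameters/measurement_1.csv"))
# -> "mtp_wo_parameters"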

tests/test_parser.py

@@ -172,6 +172,7 @@ def test_parse_file(example_file):
         "Well.Row",
         "Well.Column",
         "Exposure.Id",
+        "Analysis.Name",
     }
 
     assert set(result.columns) == columns
@@ -179,6 +180,7 @@ def test_parse_file(example_file):
     assert result["Well.Row"][0] == "A"
     assert result["Well.Column"][0] == 1
     assert result["Exposure.Id"][0] == 1
+    assert result["Analysis.Name"][0] == "mtp_wo_parameters"
 
 
 def test_parse_file_raises_error(example_dir):
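With the extra column in place, a combined dataframe from parse_folder can be split back into its analysis folders. A hypothetical usage sketch, assuming a local folder of Sensospot csv exports:

from sensospot_data.parser import parse_folder

# "example_data" is a placeholder path, not part of the repository
data_frame = parse_folder("example_data")

# rows per analysis folder, keyed by the new "Analysis.Name" column
print(data_frame.groupby("Analysis.Name").size())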
