From 938b0630690150d07a0e5e8d7af15e747d2f7d37 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Tue, 3 Jan 2023 11:55:31 +0100 Subject: [PATCH] removed the "recordtime" module The "recordtime" module uses the assay result xml file for retrieving the Analysis.Datetime value. This value is only available in the assay result xml. Since the "csv_parser" module will only be a backup if this xml file is not present, a separate parser for the Analysis.Datetime is not needed anymore. --- src/sensospot_parser/csv_parser.py | 8 ++- src/sensospot_parser/recordtime.py | 98 ------------------------------ tests/test_csv_parser.py | 21 +------ tests/test_recordtime.py | 98 ------------------------------ 4 files changed, 7 insertions(+), 218 deletions(-) delete mode 100644 src/sensospot_parser/recordtime.py delete mode 100644 tests/test_recordtime.py diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py index 73f0656..d59518c 100644 --- a/src/sensospot_parser/csv_parser.py +++ b/src/sensospot_parser/csv_parser.py @@ -12,7 +12,6 @@ import pandas from . 
import columns from .parameters import add_measurement_parameters -from .recordtime import add_record_time PathLike = Union[str, pathlib.Path] @@ -225,8 +224,13 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame: data_frame = parse_multiple_files(file_list) except ValueError: raise ValueError(f"No sensospot data found in folder '{folder}'") + data_frame = add_measurement_parameters(data_frame, folder_path) - data_frame = add_record_time(data_frame, folder_path) + + # The csv parser is only used if the xml analysis file is not present + # the xml file would hold the Analysis.Datetime value + data_frame[columns.ANALYSIS_DATETIME] = None + if quiet: return data_frame return _sanity_check(data_frame) diff --git a/src/sensospot_parser/recordtime.py b/src/sensospot_parser/recordtime.py deleted file mode 100644 index 6bda6c8..0000000 --- a/src/sensospot_parser/recordtime.py +++ /dev/null @@ -1,98 +0,0 @@ -""" Sensospot Data Parser - -Parsing the numerical output from Sensovations Sensospot image analysis. -""" - -import pathlib -from typing import Tuple, Union, Iterable, Optional - -import numpy -import pandas -from defusedxml import ElementTree - -from . 
import columns - -PathLike = Union[str, pathlib.Path] - - -def _search_records_file(folder: PathLike) -> Optional[pathlib.Path]: - """searches for a the records xml file in a folder - - Args: - folder: directory to search - - Returns: - the path to the settings file or None - """ - folder_path = pathlib.Path(folder) - files = (item for item in folder_path.iterdir() if item.suffix == ".xsl") - xls_files = [path for path in files if not path.name.startswith(".")] - if len(xls_files) == 1: - xml_file = xls_files[0].with_suffix(".xml") - if xml_file.is_file(): - return xml_file - return None - - -def _iter_records(records_file: PathLike) -> Iterable[Tuple[str, str]]: - """parses the information from a records file - - Args: - records_file: path to the records file - - Yields: - tuples, filename as first element and the datetime string as second - """ - records_path = pathlib.Path(records_file) - tree = ElementTree.parse(records_path) - for channel_config in tree.findall(".//*[ImageFileName]"): - image_tag = channel_config.find("ImageFileName") - image_name = None if image_tag is None else image_tag.text - datetime_tag = channel_config.find("Timestamp") - datetime_str = None if datetime_tag is None else datetime_tag.text - yield image_name, datetime_str - - -def _parse_records_file(records_file: PathLike) -> pandas.DataFrame: - """parses the information from a records file - - Args: - records_file: path to the records file - - Returns: - pandas data frame with the parsed information - """ - data = _iter_records(records_file) - data_frame = pandas.DataFrame( - data, columns=[columns.ANALYSIS_IMAGE, columns.ANALYSIS_DATETIME] - ) - data_frame[columns.ANALYSIS_DATETIME] = pandas.to_datetime( - data_frame[columns.ANALYSIS_DATETIME] - ) - return data_frame - - -def add_record_time( - measurement: pandas.DataFrame, folder: PathLike -) -> pandas.DataFrame: - """adds the recoding datetime to the data frame - - The returned DataFrame will contain one more column for parsed 
datetime - - If the parameters could not be foundor do not match up with the - measurement data, the additional collumn will contain NaN. - - Argumentss: - measurement: the parsed measurement data - folder: the folder of the measurement data - - Returns: - the measurement data with parameters added - """ - record_path = _search_records_file(folder) - if record_path is None: - measurement[columns.ANALYSIS_DATETIME] = numpy.NAN - return measurement - - data_frame = _parse_records_file(record_path) - return measurement.merge(data_frame, how="left", on=columns.ANALYSIS_IMAGE) diff --git a/tests/test_csv_parser.py b/tests/test_csv_parser.py index 4ab9480..04b0190 100644 --- a/tests/test_csv_parser.py +++ b/tests/test_csv_parser.py @@ -4,11 +4,7 @@ import numpy import pytest -from .conftest import ( - EXAMPLE_DIR_WO_PARAMS, - EXAMPLE_DIR_WITH_PARAMS, - EXAMPLE_DIR_WITH_RECORD, -) +from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS @pytest.mark.parametrize( @@ -289,21 +285,6 @@ def test_parse_folder_no_datetime_records(example_dir): assert len(data_frame["Analysis.Datetime"].unique()) == 1 -def test_parse_folder_with_datetime_records(example_dir): - from sensospot_parser.csv_parser import parse_folder - - data_frame = parse_folder(example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert len(data_frame) == 8 * 4 * 100 - assert len(data_frame["Well.Row"].unique()) == 2 - assert len(data_frame["Well.Column"].unique()) == 4 - assert len(data_frame["Exposure.Id"].unique()) == 4 - assert len(data_frame["Pos.Id"].unique()) == 100 - assert len(data_frame["Parameters.Channel"].unique()) == 1 - assert len(data_frame["Parameters.Time"].unique()) == 1 - assert len(data_frame["Analysis.Datetime"].unique()) == 8 - - def test_sanity_check_ok(example_dir): from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files diff --git a/tests/test_recordtime.py b/tests/test_recordtime.py deleted file mode 100644 index 6a998da..0000000 --- a/tests/test_recordtime.py +++ 
/dev/null @@ -1,98 +0,0 @@ -import pandas -import pytest - -from .conftest import EXAMPLE_DIR_WITH_PARAMS, EXAMPLE_DIR_WITH_RECORD - - -@pytest.fixture -def file_list(example_dir): - import pathlib - - path = pathlib.Path(example_dir / EXAMPLE_DIR_WITH_RECORD) - tifs = (i.with_suffix(".tif") for i in path.glob("*.csv")) - return [i.name for i in tifs] - - -def test_search_records_file_ok(example_dir): - from sensospot_parser.recordtime import _search_records_file - - result = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert result.suffix == ".xml" - - -def test_search_records_file_not_found(example_dir): - from sensospot_parser.recordtime import _search_records_file - - result = _search_records_file(example_dir / EXAMPLE_DIR_WITH_PARAMS) - - assert result is None - - -def test_iter_records(example_dir): - from sensospot_parser.recordtime import _iter_records, _search_records_file - - path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD) - - result = list(_iter_records(path)) - - assert ( - result[0][0] == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_A01_1.tif" - ) - assert result[0][1] == "3/7/2022 5:31:47 PM" - assert ( - result[-1][0] == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_D04_4.tif" - ) - assert result[-1][1] == "3/7/2022 5:33:41 PM" - - -def test_parse_records_file(example_dir): - from sensospot_parser.recordtime import ( - _parse_records_file, - _search_records_file, - ) - - path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD) - - result = _parse_records_file(path) - - assert isinstance(result, pandas.DataFrame) - assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert len(result) == 64 - - -def test_add_record_time_ok(example_dir, file_list): - from sensospot_parser.recordtime import add_record_time - - df = pandas.DataFrame(file_list, columns=["Analysis.Image"]) - - result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert len(df) == len(result) - assert 
list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert not result["Analysis.Datetime"].hasnans - - -def test_add_record_time_unknown_file(example_dir, file_list): - from sensospot_parser.recordtime import add_record_time - - extended_list = file_list + ["unknown file"] - df = pandas.DataFrame(extended_list, columns=["Analysis.Image"]) - - result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert len(df) == len(result) - assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert result["Analysis.Datetime"].hasnans - - -def test_add_record_time_no_record_xml(example_dir, file_list): - from sensospot_parser.recordtime import add_record_time - - df = pandas.DataFrame(file_list, columns=["Analysis.Image"]) - - result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_PARAMS) - - assert len(df) == len(result) - assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert result["Analysis.Datetime"].hasnans