diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py index 73f0656..d59518c 100644 --- a/src/sensospot_parser/csv_parser.py +++ b/src/sensospot_parser/csv_parser.py @@ -12,7 +12,6 @@ import pandas from . import columns from .parameters import add_measurement_parameters -from .recordtime import add_record_time PathLike = Union[str, pathlib.Path] @@ -225,8 +224,13 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame: data_frame = parse_multiple_files(file_list) except ValueError: raise ValueError(f"No sensospot data found in folder '{folder}'") + data_frame = add_measurement_parameters(data_frame, folder_path) - data_frame = add_record_time(data_frame, folder_path) + + # The csv parser is only used when the xml analysis file is not present. + # Only the xml file holds the Analysis.Datetime value, so it is left empty here. + data_frame[columns.ANALYSIS_DATETIME] = None + if quiet: return data_frame return _sanity_check(data_frame) diff --git a/src/sensospot_parser/recordtime.py b/src/sensospot_parser/recordtime.py deleted file mode 100644 index 6bda6c8..0000000 --- a/src/sensospot_parser/recordtime.py +++ /dev/null @@ -1,98 +0,0 @@ -""" Sensospot Data Parser - -Parsing the numerical output from Sensovations Sensospot image analysis. -""" - -import pathlib -from typing import Tuple, Union, Iterable, Optional - -import numpy -import pandas -from defusedxml import ElementTree - -from . 
import columns - -PathLike = Union[str, pathlib.Path] - - -def _search_records_file(folder: PathLike) -> Optional[pathlib.Path]: - """searches for a the records xml file in a folder - - Args: - folder: directory to search - - Returns: - the path to the settings file or None - """ - folder_path = pathlib.Path(folder) - files = (item for item in folder_path.iterdir() if item.suffix == ".xsl") - xls_files = [path for path in files if not path.name.startswith(".")] - if len(xls_files) == 1: - xml_file = xls_files[0].with_suffix(".xml") - if xml_file.is_file(): - return xml_file - return None - - -def _iter_records(records_file: PathLike) -> Iterable[Tuple[str, str]]: - """parses the information from a records file - - Args: - records_file: path to the records file - - Yields: - tuples, filename as first element and the datetime string as second - """ - records_path = pathlib.Path(records_file) - tree = ElementTree.parse(records_path) - for channel_config in tree.findall(".//*[ImageFileName]"): - image_tag = channel_config.find("ImageFileName") - image_name = None if image_tag is None else image_tag.text - datetime_tag = channel_config.find("Timestamp") - datetime_str = None if datetime_tag is None else datetime_tag.text - yield image_name, datetime_str - - -def _parse_records_file(records_file: PathLike) -> pandas.DataFrame: - """parses the information from a records file - - Args: - records_file: path to the records file - - Returns: - pandas data frame with the parsed information - """ - data = _iter_records(records_file) - data_frame = pandas.DataFrame( - data, columns=[columns.ANALYSIS_IMAGE, columns.ANALYSIS_DATETIME] - ) - data_frame[columns.ANALYSIS_DATETIME] = pandas.to_datetime( - data_frame[columns.ANALYSIS_DATETIME] - ) - return data_frame - - -def add_record_time( - measurement: pandas.DataFrame, folder: PathLike -) -> pandas.DataFrame: - """adds the recoding datetime to the data frame - - The returned DataFrame will contain one more column for parsed 
datetime - - If the parameters could not be foundor do not match up with the - measurement data, the additional collumn will contain NaN. - - Argumentss: - measurement: the parsed measurement data - folder: the folder of the measurement data - - Returns: - the measurement data with parameters added - """ - record_path = _search_records_file(folder) - if record_path is None: - measurement[columns.ANALYSIS_DATETIME] = numpy.NAN - return measurement - - data_frame = _parse_records_file(record_path) - return measurement.merge(data_frame, how="left", on=columns.ANALYSIS_IMAGE) diff --git a/tests/test_csv_parser.py b/tests/test_csv_parser.py index 4ab9480..04b0190 100644 --- a/tests/test_csv_parser.py +++ b/tests/test_csv_parser.py @@ -4,11 +4,7 @@ import numpy import pytest -from .conftest import ( - EXAMPLE_DIR_WO_PARAMS, - EXAMPLE_DIR_WITH_PARAMS, - EXAMPLE_DIR_WITH_RECORD, -) +from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS @pytest.mark.parametrize( @@ -289,21 +285,6 @@ def test_parse_folder_no_datetime_records(example_dir): assert len(data_frame["Analysis.Datetime"].unique()) == 1 -def test_parse_folder_with_datetime_records(example_dir): - from sensospot_parser.csv_parser import parse_folder - - data_frame = parse_folder(example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert len(data_frame) == 8 * 4 * 100 - assert len(data_frame["Well.Row"].unique()) == 2 - assert len(data_frame["Well.Column"].unique()) == 4 - assert len(data_frame["Exposure.Id"].unique()) == 4 - assert len(data_frame["Pos.Id"].unique()) == 100 - assert len(data_frame["Parameters.Channel"].unique()) == 1 - assert len(data_frame["Parameters.Time"].unique()) == 1 - assert len(data_frame["Analysis.Datetime"].unique()) == 8 - - def test_sanity_check_ok(example_dir): from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files diff --git a/tests/test_recordtime.py b/tests/test_recordtime.py deleted file mode 100644 index 6a998da..0000000 --- a/tests/test_recordtime.py +++ 
/dev/null @@ -1,98 +0,0 @@ -import pandas -import pytest - -from .conftest import EXAMPLE_DIR_WITH_PARAMS, EXAMPLE_DIR_WITH_RECORD - - -@pytest.fixture -def file_list(example_dir): - import pathlib - - path = pathlib.Path(example_dir / EXAMPLE_DIR_WITH_RECORD) - tifs = (i.with_suffix(".tif") for i in path.glob("*.csv")) - return [i.name for i in tifs] - - -def test_search_records_file_ok(example_dir): - from sensospot_parser.recordtime import _search_records_file - - result = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert result.suffix == ".xml" - - -def test_search_records_file_not_found(example_dir): - from sensospot_parser.recordtime import _search_records_file - - result = _search_records_file(example_dir / EXAMPLE_DIR_WITH_PARAMS) - - assert result is None - - -def test_iter_records(example_dir): - from sensospot_parser.recordtime import _iter_records, _search_records_file - - path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD) - - result = list(_iter_records(path)) - - assert ( - result[0][0] == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_A01_1.tif" - ) - assert result[0][1] == "3/7/2022 5:31:47 PM" - assert ( - result[-1][0] == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_D04_4.tif" - ) - assert result[-1][1] == "3/7/2022 5:33:41 PM" - - -def test_parse_records_file(example_dir): - from sensospot_parser.recordtime import ( - _parse_records_file, - _search_records_file, - ) - - path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD) - - result = _parse_records_file(path) - - assert isinstance(result, pandas.DataFrame) - assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert len(result) == 64 - - -def test_add_record_time_ok(example_dir, file_list): - from sensospot_parser.recordtime import add_record_time - - df = pandas.DataFrame(file_list, columns=["Analysis.Image"]) - - result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert len(df) == len(result) - assert 
list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert not result["Analysis.Datetime"].hasnans - - -def test_add_record_time_unknown_file(example_dir, file_list): - from sensospot_parser.recordtime import add_record_time - - extended_list = file_list + ["unknown file"] - df = pandas.DataFrame(extended_list, columns=["Analysis.Image"]) - - result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_RECORD) - - assert len(df) == len(result) - assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert result["Analysis.Datetime"].hasnans - - -def test_add_record_time_no_record_xml(example_dir, file_list): - from sensospot_parser.recordtime import add_record_time - - df = pandas.DataFrame(file_list, columns=["Analysis.Image"]) - - result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_PARAMS) - - assert len(df) == len(result) - assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"] - assert result["Analysis.Datetime"].hasnans