Browse Source

removed the "recordtime" module

The "recordtime" module uses the assay result xml file to retrieve the Analysis.Datetime value.
This value is only available in the assay result xml.
Since the "csv_parser" module will only be used as a backup if this xml file is not present, a separate parser for the Analysis.Datetime value is not needed anymore.
xmlparsing
Holger Frey 2 years ago
parent
commit
938b063069
  1. 8
      src/sensospot_parser/csv_parser.py
  2. 98
      src/sensospot_parser/recordtime.py
  3. 21
      tests/test_csv_parser.py
  4. 98
      tests/test_recordtime.py

8
src/sensospot_parser/csv_parser.py

@ -12,7 +12,6 @@ import pandas
from . import columns from . import columns
from .parameters import add_measurement_parameters from .parameters import add_measurement_parameters
from .recordtime import add_record_time
PathLike = Union[str, pathlib.Path] PathLike = Union[str, pathlib.Path]
@ -225,8 +224,13 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame:
data_frame = parse_multiple_files(file_list) data_frame = parse_multiple_files(file_list)
except ValueError: except ValueError:
raise ValueError(f"No sensospot data found in folder '{folder}'") raise ValueError(f"No sensospot data found in folder '{folder}'")
data_frame = add_measurement_parameters(data_frame, folder_path) data_frame = add_measurement_parameters(data_frame, folder_path)
data_frame = add_record_time(data_frame, folder_path)
# The csv parser is only used if the xml analysis file is not present
# the xml file would hold the Analysis.Datetime value
data_frame[columns.ANALYSIS_DATETIME] = None
if quiet: if quiet:
return data_frame return data_frame
return _sanity_check(data_frame) return _sanity_check(data_frame)

98
src/sensospot_parser/recordtime.py

@ -1,98 +0,0 @@
""" Sensospot Data Parser
Parsing the numerical output from Sensovations Sensospot image analysis.
"""
import pathlib
from typing import Tuple, Union, Iterable, Optional
import numpy
import pandas
from defusedxml import ElementTree
from . import columns
PathLike = Union[str, pathlib.Path]
def _search_records_file(folder: PathLike) -> Optional[pathlib.Path]:
    """locates the records xml file in a folder

    The folder is scanned for visible (not dot-prefixed) files with the
    ".xsl" suffix. Only if exactly one such file exists, the file with the
    same name but an ".xml" suffix is regarded as the records file.

    Args:
        folder: directory to search

    Returns:
        the path to the records xml file or None, if there is not exactly
        one visible ".xsl" file or its ".xml" counterpart is not a file
    """
    stylesheets = [
        entry
        for entry in pathlib.Path(folder).iterdir()
        if entry.suffix == ".xsl" and not entry.name.startswith(".")
    ]
    if len(stylesheets) != 1:
        return None
    candidate = stylesheets[0].with_suffix(".xml")
    return candidate if candidate.is_file() else None
def _iter_records(records_file: PathLike) -> Iterable[Tuple[str, str]]:
    """yields image name and timestamp for each entry of a records file

    Every xml element that has an "ImageFileName" child is treated as one
    record. The text of its "ImageFileName" and "Timestamp" children is
    reported; a missing child or a child without text results in None at
    that position.

    Args:
        records_file: path to the records file

    Yields:
        tuples, filename as first element and the datetime string as second
    """

    def _child_text(parent, tag):
        # text of the first direct child named "tag", None if there is none
        child = parent.find(tag)
        if child is None:
            return None
        return child.text

    document = ElementTree.parse(pathlib.Path(records_file))
    for record in document.findall(".//*[ImageFileName]"):
        name = _child_text(record, "ImageFileName")
        timestamp = _child_text(record, "Timestamp")
        yield name, timestamp
def _parse_records_file(records_file: PathLike) -> pandas.DataFrame:
    """loads the entries of a records file into a data frame

    The frame has one row per record with the image file name and the
    timestamp; the timestamp strings are converted with
    pandas.to_datetime.

    Args:
        records_file: path to the records file

    Returns:
        pandas data frame with the parsed information
    """
    column_names = [columns.ANALYSIS_IMAGE, columns.ANALYSIS_DATETIME]
    records = pandas.DataFrame(
        _iter_records(records_file), columns=column_names
    )
    raw_timestamps = records[columns.ANALYSIS_DATETIME]
    records[columns.ANALYSIS_DATETIME] = pandas.to_datetime(raw_timestamps)
    return records
def add_record_time(
    measurement: pandas.DataFrame, folder: PathLike
) -> pandas.DataFrame:
    """adds the recording datetime to the data frame

    The returned DataFrame will contain one more column for the parsed
    datetime. If the records file could not be found or an image of the
    measurement data is not listed in it, the additional column will
    contain missing values for the affected rows.

    Note: if no records file is found, the column is added to the passed
    data frame in place and that same object is returned; otherwise a new,
    merged data frame is returned.

    Args:
        measurement: the parsed measurement data
        folder: the folder of the measurement data

    Returns:
        the measurement data with the datetime column added
    """
    record_path = _search_records_file(folder)
    if record_path is None:
        # "numpy.NAN" was removed in NumPy 2.0; "numpy.nan" works in all
        # NumPy versions
        measurement[columns.ANALYSIS_DATETIME] = numpy.nan
        return measurement

    data_frame = _parse_records_file(record_path)
    # left join keeps every measurement row, even without a matching record
    return measurement.merge(data_frame, how="left", on=columns.ANALYSIS_IMAGE)

21
tests/test_csv_parser.py

@ -4,11 +4,7 @@
import numpy import numpy
import pytest import pytest
from .conftest import ( from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
EXAMPLE_DIR_WO_PARAMS,
EXAMPLE_DIR_WITH_PARAMS,
EXAMPLE_DIR_WITH_RECORD,
)
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -289,21 +285,6 @@ def test_parse_folder_no_datetime_records(example_dir):
assert len(data_frame["Analysis.Datetime"].unique()) == 1 assert len(data_frame["Analysis.Datetime"].unique()) == 1
def test_parse_folder_with_datetime_records(example_dir):
    """parse_folder() on the record example yields the expected data shape"""
    from sensospot_parser.csv_parser import parse_folder

    data_frame = parse_folder(example_dir / EXAMPLE_DIR_WITH_RECORD)

    assert len(data_frame) == 8 * 4 * 100

    # number of distinct values expected per column
    expected_unique_counts = {
        "Well.Row": 2,
        "Well.Column": 4,
        "Exposure.Id": 4,
        "Pos.Id": 100,
        "Parameters.Channel": 1,
        "Parameters.Time": 1,
        "Analysis.Datetime": 8,
    }
    for column, count in expected_unique_counts.items():
        assert len(data_frame[column].unique()) == count
def test_sanity_check_ok(example_dir): def test_sanity_check_ok(example_dir):
from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files

98
tests/test_recordtime.py

@ -1,98 +0,0 @@
import pandas
import pytest
from .conftest import EXAMPLE_DIR_WITH_PARAMS, EXAMPLE_DIR_WITH_RECORD
@pytest.fixture
def file_list(example_dir):
    """names of the csv files in the record example, with a ".tif" suffix"""
    import pathlib

    record_dir = pathlib.Path(example_dir / EXAMPLE_DIR_WITH_RECORD)
    return [
        csv_file.with_suffix(".tif").name
        for csv_file in record_dir.glob("*.csv")
    ]
def test_search_records_file_ok(example_dir):
    """an xml file is returned for the example folder with records"""
    from sensospot_parser.recordtime import _search_records_file

    records_file = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)

    assert records_file.suffix == ".xml"
def test_search_records_file_not_found(example_dir):
    """None is returned for the example folder that only has parameters"""
    from sensospot_parser.recordtime import _search_records_file

    records_file = _search_records_file(example_dir / EXAMPLE_DIR_WITH_PARAMS)

    assert records_file is None
def test_iter_records(example_dir):
    """first and last record hold the expected image name and timestamp"""
    from sensospot_parser.recordtime import _iter_records, _search_records_file

    records_file = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)
    records = list(_iter_records(records_file))

    assert records[0] == (
        "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_A01_1.tif",
        "3/7/2022 5:31:47 PM",
    )
    assert records[-1] == (
        "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_D04_4.tif",
        "3/7/2022 5:33:41 PM",
    )
def test_parse_records_file(example_dir):
    """the records file is parsed into a two column data frame"""
    from sensospot_parser.recordtime import (
        _parse_records_file,
        _search_records_file,
    )

    records_file = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)
    parsed = _parse_records_file(records_file)

    assert isinstance(parsed, pandas.DataFrame)
    assert list(parsed.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert len(parsed) == 64
def test_add_record_time_ok(example_dir, file_list):
    """every listed image gets a datetime if the records file is present"""
    from sensospot_parser.recordtime import add_record_time

    images = pandas.DataFrame(file_list, columns=["Analysis.Image"])
    with_time = add_record_time(images, example_dir / EXAMPLE_DIR_WITH_RECORD)

    assert len(images) == len(with_time)
    assert list(with_time.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert not with_time["Analysis.Datetime"].hasnans
def test_add_record_time_unknown_file(example_dir, file_list):
    """an image missing from the records file gets no datetime"""
    from sensospot_parser.recordtime import add_record_time

    image_names = [*file_list, "unknown file"]
    images = pandas.DataFrame(image_names, columns=["Analysis.Image"])
    with_time = add_record_time(images, example_dir / EXAMPLE_DIR_WITH_RECORD)

    assert len(images) == len(with_time)
    assert list(with_time.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert with_time["Analysis.Datetime"].hasnans
def test_add_record_time_no_record_xml(example_dir, file_list):
    """without a records file the datetime column holds missing values"""
    from sensospot_parser.recordtime import add_record_time

    images = pandas.DataFrame(file_list, columns=["Analysis.Image"])
    with_time = add_record_time(images, example_dir / EXAMPLE_DIR_WITH_PARAMS)

    assert len(images) == len(with_time)
    assert list(with_time.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert with_time["Analysis.Datetime"].hasnans
Loading…
Cancel
Save