Browse Source
The "recordtime" module used the assay result xml file for retrieving the Analysis.Datetime value. This value is only available in the assay result xml. Since the "parse_csv" module will only be a backup if this xml file is not present, a separate parser for the Analysis.Datetime value is not needed anymore.
xmlparsing
Holger Frey
2 years ago
4 changed files with 7 additions and 218 deletions
@ -1,98 +0,0 @@ |
|||||||
""" Sensospot Data Parser |
|
||||||
|
|
||||||
Parsing the numerical output from Sensovations Sensospot image analysis. |
|
||||||
""" |
|
||||||
|
|
||||||
import pathlib |
|
||||||
from typing import Tuple, Union, Iterable, Optional |
|
||||||
|
|
||||||
import numpy |
|
||||||
import pandas |
|
||||||
from defusedxml import ElementTree |
|
||||||
|
|
||||||
from . import columns |
|
||||||
|
|
||||||
PathLike = Union[str, pathlib.Path] |
|
||||||
|
|
||||||
|
|
||||||
def _search_records_file(folder: PathLike) -> Optional[pathlib.Path]:
    """searches for the records xml file in a folder

    The records file is located indirectly: the folder must contain exactly
    one non-hidden ".xsl" file, and the records file is the ".xml" file with
    the same stem next to it.

    Args:
        folder: directory to search

    Returns:
        the path to the records file or None, if the folder does not exist,
        there is not exactly one non-hidden ".xsl" file in it or the
        matching ".xml" file is missing
    """
    folder_path = pathlib.Path(folder)
    if not folder_path.is_dir():
        # nothing to search in: report "not found" instead of raising
        return None

    xsl_files = [
        item
        for item in folder_path.iterdir()
        if item.suffix == ".xsl" and not item.name.startswith(".")
    ]
    if len(xsl_files) != 1:
        # zero or multiple style sheets: the records file is ambiguous
        return None

    xml_file = xsl_files[0].with_suffix(".xml")
    return xml_file if xml_file.is_file() else None
|
||||||
|
|
||||||
|
|
||||||
def _iter_records(
    records_file: PathLike,
) -> Iterable[Tuple[Optional[str], Optional[str]]]:
    """parses the information from a records file

    Every xml element that has an "ImageFileName" child is treated as one
    record. The text of its "Timestamp" child is used as datetime string.

    Args:
        records_file: path to the records file

    Yields:
        tuples, filename as first element and the datetime string as second;
        an element of the tuple is None if the tag is missing or has no text
    """
    tree = ElementTree.parse(pathlib.Path(records_file))
    for record in tree.findall(".//*[ImageFileName]"):
        image_tag = record.find("ImageFileName")
        timestamp_tag = record.find("Timestamp")
        # explicit None checks: an element without children is falsy
        yield (
            None if image_tag is None else image_tag.text,
            None if timestamp_tag is None else timestamp_tag.text,
        )
|
||||||
|
|
||||||
|
|
||||||
def _parse_records_file(records_file: PathLike) -> pandas.DataFrame:
    """builds a data frame from the entries of a records file

    Args:
        records_file: path to the records file

    Returns:
        pandas data frame with one row per record, holding the image file
        name and the timestamp converted with pandas.to_datetime
    """
    column_names = [columns.ANALYSIS_IMAGE, columns.ANALYSIS_DATETIME]
    records = pandas.DataFrame(
        _iter_records(records_file), columns=column_names
    )
    timestamps = pandas.to_datetime(records[columns.ANALYSIS_DATETIME])
    records[columns.ANALYSIS_DATETIME] = timestamps
    return records
|
||||||
|
|
||||||
|
|
||||||
def add_record_time(
    measurement: pandas.DataFrame, folder: PathLike
) -> pandas.DataFrame:
    """adds the recording datetime to the data frame

    The returned DataFrame will contain one more column for the parsed
    datetime.

    If the records file could not be found, the additional column will
    contain NaN in every row; in this case the column is added to the passed
    data frame itself. Otherwise the records are left-merged on the image
    name into a new data frame, so images without a matching record get a
    missing value.

    Args:
        measurement: the parsed measurement data
        folder: the folder of the measurement data

    Returns:
        the measurement data with the recording datetime added
    """
    record_path = _search_records_file(folder)
    if record_path is None:
        # numpy.nan, since the upper case alias was removed in NumPy 2.0
        measurement[columns.ANALYSIS_DATETIME] = numpy.nan
        return measurement

    data_frame = _parse_records_file(record_path)
    return measurement.merge(data_frame, how="left", on=columns.ANALYSIS_IMAGE)
|
@ -1,98 +0,0 @@ |
|||||||
import pandas |
|
||||||
import pytest |
|
||||||
|
|
||||||
from .conftest import EXAMPLE_DIR_WITH_PARAMS, EXAMPLE_DIR_WITH_RECORD |
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def file_list(example_dir):
    """tif image names derived from the csv files of the record example"""
    import pathlib

    record_dir = pathlib.Path(example_dir / EXAMPLE_DIR_WITH_RECORD)
    return [
        csv_file.with_suffix(".tif").name
        for csv_file in record_dir.glob("*.csv")
    ]
|
||||||
|
|
||||||
|
|
||||||
def test_search_records_file_ok(example_dir):
    """the search in the record example folder yields an xml file"""
    from sensospot_parser.recordtime import _search_records_file

    record_dir = example_dir / EXAMPLE_DIR_WITH_RECORD

    found = _search_records_file(record_dir)

    assert found.suffix == ".xml"
|
||||||
|
|
||||||
|
|
||||||
def test_search_records_file_not_found(example_dir):
    """the search in the params example folder yields None"""
    from sensospot_parser.recordtime import _search_records_file

    params_dir = example_dir / EXAMPLE_DIR_WITH_PARAMS

    found = _search_records_file(params_dir)

    assert found is None
|
||||||
|
|
||||||
|
|
||||||
def test_iter_records(example_dir):
    """first and last record hold the expected image name and timestamp"""
    from sensospot_parser.recordtime import _iter_records, _search_records_file

    path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)

    records = list(_iter_records(path))
    first_image, first_time = records[0][0], records[0][1]
    last_image, last_time = records[-1][0], records[-1][1]

    assert first_image == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_A01_1.tif"
    assert first_time == "3/7/2022 5:31:47 PM"
    assert last_image == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_D04_4.tif"
    assert last_time == "3/7/2022 5:33:41 PM"
|
||||||
|
|
||||||
|
|
||||||
def test_parse_records_file(example_dir):
    """parsing the record example gives a two column frame with 64 rows"""
    from sensospot_parser.recordtime import (
        _parse_records_file,
        _search_records_file,
    )

    records_file = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)

    parsed = _parse_records_file(records_file)

    assert isinstance(parsed, pandas.DataFrame)
    assert list(parsed.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert len(parsed) == 64
|
||||||
|
|
||||||
|
|
||||||
def test_add_record_time_ok(example_dir, file_list):
    """every known image gets a datetime from the records file"""
    from sensospot_parser.recordtime import add_record_time

    measurement = pandas.DataFrame(file_list, columns=["Analysis.Image"])
    record_dir = example_dir / EXAMPLE_DIR_WITH_RECORD

    result = add_record_time(measurement, record_dir)

    assert len(measurement) == len(result)
    assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert not result["Analysis.Datetime"].hasnans
|
||||||
|
|
||||||
|
|
||||||
def test_add_record_time_unknown_file(example_dir, file_list):
    """an image without a record ends up with a missing datetime"""
    from sensospot_parser.recordtime import add_record_time

    image_names = file_list + ["unknown file"]
    measurement = pandas.DataFrame(image_names, columns=["Analysis.Image"])
    record_dir = example_dir / EXAMPLE_DIR_WITH_RECORD

    result = add_record_time(measurement, record_dir)

    assert len(measurement) == len(result)
    assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert result["Analysis.Datetime"].hasnans
|
||||||
|
|
||||||
|
|
||||||
def test_add_record_time_no_record_xml(example_dir, file_list):
    """without a records file the datetime column holds only missing values"""
    from sensospot_parser.recordtime import add_record_time

    df = pandas.DataFrame(file_list, columns=["Analysis.Image"])

    result = add_record_time(df, example_dir / EXAMPLE_DIR_WITH_PARAMS)

    assert len(df) == len(result)
    assert list(result.columns) == ["Analysis.Image", "Analysis.Datetime"]
    # "hasnans" would already pass with a single missing value; without a
    # records file there must be no datetime at all
    assert result["Analysis.Datetime"].isna().all()
|
Loading…
Reference in new issue