Browse Source
The "recordtime" module uses the assay result xml file for retrieving the Analysis.Datetime value. This value is only available in the assay result xml. Since the "parse_csv" module will only be a backup if this xml file is not present, a separate parser for the Analysis.Datetime is not needed anymore.
xmlparsing
Holger Frey
2 years ago
4 changed files with 7 additions and 218 deletions
@ -1,98 +0,0 @@
@@ -1,98 +0,0 @@
|
||||
""" Sensospot Data Parser |
||||
|
||||
Parsing the numerical output from Sensovations Sensospot image analysis. |
||||
""" |
||||
|
||||
import pathlib |
||||
from typing import Tuple, Union, Iterable, Optional |
||||
|
||||
import numpy |
||||
import pandas |
||||
from defusedxml import ElementTree |
||||
|
||||
from . import columns |
||||
|
||||
PathLike = Union[str, pathlib.Path] |
||||
|
||||
|
||||
def _search_records_file(folder: PathLike) -> Optional[pathlib.Path]:
    """searches for the records xml file in a folder

    The records file is located indirectly: the folder must contain exactly
    one non-hidden ".xsl" file, and the records file is the ".xml" file that
    shares its base name.

    Args:
        folder:  directory to search

    Returns:
        the path to the records xml file or None if it could not be found
    """
    xsl_files = [
        item
        for item in pathlib.Path(folder).iterdir()
        if item.suffix == ".xsl" and not item.name.startswith(".")
    ]
    # with zero or several ".xsl" files the records file is ambiguous
    if len(xsl_files) != 1:
        return None
    xml_file = xsl_files[0].with_suffix(".xml")
    return xml_file if xml_file.is_file() else None
||||
|
||||
|
||||
def _iter_records(
    records_file: PathLike,
) -> Iterable[Tuple[Optional[str], Optional[str]]]:
    """parses the information from a records file

    Every element of the xml tree that has a direct "ImageFileName" child
    produces one tuple. The datetime is read from the "Timestamp" child of
    the same element.

    Args:
        records_file:  path to the records file

    Yields:
        tuples, filename as first element and the datetime string as second;
        an item is None if its tag is missing or has no text
    """
    tree = ElementTree.parse(pathlib.Path(records_file))
    # the predicate selects only elements with an <ImageFileName> child
    for entry in tree.findall(".//*[ImageFileName]"):
        image_tag = entry.find("ImageFileName")
        timestamp_tag = entry.find("Timestamp")
        yield (
            None if image_tag is None else image_tag.text,
            None if timestamp_tag is None else timestamp_tag.text,
        )
||||
|
||||
|
||||
def _parse_records_file(records_file: PathLike) -> pandas.DataFrame:
    """builds a data frame from the entries of a records file

    Args:
        records_file:  path to the records file

    Returns:
        pandas data frame with one row per record, holding the image name
        and the datetime converted with pandas.to_datetime
    """
    column_names = [columns.ANALYSIS_IMAGE, columns.ANALYSIS_DATETIME]
    records = pandas.DataFrame(
        _iter_records(records_file), columns=column_names
    )
    # the records file provides the datetime as text, convert it in place
    timestamps = pandas.to_datetime(records[columns.ANALYSIS_DATETIME])
    records[columns.ANALYSIS_DATETIME] = timestamps
    return records
||||
|
||||
|
||||
def add_record_time(
    measurement: pandas.DataFrame, folder: PathLike
) -> pandas.DataFrame:
    """adds the recording datetime to the data frame

    The returned DataFrame will contain one more column for the parsed
    datetime. The passed-in data frame itself is left unchanged.

    If the records file could not be found or its entries do not match up
    with the measurement data, the additional column will contain missing
    values.

    Args:
        measurement:  the parsed measurement data
        folder:       the folder of the measurement data

    Returns:
        the measurement data with the recording datetime added
    """
    record_path = _search_records_file(folder)
    if record_path is None:
        # work on a copy, so the caller's frame is not modified; the merge
        # below returns a new frame as well
        without_time = measurement.copy()
        # "numpy.nan" instead of the "numpy.NAN" alias removed in NumPy 2.0
        without_time[columns.ANALYSIS_DATETIME] = numpy.nan
        return without_time

    data_frame = _parse_records_file(record_path)
    # left join: measurement rows without a matching record are kept
    return measurement.merge(data_frame, how="left", on=columns.ANALYSIS_IMAGE)
@ -1,98 +0,0 @@
@@ -1,98 +0,0 @@
|
||||
import pandas |
||||
import pytest |
||||
|
||||
from .conftest import EXAMPLE_DIR_WITH_PARAMS, EXAMPLE_DIR_WITH_RECORD |
||||
|
||||
|
||||
@pytest.fixture
def file_list(example_dir):
    """tif file names derived from the csv files of the record example"""
    import pathlib

    record_dir = pathlib.Path(example_dir / EXAMPLE_DIR_WITH_RECORD)
    # swap the ".csv" suffix for ".tif" and keep only the bare file name
    return [
        csv_path.with_suffix(".tif").name
        for csv_path in record_dir.glob("*.csv")
    ]
||||
|
||||
|
||||
def test_search_records_file_ok(example_dir):
    """a folder with a records file yields a path to an xml file"""
    from sensospot_parser.recordtime import _search_records_file

    record_dir = example_dir / EXAMPLE_DIR_WITH_RECORD

    found = _search_records_file(record_dir)

    assert found.suffix == ".xml"
||||
|
||||
|
||||
def test_search_records_file_not_found(example_dir):
    """a folder without a records file yields None"""
    from sensospot_parser.recordtime import _search_records_file

    params_dir = example_dir / EXAMPLE_DIR_WITH_PARAMS

    found = _search_records_file(params_dir)

    assert found is None
||||
|
||||
|
||||
def test_iter_records(example_dir):
    """the first and last record carry the expected image name and time"""
    from sensospot_parser.recordtime import _iter_records, _search_records_file

    path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)

    records = list(_iter_records(path))

    first_image, first_time = records[0]
    last_image, last_time = records[-1]
    assert first_image == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_A01_1.tif"
    assert first_time == "3/7/2022 5:31:47 PM"
    assert last_image == "220307_SN0801_CHECK-01_SL1,11,9,14_MS_1_1_D04_4.tif"
    assert last_time == "3/7/2022 5:33:41 PM"
||||
|
||||
|
||||
def test_parse_records_file(example_dir):
    """parsing the example records file gives a two column data frame"""
    from sensospot_parser.recordtime import (
        _parse_records_file,
        _search_records_file,
    )

    path = _search_records_file(example_dir / EXAMPLE_DIR_WITH_RECORD)

    parsed = _parse_records_file(path)

    expected_columns = ["Analysis.Image", "Analysis.Datetime"]
    assert isinstance(parsed, pandas.DataFrame)
    assert list(parsed.columns) == expected_columns
    assert len(parsed) == 64
||||
|
||||
|
||||
def test_add_record_time_ok(example_dir, file_list):
    """every known image file gets a datetime assigned"""
    from sensospot_parser.recordtime import add_record_time

    frame = pandas.DataFrame(file_list, columns=["Analysis.Image"])
    record_dir = example_dir / EXAMPLE_DIR_WITH_RECORD

    with_time = add_record_time(frame, record_dir)

    assert len(frame) == len(with_time)
    assert list(with_time.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert not with_time["Analysis.Datetime"].hasnans
||||
|
||||
|
||||
def test_add_record_time_unknown_file(example_dir, file_list):
    """an image name missing from the records leaves a gap in the datetimes"""
    from sensospot_parser.recordtime import add_record_time

    image_names = [*file_list, "unknown file"]
    frame = pandas.DataFrame(image_names, columns=["Analysis.Image"])
    record_dir = example_dir / EXAMPLE_DIR_WITH_RECORD

    with_time = add_record_time(frame, record_dir)

    assert len(frame) == len(with_time)
    assert list(with_time.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert with_time["Analysis.Datetime"].hasnans
||||
|
||||
|
||||
def test_add_record_time_no_record_xml(example_dir, file_list):
    """without a records file the datetime column holds missing values"""
    from sensospot_parser.recordtime import add_record_time

    frame = pandas.DataFrame(file_list, columns=["Analysis.Image"])
    params_dir = example_dir / EXAMPLE_DIR_WITH_PARAMS

    with_time = add_record_time(frame, params_dir)

    assert len(frame) == len(with_time)
    assert list(with_time.columns) == ["Analysis.Image", "Analysis.Datetime"]
    assert with_time["Analysis.Datetime"].hasnans
Loading…
Reference in new issue