Add optional measurement parameters to the parsed data.

Parses channel name and exposure time from a "Parameters/**/*.svexp" settings
file (new module sensospot_data/parameters.py), stores the HDF5 table name in
VERSION_TABLE_NAME and moves the shared test fixtures to tests/conftest.py.

diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py
index 72158cb..1f921ea 100644
--- a/sensospot_data/__init__.py
+++ b/sensospot_data/__init__.py
@@ -5,6 +5,8 @@ Parsing the numerical output from Sensovations Sensospot image analysis.
 
 __version__ = "0.1.0"
 
+VERSION_TABLE_NAME = f"v{__version__}".replace(".", "_")
+
 
 from .parser import (  # noqa: F401
     CACHE_FILE_NAME,
diff --git a/sensospot_data/parameters.py b/sensospot_data/parameters.py
new file mode 100644
index 0000000..a16e175
--- /dev/null
+++ b/sensospot_data/parameters.py
@@ -0,0 +1,93 @@
+""" Sensospot Data Parser
+
+Parsing the numerical output from Sensovations Sensospot image analysis.
+"""
+
+from pathlib import Path
+from collections import namedtuple
+
+import numpy
+from defusedxml import ElementTree
+
+MeasurementParams = namedtuple("MeasurementParams", ["channel", "time"])
+
+
+def _search_measurement_params_file(folder):
+    """ searches for an exposure settings file in a folder
+
+    Looks recursively for "*.svexp" files below the "Parameters" sub folder
+    and returns the path only if exactly one file is found, otherwise None.
+    """
+    folder_path = Path(folder)
+    params_folder = folder_path / "Parameters"
+    if not params_folder.is_dir():
+        return None
+    param_files = list(params_folder.glob("**/*.svexp"))
+    if len(param_files) == 1:
+        return param_files[0]
+    else:
+        return None
+
+
+def _parse_measurement_params(params_file):
+    """ parses the channel information from a settings file
+
+    Returns a dict mapping the exposure id to a MeasurementParams tuple
+    holding the lower-cased channel name and the "ExposureTimeMs" value.
+    """
+    file_path = Path(params_file)
+    with file_path.open("r") as file_handle:
+        tree = ElementTree.parse(file_handle)
+    result = {}
+    for child in tree.find("Channels"):
+        # child.tag == "ChannelConfig1"; use all trailing digits so that
+        # ids above 9 ("ChannelConfig10") are not cut down to the last digit
+        trailing_digits = child.tag[len(child.tag.rstrip("0123456789")) :]
+        exposure = int(trailing_digits)
+        channel_description = child.attrib["Description"]
+        # channel_description == "[Cy3|Cy5] Green"
+        channel = channel_description.rsplit(" ", 1)[-1]
+        time = int(child.attrib["ExposureTimeMs"])
+        result[exposure] = MeasurementParams(channel.lower(), time)
+    return result
+
+
+def _get_measurement_params(folder):
+    """ returns measurement parameters, None if no settings file is found """
+    params_file = _search_measurement_params_file(folder)
+    if params_file is not None:
+        return _parse_measurement_params(params_file)
+    return None
+
+
+def _add_measurement_params(data_frame, params):
+    """ adds measurement parameters to a data frame
+
+    The data frame is modified in place; "Parameters.Channel" is converted
+    to a categorical column afterwards.
+    """
+    for exposure_id, info in params.items():
+        mask = data_frame["Exposure.Id"] == exposure_id
+        data_frame.loc[mask, "Parameters.Channel"] = info.channel
+        data_frame.loc[mask, "Parameters.Time"] = info.time
+    data_frame["Parameters.Channel"] = data_frame["Parameters.Channel"].astype(
+        "category"
+    )
+    return data_frame
+
+
+def add_optional_measurement_parameters(data_frame, folder):
+    """ adds measurement params to the data frame, if they could be parsed
+
+    Both parameter columns are always created (filled with NaN); values are
+    only set if the exposure ids of the settings file are exactly the ones
+    present in the data frame.
+    """
+    data_frame["Parameters.Channel"] = numpy.nan
+    data_frame["Parameters.Time"] = numpy.nan
+    params = _get_measurement_params(folder)
+    if params:
+        available_exposures = set(data_frame["Exposure.Id"].unique())
+        if available_exposures == set(params.keys()):
+            return _add_measurement_params(data_frame, params)
+    return data_frame
diff --git a/sensospot_data/parser.py b/sensospot_data/parser.py
index 729a2cb..809d715 100644
--- a/sensospot_data/parser.py
+++ b/sensospot_data/parser.py
@@ -9,6 +9,8 @@ from collections import namedtuple
 
 import pandas
 
+from .parameters import add_optional_measurement_parameters
+
 REGEX_WELL = re.compile(
     r"""
     (?P<row>([A-Z]+))  # row name containing one or more letters
@@ -31,9 +33,9 @@ FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
 
 def _get_cache_table_name():
     """ automatic hdf5 table name, avoids a circular import """
-    from . import __version__
+    from . import VERSION_TABLE_NAME
 
-    return f"v{__version__}"
+    return VERSION_TABLE_NAME
 
 
 def _guess_decimal_separator(file_handle):
@@ -91,6 +93,7 @@ def parse_multiple_files(file_list):
     data_frame = next(collection)
     for next_frame in collection:
         data_frame = data_frame.append(next_frame, ignore_index=True)
+    data_frame["Well.Row"] = data_frame["Well.Row"].astype("category")
     return data_frame
 
 
@@ -118,7 +121,7 @@ def parse_folder(folder):
     """ parses all csv files in a folder to one large dataframe """
     file_list = _list_csv_files(folder)
     data_frame = parse_multiple_files(file_list)
-    data_frame["Well.Row"] = data_frame["Well.Row"].astype("category")
+    data_frame = add_optional_measurement_parameters(data_frame, folder)
     return _sanity_check(data_frame)
 
 
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..3858b31
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,37 @@
+""" test fixtures """
+
+from pathlib import Path
+
+import pytest
+
+EXAMPLE_DIR_WO_PARAMS = "mtp_wo_parameters"
+EXAMPLE_DIR_WITH_PARAMS = "mtp_with_parameters"
+
+
+@pytest.fixture
+def example_dir(request):
+    root_dir = Path(request.config.rootdir)
+    yield root_dir / "example_data"
+
+
+@pytest.fixture
+def example_file(example_dir):
+    data_dir = example_dir / EXAMPLE_DIR_WO_PARAMS
+    yield data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv"
+
+
+@pytest.fixture
+def exposure_df():
+    from pandas import DataFrame
+
+    yield DataFrame(data={"Exposure.Id": [1, 2, 3]})
+
+
+@pytest.fixture
+def dir_for_caching(tmpdir, example_file):
+    import shutil
+
+    temp_path = Path(tmpdir)
+    dest = temp_path / example_file.name
+    shutil.copy(example_file, dest)
+    yield temp_path
diff --git a/tests/test_parameters.py b/tests/test_parameters.py
new file mode 100644
index 0000000..7772958
--- /dev/null
+++ b/tests/test_parameters.py
@@ -0,0 +1,125 @@
+from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
+
+
+def test_search_measurement_params_file_ok(example_dir):
+    from sensospot_data.parameters import _search_measurement_params_file
+
+    result = _search_measurement_params_file(
+        example_dir / EXAMPLE_DIR_WITH_PARAMS
+    )
+
+    assert result.suffix == ".svexp"
+
+
+def test_search_measurement_params_file_no_parameters_folder(example_dir):
+    from sensospot_data.parameters import _search_measurement_params_file
+
+    result = _search_measurement_params_file(
+        example_dir / EXAMPLE_DIR_WO_PARAMS
+    )
+
+    assert result is None
+
+
+def test_search_measurement_params_file_parameters_file(tmpdir):
+    from sensospot_data.parameters import _search_measurement_params_file
+
+    params_dir = tmpdir / "Parameters"
+    params_dir.mkdir()
+
+    result = _search_measurement_params_file(tmpdir)
+
+    assert result is None
+
+
+def test_parse_channel_info(example_dir):
+    from sensospot_data.parameters import (
+        _search_measurement_params_file,
+        _parse_measurement_params,
+    )
+
+    params = _search_measurement_params_file(
+        example_dir / EXAMPLE_DIR_WITH_PARAMS
+    )
+    result = _parse_measurement_params(params)
+
+    assert set(result.keys()) == {1, 2, 3}
+    assert result[1] == ("green", 100)
+    assert result[2] == ("red", 150)
+    assert result[3] == ("red", 15)
+
+
+def test_get_measurement_params_file_found(example_dir):
+    from sensospot_data.parameters import _get_measurement_params
+
+    result = _get_measurement_params(example_dir / EXAMPLE_DIR_WITH_PARAMS)
+
+    assert set(result.keys()) == {1, 2, 3}
+    assert result[1] == ("green", 100)
+    assert result[2] == ("red", 150)
+    assert result[3] == ("red", 15)
+
+
+def test_get_measurement_params_file_not_found(example_dir):
+    from sensospot_data.parameters import _get_measurement_params
+
+    result = _get_measurement_params(example_dir / EXAMPLE_DIR_WO_PARAMS)
+
+    assert result is None
+
+
+def test_add_measurement_params(exposure_df):
+    from sensospot_data.parameters import (
+        _add_measurement_params,
+        MeasurementParams,
+    )
+
+    params = {
+        1: MeasurementParams("red", 10),
+        2: MeasurementParams("green", 20),
+        3: MeasurementParams("blue", 50),
+    }
+
+    result = _add_measurement_params(exposure_df, params)
+
+    assert result["Exposure.Id"][0] == 1
+    assert result["Parameters.Channel"][0] == "red"
+    assert result["Parameters.Time"][0] == 10
+    assert result["Exposure.Id"][1] == 2
+    assert result["Parameters.Channel"][1] == "green"
+    assert result["Parameters.Time"][1] == 20
+    assert result["Exposure.Id"][2] == 3
+    assert result["Parameters.Channel"][2] == "blue"
+    assert result["Parameters.Time"][2] == 50
+
+
+def test_add_optional_measurement_parameters_with_params_file(
+    exposure_df, example_dir
+):
+    from sensospot_data.parameters import add_optional_measurement_parameters
+
+    folder = example_dir / EXAMPLE_DIR_WITH_PARAMS
+    add_optional_measurement_parameters(exposure_df, folder)
+
+    expected = [(1, "green", 100), (2, "red", 150), (3, "red", 15)]
+    for exposure_id, channel, time in expected:
+        mask = exposure_df["Exposure.Id"] == exposure_id
+        example_row = exposure_df.loc[mask].iloc[0]
+        assert example_row["Parameters.Channel"] == channel
+        assert example_row["Parameters.Time"] == time
+
+
+def test_add_optional_measurement_parameters_without_params_file(
+    exposure_df, example_dir
+):
+    from sensospot_data.parameters import add_optional_measurement_parameters
+    from pandas import isnull
+
+    folder = example_dir / EXAMPLE_DIR_WO_PARAMS
+    add_optional_measurement_parameters(exposure_df, folder)
+
+    for exposure_id in range(1, 4):
+        mask = exposure_df["Exposure.Id"] == exposure_id
+        example_row = exposure_df.loc[mask].iloc[0]
+        assert isnull(example_row["Parameters.Channel"])
+        assert isnull(example_row["Parameters.Time"])
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a6c6ef2..1e58f91 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,41 +1,10 @@
 """ Stub file for testing the project """
 
-from pathlib import Path
 
 import numpy
 import pytest
 
-EXAMPLE_DIR_WO_PARAMS = "mtp_wo_parameters"
-EXAMPLE_DIR_WITH_PARAMS = "mtp_with_parameters"
-
-
-@pytest.fixture
-def example_dir(request):
-    root_dir = Path(request.config.rootdir)
-    yield root_dir / "example_data"
-
-
-@pytest.fixture
-def example_file(example_dir):
-    data_dir = example_dir / EXAMPLE_DIR_WO_PARAMS
-    yield data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv"
-
-
-@pytest.fixture
-def exposure_df():
-    from pandas import DataFrame
-
-    yield DataFrame(data={"Exposure.Id": [1, 2, 3]})
-
-
-@pytest.fixture
-def dir_for_caching(tmpdir, example_file):
-    import shutil
-
-    temp_path = Path(tmpdir)
-    dest = temp_path / example_file.name
-    shutil.copy(example_file, dest)
-    yield temp_path
+from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
 
 
 @pytest.mark.parametrize(
@@ -268,6 +237,8 @@ def test_parse_folder(example_dir):
     assert len(data_frame["Well.Column"].unique()) == 12
     assert len(data_frame["Exposure.Id"].unique()) == 3
     assert len(data_frame["Pos.Id"].unique()) == 100
+    assert len(data_frame["Parameters.Channel"].unique()) == 2
+    assert len(data_frame["Parameters.Time"].unique()) == 3
 
 
 def test_sanity_check_ok(example_dir):
@@ -353,12 +324,11 @@ def test_process_folder_read_cache_fails_silently(
 
 def test_get_cache_table_name():
     from sensospot_data.parser import _get_cache_table_name
-    from sensospot_data import __version__
+    from sensospot_data import VERSION_TABLE_NAME
 
     result = _get_cache_table_name()
 
-    assert result.startswith("v")
-    assert result[1:] == __version__
+    assert result == VERSION_TABLE_NAME
 
 
 def test_process_folder_read_cache_no_cache_arg(dir_for_caching, exposure_df):