From 64cee964857ab12e560683553c06f21a395cc0cf Mon Sep 17 00:00:00 2001
From: Holger Frey <frey@imtek.de>
Date: Fri, 25 Mar 2022 10:59:18 +0100
Subject: [PATCH] simplified adding parsed parameters

---
 sensospot_data/__init__.py   |   3 +-
 sensospot_data/parameters.py |  81 +++++++++-------------------
 sensospot_data/parser.py     |   3 +-
 tests/test_parameters.py     | 102 ++++++-----------------------------
 tests/test_sensospot_data.py |   3 +-
 5 files changed, 45 insertions(+), 147 deletions(-)

diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py
index a6f155d..062b1cb 100644
--- a/sensospot_data/__init__.py
+++ b/sensospot_data/__init__.py
@@ -12,9 +12,8 @@ from pathlib import Path
 import click
 import pandas
 
-from . import columns
+from . import columns  # noqa: F401
 from .parser import parse_file, parse_folder  # noqa: F401
-from .parameters import ExposureInfo  # noqa: F401
 
 DEFAULT_OUTPUT_FILENAME = "collected_data.csv"
 
diff --git a/sensospot_data/parameters.py b/sensospot_data/parameters.py
index d714459..1d09d68 100644
--- a/sensospot_data/parameters.py
+++ b/sensospot_data/parameters.py
@@ -12,7 +12,6 @@ from defusedxml import ElementTree
 
 from . import columns
 
-ExposureInfo = namedtuple("ExposureInfo", ["channel", "time"])
 
 
 def _search_measurement_params_file(folder):
@@ -28,21 +27,28 @@ def _search_measurement_params_file(folder):
         return None
 
 
+def _get_channel_data(channel_node):
+    # channel_node.tag == "ChannelConfig1"
+    exposure_id = int(channel_node.tag[-1])
+    # description == "[Cy3|Cy5] Green"
+    description = channel_node.attrib["Description"]
+    exposure_channel = description.rsplit(" ", 1)[-1]
+    # floats can be used for exposure times, not only ints
+    exposure_time = float(channel_node.attrib["ExposureTimeMs"])
+    return {
+        columns.EXPOSURE_ID: exposure_id,
+        columns.PARAMETERS_CHANNEL: exposure_channel.lower(),
+        columns.PARAMETERS_TIME: exposure_time,
+    }
+
+
 def _parse_measurement_params(params_file):
     """parses the cannel informations from a settings file"""
     file_path = Path(params_file)
     with file_path.open("r") as file_handle:
         tree = ElementTree.parse(file_handle)
-    result = {}
-    for child in tree.find("Channels"):
-        # child.tag == "ChannelConfig1"
-        exposure = int(child.tag[-1])
-        channel_description = child.attrib["Description"]
-        # channel_description == "[Cy3|Cy5] Green"
-        channel = channel_description.rsplit(" ", 1)[-1]
-        time = float(child.attrib["ExposureTimeMs"])
-        result[exposure] = ExposureInfo(channel.lower(), time)
-    return result
+        data = [_get_channel_data(child) for child in tree.find("Channels")]
+    return pandas.DataFrame(data)
 
 
 def get_measurement_params(folder):
@@ -53,51 +59,16 @@ def get_measurement_params(folder):
     return None
 
 
-def _add_measurement_params(data_frame, params):
-    """adds measurement parameters to a data frame"""
-    keys = [columns.PARAMETERS_CHANNEL, columns.PARAMETERS_TIME]
-    map = {k: dict(zip(keys, v)) for k, v in params.items()}
-    return _apply_map(data_frame, map, columns.EXPOSURE_ID)
-
-
-def _apply_map(data_frame, map, index_col):
-    """adds a nested dictionary to a data frame on a specific index column
-
-    map:
-        keys: must be the same as the values in the index column,
-        values: dictionary with new column names as keys and the values
-
-    example:
-
-        >>> df = DataFrame(data={"MyIndex": [10, 10, 20]})
-        >>> map = {
-        ...     10: {"NewCol": "foo"},
-        ...     20: {"NewCol": "Bar"},
-        ... }
-        >>> apply_map(df, map, "MyIndex")
-            MyIndex  NewCol
-        0   10       foo
-        1   10       foo
-        2   20       bar
-
-    """
-    map_df = pandas.DataFrame.from_dict(map, orient="index")
-    return data_frame.merge(
-        map_df,
-        how="left",
-        left_on=index_col,
-        right_index=True,
-    )
-
-
 def add_optional_measurement_parameters(data_frame, folder):
     """adds measurement params to the data frame, if they could be parsed"""
     params = get_measurement_params(folder)
-    if params:
-        available_exposures = set(data_frame[columns.EXPOSURE_ID].unique())
-        if available_exposures == set(params.keys()):
-            return _add_measurement_params(data_frame, params)
-    else:
-        data_frame[columns.PARAMETERS_CHANNEL] = numpy.nan
-        data_frame[columns.PARAMETERS_TIME] = numpy.nan
+    if params is not None:
+        params_exposures = params[columns.EXPOSURE_ID].unique()
+        data_exposures = data_frame[columns.EXPOSURE_ID].unique()
+        if set(data_exposures) == set(params_exposures):
+            return data_frame.merge(params, how="left", on=columns.EXPOSURE_ID)
+    
+    # only reached if the parameters were not merged into the data frame
+    data_frame[columns.PARAMETERS_CHANNEL] = numpy.nan
+    data_frame[columns.PARAMETERS_TIME] = numpy.nan
     return data_frame
diff --git a/sensospot_data/parser.py b/sensospot_data/parser.py
index cbe4d12..1b8515a 100755
--- a/sensospot_data/parser.py
+++ b/sensospot_data/parser.py
@@ -9,9 +9,10 @@ from collections import namedtuple
 
 import pandas
 
-from . import columns 
+from . import columns
 from .parameters import add_optional_measurement_parameters
 
+
 REGEX_WELL = re.compile(
     r"""
     (?P<row>([A-Z]+))  # row name containing one or more letters
diff --git a/tests/test_parameters.py b/tests/test_parameters.py
index dc4a7c3..48f2b5c 100644
--- a/tests/test_parameters.py
+++ b/tests/test_parameters.py
@@ -1,3 +1,4 @@
+import pandas
 from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
 
 
@@ -43,10 +44,13 @@ def test_parse_channel_info(example_dir):
     )
     result = _parse_measurement_params(params)
 
-    assert set(result.keys()) == {1, 2, 3}
-    assert result[1] == ("green", 100)
-    assert result[2] == ("red", 150)
-    assert result[3] == ("red", 15)
+    expected = pandas.DataFrame({
+        "Exposure.Id": [1,2,3],
+        "Parameters.Channel": ["green", "red", "red"],
+        "Parameters.Time" : [100.0, 150.0, 15.0]
+    })
+
+    assert result.equals(expected)
 
 
 def test_get_measurement_params_file_found(example_dir):
@@ -54,10 +58,13 @@ def test_get_measurement_params_file_found(example_dir):
 
     result = get_measurement_params(example_dir / EXAMPLE_DIR_WITH_PARAMS)
 
-    assert set(result.keys()) == {1, 2, 3}
-    assert result[1] == ("green", 100)
-    assert result[2] == ("red", 150)
-    assert result[3] == ("red", 15)
+    expected = pandas.DataFrame({
+        "Exposure.Id": [1,2,3],
+        "Parameters.Channel": ["green", "red", "red"],
+        "Parameters.Time" : [100.0, 150.0, 15.0]
+    })
+    
+    assert result.equals(expected)
 
 
 def test_get_measurement_params_file_not_found(example_dir):
@@ -68,28 +75,6 @@ def test_get_measurement_params_file_not_found(example_dir):
     assert result is None
 
 
-def test_add_measurement_params(exposure_df):
-    from sensospot_data.parameters import ExposureInfo, _add_measurement_params
-
-    params = {
-        1: ExposureInfo("red", 10),
-        2: ExposureInfo("green", 20),
-        3: ExposureInfo("blue", 50),
-    }
-
-    result = _add_measurement_params(exposure_df, params)
-
-    assert result["Exposure.Id"][0] == 1
-    assert result["Parameters.Channel"][0] == "red"
-    assert result["Parameters.Time"][0] == 10
-    assert result["Exposure.Id"][1] == 2
-    assert result["Parameters.Channel"][1] == "green"
-    assert result["Parameters.Time"][1] == 20
-    assert result["Exposure.Id"][2] == 3
-    assert result["Parameters.Channel"][2] == "blue"
-    assert result["Parameters.Time"][2] == 50
-
-
 def test_add_optional_measurement_parameters_with_params_file(
     exposure_df, example_dir
 ):
@@ -121,60 +106,3 @@ def test_add_optional_measurement_parameters_without_params_file(
         assert one_exposure_data_frame["Parameters.Time"].hasnans
 
 
-def test_apply_map(exposure_df):
-    from sensospot_data.parameters import _apply_map
-
-    map = {
-        1: {"SomeColumn": "A", "OtherColumn": 9},
-        2: {"SomeColumn": "B", "OtherColumn": 8},
-        3: {"SomeColumn": "C", "OtherColumn": 7},
-    }
-
-    result = _apply_map(exposure_df, map, "Exposure.Id")
-
-    for key, value in map.items():
-        mask = result["Exposure.Id"] == key
-        partial = result.loc[mask]
-        assert set(partial["SomeColumn"].unique()) == {value["SomeColumn"]}
-        assert set(partial["OtherColumn"].unique()) == {value["OtherColumn"]}
-
-
-def test_apply_map_keys_not_in_df(exposure_df):
-    from sensospot_data.parameters import _apply_map
-
-    map = {
-        1: {"some_col": "A", "other_col": 9},
-        2: {"some_col": "B", "other_col": 8},
-        3: {"some_col": "C", "other_col": 7},
-        4: {"some_col": "D", "other_col": 6},
-    }
-
-    result = _apply_map(exposure_df, map, "Exposure.Id")
-
-    for key in (1, 2, 3):
-        value = map[key]
-        mask = result["Exposure.Id"] == key
-        partial = result.loc[mask]
-        assert set(partial["some_col"].unique()) == {value["some_col"]}
-        assert set(partial["other_col"].unique()) == {value["other_col"]}
-
-    assert "D" not in set(result["some_col"].unique())
-    assert "6" not in set(result["other_col"].unique())
-
-
-def test_apply_map_not_all_keys_map_to_df(exposure_df):
-    from sensospot_data.parameters import _apply_map
-
-    map = {
-        1: {"some_col": "A", "other_col": 9},
-        3: {"some_col": "C", "other_col": 7},
-    }
-
-    result = _apply_map(exposure_df, map, "Exposure.Id")
-
-    assert not result.iloc[0].hasnans
-    assert result.iloc[1].hasnans
-    assert not result.iloc[2].hasnans
-
-    assert result["some_col"].hasnans
-    assert result["other_col"].hasnans
diff --git a/tests/test_sensospot_data.py b/tests/test_sensospot_data.py
index 967d743..90e4cd6 100644
--- a/tests/test_sensospot_data.py
+++ b/tests/test_sensospot_data.py
@@ -2,8 +2,7 @@
 
 
 def test_import_api():
-    from sensospot_data import ExposureInfo  # noqa: F401
     from sensospot_data import main  # noqa: F401
+    from sensospot_data import columns  # noqa: F401
     from sensospot_data import parse_file  # noqa: F401
     from sensospot_data import parse_folder  # noqa: F401
-    from sensospot_data import columns  # noqa: F401