From 9359cc00af45bfec2d07b4040ac25e438e355d7d Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Tue, 18 Jan 2022 11:44:21 +0100 Subject: [PATCH] added api function apply_map this also led to simplifications in _add_measurement_params and apply_exposure_map --- CHANGES.md | 16 +++++++++------ sensospot_data/__init__.py | 4 ++-- sensospot_data/parameters.py | 17 +++++++--------- sensospot_data/utils.py | 37 +++++++++++++++++++++++++++------- tests/test_parameters.py | 4 ++-- tests/test_sensovation_data.py | 1 + tests/test_utils.py | 18 +++++++++++++++++ 7 files changed, 70 insertions(+), 27 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0500924..dd8d206 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,18 +11,22 @@ 0.5.1 ===== -- added standard aggregates functionality to utlis -- exposed `utils` api functions in package + - added standard aggregates functionality to utils + - exposed `utils` api functions in package 0.5.2 ===== -- providing the normalized exposure time to `normalize_values` is now optional + - providing the normalized exposure time to `normalize_values` is now optional 0.5.3 ===== -- renaming function `split_data_frame` to `split` -- added --quite flag to cli to bypass sanity checks -- removed the aggregate functions from the utils module, is now a spearate project + - renaming function `split_data_frame` to `split` + - added --quiet flag to cli to bypass sanity checks + - removed the aggregate functions from the utils module, is now a separate project + +0.5.4 +===== + - added api function "apply_map" 0.4.0 - api changes diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py index 52637d8..af73fe1 100644 --- a/sensospot_data/__init__.py +++ b/sensospot_data/__init__.py @@ -3,14 +3,14 @@ Parsing the numerical output from Sensovations Sensospot image analysis. 
""" -__version__ = "0.5.3" +__version__ = "0.5.4" from pathlib import Path import click -from .utils import split, apply_exposure_map # noqa: F401 +from .utils import split, apply_map, apply_exposure_map # noqa: F401 from .parser import parse_file, parse_folder # noqa: F401 from .parameters import ExposureInfo # noqa: F401 from .dynamic_range import blend, create_xdr, normalize_values # noqa: F401 diff --git a/sensospot_data/parameters.py b/sensospot_data/parameters.py index 372bbb5..36fdefc 100644 --- a/sensospot_data/parameters.py +++ b/sensospot_data/parameters.py @@ -14,6 +14,7 @@ from .columns import ( META_DATA_PARAMETERS_TIME, META_DATA_PARAMETERS_CHANNEL, ) +from .utils import apply_map ExposureInfo = namedtuple("ExposureInfo", ["channel", "time"]) @@ -58,23 +59,19 @@ def get_measurement_params(folder): def _add_measurement_params(data_frame, params): """ adds measurement parameters to a data frame """ - for exposure_id, info in params.items(): - mask = data_frame[META_DATA_EXPOSURE_ID] == exposure_id - data_frame.loc[mask, META_DATA_PARAMETERS_CHANNEL] = info.channel - data_frame.loc[mask, META_DATA_PARAMETERS_TIME] = info.time - data_frame[META_DATA_PARAMETERS_CHANNEL] = data_frame[ - META_DATA_PARAMETERS_CHANNEL - ].astype("category") - return data_frame + columns=[META_DATA_PARAMETERS_CHANNEL, META_DATA_PARAMETERS_TIME] + map = {k: dict(zip(columns, v)) for k, v in params.items()} + return apply_map(data_frame, map, META_DATA_EXPOSURE_ID) def add_optional_measurement_parameters(data_frame, folder): """ adds measurement params to the data frame, if they could be parsed """ - data_frame[META_DATA_PARAMETERS_CHANNEL] = numpy.nan - data_frame[META_DATA_PARAMETERS_TIME] = numpy.nan params = get_measurement_params(folder) if params: available_exposures = set(data_frame[META_DATA_EXPOSURE_ID].unique()) if available_exposures == set(params.keys()): return _add_measurement_params(data_frame, params) + else: + data_frame[META_DATA_PARAMETERS_CHANNEL] = numpy.nan 
+ data_frame[META_DATA_PARAMETERS_TIME] = numpy.nan return data_frame diff --git a/sensospot_data/utils.py b/sensospot_data/utils.py index 632fe79..af5aa6b 100644 --- a/sensospot_data/utils.py +++ b/sensospot_data/utils.py @@ -104,14 +104,37 @@ def apply_exposure_map(data_frame, exposure_map=None): _check_exposure_map(data_frame, exposure_map) - exposure_df = pandas.DataFrame.from_dict( - exposure_map, - orient="index", - columns=[SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME], - ) + columns=[SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME] + map = {k: dict(zip(columns, v)) for k, v in exposure_map.items()} + + return apply_map(data_frame, map, META_DATA_EXPOSURE_ID) + + +def apply_map(data_frame, map, index_col): + """adds a nested dictionary to a data frame on a specific index column + + map: + keys: must be the same as the values in the index column, + values: dictionary with new column names as keys and the values + + example: + + >>> df = DataFrame(data={"MyIndex": [10, 10, 20]}) + >>> map = { + ... 10: {"NewCol": "foo"}, + ... 20: {"NewCol": "Bar"}, + ... 
} + >>> apply_map(df, map, "MyIndex") + MyIndex NewCol + 0 10 foo + 1 10 foo + 2 20 Bar + + """ + map_df = pandas.DataFrame.from_dict(map, orient="index") return data_frame.merge( - exposure_df, + map_df, how="left", - left_on=META_DATA_EXPOSURE_ID, + left_on=index_col, right_index=True, ) diff --git a/tests/test_parameters.py b/tests/test_parameters.py index d48b7c1..815c511 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -96,7 +96,7 @@ def test_add_optional_measurement_parameters_with_params_file( from sensospot_data.parameters import add_optional_measurement_parameters folder = example_dir / EXAMPLE_DIR_WITH_PARAMS - add_optional_measurement_parameters(exposure_df, folder) + exposure_df = add_optional_measurement_parameters(exposure_df, folder) expected = [(1, "green", 100), (2, "red", 150), (3, "red", 15)] for exposure_id, channel, time in expected: @@ -112,7 +112,7 @@ def test_add_optional_measurement_parameters_without_params_file( from sensospot_data.parameters import add_optional_measurement_parameters folder = example_dir / EXAMPLE_DIR_WO_PARAMS - add_optional_measurement_parameters(exposure_df, folder) + exposure_df = add_optional_measurement_parameters(exposure_df, folder) for exposure_id in range(1, 4): mask = exposure_df["Exposure.Id"] == exposure_id diff --git a/tests/test_sensovation_data.py b/tests/test_sensovation_data.py index 267a738..6ff602a 100644 --- a/tests/test_sensovation_data.py +++ b/tests/test_sensovation_data.py @@ -11,3 +11,4 @@ def test_import_api(): from sensospot_data import parse_folder # noqa: F401 from sensospot_data import normalize_values # noqa: F401 from sensospot_data import apply_exposure_map # noqa: F401 + from sensospot_data import apply_map # noqa: F401 diff --git a/tests/test_utils.py b/tests/test_utils.py index bcd5399..e431727 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -177,3 +177,21 @@ def test_apply_exposure_map_from_parameters_raises_error( 
apply_exposure_map(data_frame_without_params, None) assert str(excinfo.value).startswith("Exposure Map: measurement") + + +def test_apply_map(exposure_df): + from sensospot_data.utils import apply_map + + map = { + 1: {"SomeColumn": "A", "OtherColumn": 9}, + 2: {"SomeColumn": "B", "OtherColumn": 8}, + 3: {"SomeColumn": "C", "OtherColumn": 7}, + } + + result = apply_map(exposure_df, map, "Exposure.Id") + + for key, value in map.items(): + mask = result["Exposure.Id"] == key + partial = result.loc[mask] + assert set(partial["SomeColumn"].unique()) == {value["SomeColumn"]} + assert set(partial["OtherColumn"].unique()) == {value["OtherColumn"]} \ No newline at end of file