diff --git a/sensospot_data/utils.py b/sensospot_data/utils.py
index 8d22065..4fa5d86 100644
--- a/sensospot_data/utils.py
+++ b/sensospot_data/utils.py
@@ -1,4 +1,6 @@
-import numpy
+from collections.abc import Mapping, Sequence
+
+import pandas
 
 from .columns import (
     META_DATA_EXPOSURE_ID,
@@ -16,6 +18,47 @@ def split_data_frame(data_frame, column):
     return {value: data_frame[mask] for value, mask in masks.items()}
 
 
+def _is_list_or_tuple(something):
+    """ returns true if something is a list or tuple """
+    if isinstance(something, Sequence):
+        return not isinstance(something, str)
+    return False
+
+
+def _is_numerical(something):
+    """ returns true if something is an int or float """
+    return isinstance(something, int) or isinstance(something, float)
+
+
+def _check_valid_exposure_map_entry(entry):
+    """ raises a ValueError, if an exposure map entry is not suitable """
+    if not _is_list_or_tuple(entry):
+        raise ValueError("Exposure Map: entries must be tuples or lists")
+    if not len(entry) == 2:
+        raise ValueError("Exposure Map: entries must consist of two items")
+    if not _is_numerical(entry[1]):
+        raise ValueError("Exposure Map: second entry must be numerical")
+
+
+def _check_exposure_map(data_frame, exposure_map):
+    """checks if an exposure map fits the requirements
+
+    Will raise a ValueError if requirements are not met
+    """
+    if not isinstance(exposure_map, Mapping):
+        raise ValueError("Exposure Map: map must be a dict")
+    exposure_ids_in_df = set(data_frame[META_DATA_EXPOSURE_ID].unique())
+    exposure_ids_in_map = set(exposure_map.keys())
+    if exposure_ids_in_df != exposure_ids_in_map:
+        msg = (
+            f"Exposure Ids {exposure_ids_in_df} don't match "
+            f"provided map {exposure_ids_in_map}"
+        )
+        raise ValueError(msg)
+    for entry in exposure_map.values():
+        _check_valid_exposure_map_entry(entry)
+
+
 def _set_exposure_data_from_parameters(data_frame):
     """infer the exposures from measurement parameters
 
@@ -51,17 +94,16 @@ def apply_exposure_map(data_frame, exposure_map=None):
     if exposure_map is None:
         return _set_exposure_data_from_parameters(data_frame)
 
-    existing = set(data_frame[META_DATA_EXPOSURE_ID].unique())
-    provided = set(exposure_map.keys())
-    if existing != provided:
-        raise ValueError(
-            f"Exposure Map differs from data frame: {provided} != {existing}"
-        )
-
-    data_frame[SETTINGS_EXPOSURE_CHANNEL] = numpy.nan
-    data_frame[SETTINGS_EXPOSURE_TIME] = numpy.nan
-    for exposure_id, exposure_info in exposure_map.items():
-        mask = data_frame[META_DATA_EXPOSURE_ID] == exposure_id
-        data_frame.loc[mask, SETTINGS_EXPOSURE_CHANNEL] = exposure_info.channel
-        data_frame.loc[mask, SETTINGS_EXPOSURE_TIME] = exposure_info.time
-    return data_frame
+    _check_exposure_map(data_frame, exposure_map)
+
+    exposure_df = pandas.DataFrame.from_dict(
+        exposure_map,
+        orient="index",
+        columns=[SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME],
+    )
+    return data_frame.merge(
+        exposure_df,
+        how="left",
+        left_on=META_DATA_EXPOSURE_ID,
+        right_index=True,
+    )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 51387de..0609a27 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -15,6 +15,99 @@ def test_split_data_frame(data_frame_with_params):
         assert set(value_df["Well.Row"].unique()) == {key}
 
 
+@pytest.mark.parametrize(
+    "value,expected",
+    [
+        [[1, 2], True],
+        [(1, 2), True],
+        [{1, 2}, False],
+        [{1: 2}, False],
+        ["1, 2", False],
+        [None, False],
+    ],
+)
+def test_is_list_or_tuple(value, expected):
+    from sensospot_data.utils import _is_list_or_tuple
+
+    result = _is_list_or_tuple(value)
+
+    assert result is expected
+
+
+@pytest.mark.parametrize(
+    "value,expected",
+    [
+        [1, True],
+        [1.2, True],
+        [{1, 2}, False],
+        [{1: 2}, False],
+        ["1", False],
+        [None, False],
+    ],
+)
+def test_is_numerical(value, expected):
+    from sensospot_data.utils import _is_numerical
+
+    result = _is_numerical(value)
+
+    assert result is expected
+
+
+def test_check_valid_exposure_map_entry_ok():
+    from sensospot_data.utils import _check_valid_exposure_map_entry
+
+    result = _check_valid_exposure_map_entry((2, 1))
+
+    assert result is None
+
+
+@pytest.mark.parametrize(
+    "value", [[], [1], (1, 2, 3), {"a": 1, "b": 2}, ("A", "B")]
+)
+def test_check_valid_exposure_map_entry_raises_error(value):
+    from sensospot_data.utils import _check_valid_exposure_map_entry
+
+    with pytest.raises(ValueError):
+        _check_valid_exposure_map_entry(value)
+
+
+def test_check_exposure_map_ok(exposure_df):
+    from sensospot_data.utils import _check_exposure_map
+
+    exposure_map = {1: ("A", 10), 2: ("B", 20), 3: ("C", 30)}
+
+    result = _check_exposure_map(exposure_df, exposure_map)
+
+    assert result is None
+
+
+def test_check_exposure_map_wrong_type(exposure_df):
+    from sensospot_data.utils import _check_exposure_map
+
+    exposure_map = []
+
+    with pytest.raises(ValueError):
+        _check_exposure_map(exposure_df, exposure_map)
+
+
+def test_check_exposure_map_wrong_ids(exposure_df):
+    from sensospot_data.utils import _check_exposure_map
+
+    exposure_map = {1: ("A", 10), 2: ("B", 20), 4: ("D", 40)}
+
+    with pytest.raises(ValueError):
+        _check_exposure_map(exposure_df, exposure_map)
+
+
+def test_check_exposure_map_invalid_entries(exposure_df):
+    from sensospot_data.utils import _check_exposure_map
+
+    exposure_map = {1: ("A", 10), 2: ("B", 20), 3: "ERROR"}
+
+    with pytest.raises(ValueError):
+        _check_exposure_map(exposure_df, exposure_map)
+
+
 def test_infer_exposure_from_parameters(data_frame_with_params):
     from sensospot_data.utils import _set_exposure_data_from_parameters
 
@@ -62,11 +155,9 @@ def test_apply_exposure_map_raises_error(data_frame_with_params):
         "X": ExposureSetting("Cy5", 150),
     }
 
-    with pytest.raises(ValueError) as excinfo:
+    with pytest.raises(ValueError):
         apply_exposure_map(data_frame_with_params, exposure_map)
 
-    assert str(excinfo.value).startswith("Exposure Map differs")
-
 
 def test_apply_exposure_map_from_parameters(data_frame_with_params):
     from sensospot_data.utils import apply_exposure_map