diff --git a/README.md b/README.md
index c0e198e..5f20926 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ from .parser import parse_file, parse_folder # noqa: F401
    Splits a data frame based on the unique values of a column. Will return a
    dict, with the unique values as keys and the corresponding data frame as
    value
- - **apply_map(data_frame, map, idex_col)**
+ - **apply_map(data_frame, map, index_col)**
    Adds information provided in the nested dictionary `map` to a data frame,
    based on the values in the data_frame column `index_col`.
  - **apply_exposure_map(data_frame, exposure_map)**
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 517b77c..5913376 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -195,3 +195,47 @@ def test_apply_map(exposure_df):
         partial = result.loc[mask]
         assert set(partial["SomeColumn"].unique()) == {value["SomeColumn"]}
         assert set(partial["OtherColumn"].unique()) == {value["OtherColumn"]}
+
+
+def test_apply_map_keys_not_in_df(exposure_df):
+    """Map entries whose key is absent from the data frame add no values."""
+    from sensospot_data.utils import apply_map
+
+    map = {
+        1: {"some_col": "A", "other_col": 9},
+        2: {"some_col": "B", "other_col": 8},
+        3: {"some_col": "C", "other_col": 7},
+        4: {"some_col": "D", "other_col": 6},
+    }
+
+    result = apply_map(exposure_df, map, "Exposure.Id")
+
+    for key in (1, 2, 3):
+        value = map[key]
+        mask = result["Exposure.Id"] == key
+        partial = result.loc[mask]
+        assert set(partial["some_col"].unique()) == {value["some_col"]}
+        assert set(partial["other_col"].unique()) == {value["other_col"]}
+
+    assert "D" not in set(result["some_col"].unique())
+    # compare against the integer from the map; the string "6" could never match
+    assert 6 not in set(result["other_col"].unique())
+
+
+def test_apply_map_not_all_keys_map_to_df(exposure_df):
+    """Rows without a matching map key get missing values in the new columns."""
+    from sensospot_data.utils import apply_map
+
+    map = {
+        1: {"some_col": "A", "other_col": 9},
+        3: {"some_col": "C", "other_col": 7},
+    }
+
+    result = apply_map(exposure_df, map, "Exposure.Id")
+
+    assert not result.iloc[0].hasnans
+    assert result.iloc[1].hasnans
+    assert not result.iloc[2].hasnans
+
+    assert result["some_col"].hasnans
+    assert result["other_col"].hasnans