From 64098856d1174e4405df04614a8d63c408c98e4a Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Thu, 9 Nov 2023 10:44:43 +0100 Subject: [PATCH] If a DataFrame is split on a column containing NaN values, the rows with NaN values will now be included in the results --- pyproject.toml | 1 + src/sensospot_tools/__init__.py | 2 +- src/sensospot_tools/selection.py | 5 ++++- tests/test_selection.py | 16 +++++++++++++++- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2a0ea5a..1f787a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ Source = "https://git.cpi.imtek.uni-freiburg.de/holgi/sensospot_tools.git" [project.optional-dependencies] dev = [ "black", + "flit", "keyring", "pre-commit", "ruff", diff --git a/src/sensospot_tools/__init__.py b/src/sensospot_tools/__init__.py index 5586259..2bedf03 100644 --- a/src/sensospot_tools/__init__.py +++ b/src/sensospot_tools/__init__.py @@ -3,7 +3,7 @@ Some small tools for working with parsed Sensospot data. """ -__version__ = "0.2.0" +__version__ = "0.2.1" from .hdr import normalize, select_hdr_data # noqa: F401 from .selection import select, split # noqa: F401 diff --git a/src/sensospot_tools/selection.py b/src/sensospot_tools/selection.py index dec90ed..3aefcb3 100644 --- a/src/sensospot_tools/selection.py +++ b/src/sensospot_tools/selection.py @@ -31,7 +31,10 @@ def select( Returns: a copy of the DataFrame that has the value in the column """ - selector = data[column] == value + if pandas.isna(value): + selector = data[column].isna() + else: + selector = data[column] == value return data.loc[selector].copy() diff --git a/tests/test_selection.py b/tests/test_selection.py index 21a59a9..ab7be2a 100644 --- a/tests/test_selection.py +++ b/tests/test_selection.py @@ -28,7 +28,7 @@ def test_selection_select(example): assert list(result["value"]) == [35, 9] -def test_selection_split_one_column(example): +def test_selection_split_one_column_without_na(example): from sensospot_tools.selection import split result = dict(split(example, "carnivore")) @@ -38,6 +38,20 @@ def test_selection_split_one_column(example): assert list(result[False]["value"]) == [35, 9] +def test_selection_split_one_column_with_na(example): + import numpy + from sensospot_tools.selection import split + + example["carnivore"].iloc[1] = numpy.nan + + result = dict(split(example, "carnivore")) + + assert set(result.keys()) == {False, True, numpy.nan} + assert list(result[True]["value"]) == [3, 60] + assert list(result[False]["value"]) == [35, 9] + assert list(result[numpy.nan]["value"]) == [55] + + def test_selection_split_multiple_columns(example): from sensospot_tools.selection import split