from typing import Any, Iterator

import pandas


def ensure_list(something: Any) -> list[Any]:
    """ensures the provided value is a list or encapsulated in a list

    This is intended for places where column names should be provided as a
    list, but a single column name is accepted as well.

    >>> ensure_list("abc")
    ['abc']

    >>> ensure_list(("a", "b"))
    ['a', 'b']

    >>> ensure_list({"a": 1, "b": 2})
    ['a', 'b']

    >>> ensure_list(1)
    [1]

    something: a single value or an iterable of values
    returns: a list with the items of an iterable; str, bytes and
             non-iterable values are wrapped in a one-element list
    """
    # str and bytes are iterables themselves, but passing one of them means
    # "one single name", not "a sequence of characters / byte values"
    if isinstance(something, (str, bytes)):
        return [something]
    try:
        return list(something)
    except TypeError:
        # something is not an iterable
        return [something]


def _is_missing(value: Any) -> bool:
    """checks if a value is a scalar missing value (None, NaN, NaT, NA)"""
    # pandas.isna() works element-wise on containers, so it is only asked
    # for scalars; the "and" short-circuits for everything else
    return bool(pandas.api.types.is_scalar(value) and pandas.isna(value))


def select(
    data: pandas.DataFrame, column: str, value: Any
) -> pandas.DataFrame:
    """selects a portion of a dataframe based by a value in a column

    Example:
        >>> print(data)
          category  value
        0      dog      1
        1      cat      2
        2    horse      3
        3      cat      4

        >>> print(select(data, "category", "cat"))
          category  value
        1      cat      2
        3      cat      4

    A missing value (None, NaN, NaT, pandas.NA) selects the rows where the
    column has no value.

    data:    a data DataFrame to select from
    column:  name of a column in a dataframe
    value:   rows with this value in the column will be selected
    returns: a copy of the DataFrame that has the value in the column
    raises:  KeyError if the column is not part of the DataFrame
    """
    series = data[column]
    if _is_missing(value):
        # a missing value never compares equal to anything, not even to
        # itself, "series == value" would therefore select no row at all
        selector = series.isna()
    else:
        selector = series == value
    return data.loc[selector].copy()


def split(
    data: pandas.DataFrame, column: str
) -> Iterator[tuple[Any, pandas.DataFrame]]:
    """splits a data frame by unique values in a column

    returns an iterator where each result is key-value-pair. The key is the
    unique value used for the split, the value is a slice of the dataframe
    selected by the unique value contained in the column

    Example:

        >>> print(data)
          category  value
        0      dog      1
        1      cat      2
        2    horse      3
        3      cat      4

        >>> result = dict(split(data, column="category"))

        >>> print(result["dog"])
          category  value
        0      dog      1

        >>> print(result["cat"])
          category  value
        1      cat      2
        3      cat      4

        >>> print(result["horse"])
          category  value
        2    horse      3

    Rows without a value in the column are yielded as one single group,
    its key is the first missing value found in the column.

    data:    DataFrame to process
    column:  column identifier to split on unique values
    yields:  key-value-pairs of
                keys:   one unique value
                values: copy of the rows that contain the unique value
    raises:  KeyError if the column is not part of the DataFrame
    """
    # looked up right away, so a wrong column name raises on calling split()
    # and not later on when the result is iterated
    unique_values = data[column].unique()

    def _pairs() -> Iterator[tuple[Any, pandas.DataFrame]]:
        missing_done = False
        for value in unique_values:
            if _is_missing(value):
                # an object column may hold different kinds of missing
                # values (e.g. None and NaN), they all select the same rows
                if missing_done:
                    continue
                missing_done = True
            yield value, select(data, column, value)

    return _pairs()
import pytest

# The columns are separated by tabs. The tabs are spelled out as escape
# sequences, so they stay visible and survive editors or tools that
# normalise whitespace; the fixture below parses the data with sep="\t".
CSV_DATA = """
category\tvalue
dog\t3
cat\t55
horse\t35
cat\t60
horse\t9
"""


@pytest.fixture
def example():
    """provides the example data parsed into a new DataFrame"""
    import io

    import pandas

    buffer = io.StringIO(CSV_DATA.strip())
    # nothing to tear down after the test, a plain return is sufficient
    return pandas.read_csv(buffer, sep="\t")


def test_selection_select(example):
    """select() returns only the rows with the value, in original order"""
    from sensospot_tools.selection import select

    result = select(example, "category", "horse")
    assert list(result["category"]) == ["horse", "horse"]
    assert list(result["value"]) == [35, 9]


def test_selection_split(example):
    """split() yields one group of rows for each unique value"""
    from sensospot_tools.selection import split

    result = dict(split(example, "category"))

    assert sorted(result.keys()) == ["cat", "dog", "horse"]
    assert list(result["cat"]["value"]) == [55, 60]
    assert list(result["dog"]["value"]) == [3]
    assert list(result["horse"]["value"]) == [35, 9]
def test_api():
    """checks that the public functions can be imported from the package"""
    from sensospot_tools import select, split  # noqa: F401