added functions for selecting from dataframes and splitting a dataframe

3 years ago · 331f41ef61
7 changed files with 167 additions and 45 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -27,8 +27,9 @@ classifiers = [
 ]
 dependencies = [
-
+    "pandas"
 ]
 [project.urls]
 Source = "https://git.cpi.imtek.uni-freiburg.de/holgi/sensospot_tools.git"
--- a/src/sensospot_tools/init.py
+++ b/src/sensospot_tools/init.py
@ -5,6 +5,4 @@ Some small tools for working with parsed Sensospot data.
 __version__ = "0.0.1"
-
+from .selection import split, select  # noqa: F401
 def test() -> None:
    """prints the string "works" via print()"""
    print("works")
--- a/src/sensospot_tools/helpers.py
+++ b/src/sensospot_tools/helpers.py
@ -0,0 +1,29 @@
 from typing import Any
 def ensure_list(something: Any) -> list[Any]:
     """returns the provided value as a list, wrapping it if necessary

     This is intended for places where a list of column names is expected
     but a single column name should be accepted as well.

     >>> ensure_list("abc")
     ['abc']
     >>> ensure_list(("a", "b"))
     ['a', 'b']
     >>> ensure_list(1)
     [1]

     something:  a single value or an iterable of values
     returns:    the items of an iterable as a list; a string or a
                 non-iterable value is returned as a one-item list
     """
     # list() would split a string into characters, so strings skip this
     if not isinstance(something, str):
         try:
             return list(something)
         except TypeError:
             # not an iterable, fall through and wrap the value itself
             pass
     return [something]
--- a/src/sensospot_tools/selection.py
+++ b/src/sensospot_tools/selection.py
@ -0,0 +1,74 @@
 from typing import Any, Iterator
 import pandas
 def select(
     data: pandas.DataFrame, column: str, value: Any
 ) -> pandas.DataFrame:
     """selects a portion of a dataframe by a value in a column

     Example:

     >>> print(data)
           category  value
         0      dog      1
         1      cat      2
         2    horse      3
         3      cat      4

     >>> print(select(data, "category", "cat"))
           category  value
         1      cat      2
         3      cat      4

     A missing value (None, NaN, NaT, pandas.NA) given as `value` selects
     the rows where the column itself holds a missing value.

     data:    a DataFrame to select from
     column:  name of a column in the dataframe
     value:   rows with this value in the column will be selected
     returns: a copy of the rows of the DataFrame that have the value in
              the column
     raises:  KeyError if the column is not part of the DataFrame
     """
     if pandas.api.types.is_scalar(value) and pandas.isna(value):
         # NaN never compares equal to anything, not even to itself, so an
         # equality check could not find rows with missing values
         selector = data[column].isna()
     else:
         selector = data[column] == value
     # copy, so that changes to the result never touch the original data
     return data.loc[selector].copy()
 def split(
     data: pandas.DataFrame, column: str
 ) -> Iterator[tuple[Any, pandas.DataFrame]]:
     """splits a data frame into parts, one per unique value in a column

     The result is an iterator of key-value-pairs. Each key is one unique
     value found in the column, each value is the part of the dataframe
     that `select()` returns for this unique value.

     Example:

     >>> print(data)
           category  value
         0      dog      1
         1      cat      2
         2    horse      3
         3      cat      4

     >>> result = dict( split(data, column="category") )

     >>> print(result["dog"])
           category  value
         0      dog      1

     >>> print(result["cat"])
           category  value
         1      cat      2
         3      cat      4

     >>> print(result["horse"])
           category  value
         2    horse      3

     data:   DataFrame to process
     column: column identifier to split on unique values
     yields: key-value-pairs of
             keys: one unique value
             values: part of the dataframe selected for the unique value
     """
     # looked up right away, so an unknown column name already fails when
     # split() is called and not only when the result is iterated
     keys = data[column].unique()

     def _pairs() -> Iterator[tuple[Any, pandas.DataFrame]]:
         for key in keys:
             yield key, select(data, column, key)

     return _pairs()
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@ -0,0 +1,18 @@
 import pytest
@pytest.mark.parametrize(
    ("provided", "expected"),
    [
        ("abc", ["abc"]),
        (tuple("abc"), ["a", "b", "c"]),
        ({"a": 1, "b": 2}, ["a", "b"]),
        (1, [1]),
    ],
)
def test_helpers_ensure_list(provided, expected):
    """ensure_list wraps strings and scalars and lists up other iterables"""
    from sensospot_tools.helpers import ensure_list

    assert ensure_list(provided) == expected
--- a/tests/test_selection.py
+++ b/tests/test_selection.py
@ -0,0 +1,39 @@
 import pytest
 CSV_DATA = """
 category	value
 dog	3
 cat	55
 horse	35
 cat	60
 horse	9
 """
@pytest.fixture
def example():
    """provides the tab separated CSV_DATA parsed into a DataFrame"""
    import io

    import pandas

    # strip the newlines surrounding the literal, the header must come first
    yield pandas.read_csv(io.StringIO(CSV_DATA.strip()), sep="\t")
 def test_selection_select(example):
     """select returns only the rows that hold the value in the column"""
     from sensospot_tools.selection import select

     horses = select(example, "category", "horse")

     assert horses["category"].tolist() == ["horse", "horse"]
     assert horses["value"].tolist() == [35, 9]
 def test_selection_split(example):
     """split yields one part of the data per unique value in the column"""
     from sensospot_tools.selection import split

     by_category = dict(split(example, "category"))

     assert sorted(by_category) == ["cat", "dog", "horse"]
     values = {
         key: part["value"].tolist() for key, part in by_category.items()
     }
     assert values == {"cat": [55, 60], "dog": [3], "horse": [35, 9]}
--- a/tests/test_sensospot_tools.py
+++ b/tests/test_sensospot_tools.py
@ -1,41 +1,4 @@
-""" Stub file for testing the project
+def test_api():
-
+    """test if the provided functionality is importable"""
-There are three predefined ways to run tests:
+    from sensospot_tools import split  # noqa: F401
-
+    from sensospot_tools import select  # noqa: F401
 make test:
    runs only unit tests, that are not marked with "fun" (for functional test)
    in a random order. If a test failed before, only the failed tests will be
    run. This is intended to be the default testing method while developing.
 make testall:
    runs unit tests and functional tests in random order. Will give a complete
    overview of the test suite.
 make coverage:
    runs only tests marked with "fun" (for functional tests) and generates a
    coverage report for the test run. The idea is to check the test coverage
    only on functional tests to see if a) everything is as much covered as
    possible and b) to find dead code that is not called in end-to-end tests.
 all three test strategies will run "make lint" before to catch easily made
 mistakes.
 """
 import pytest
 def test_example_unittest():
    """example unittest - try importing the project

    will be run by 'make test' and 'make testall' but not 'make coverage'
    """
    import sensospot_tools  # noqa: F401
@pytest.mark.functional
 def test_example_functional_test():
    """example functional test - always passes

    will be run by 'make coverage' and 'make testall' but not 'make test'
    """
    assert True