Holger Frey
2 years ago
7 changed files with 167 additions and 45 deletions
@ -0,0 +1,29 @@
@@ -0,0 +1,29 @@
|
||||
from typing import Any |
||||
|
||||
|
||||
def ensure_list(something: Any) -> list[Any]:
    """returns the provided value as a list, wrapping it if necessary

    This is intended for arguments like column names, where a caller may
    pass several names in an iterable or just one single name.

    >>> ensure_list("abc")
    ['abc']

    >>> ensure_list(("a", "b"))
    ['a', 'b']

    >>> ensure_list(1)
    [1]

    something: a single value or an iterable of values
    returns:   a new list; a string or a non-iterable value becomes the
               only item of the list, any other iterable is expanded into
               the items it yields (for a dict these are its keys)
    """
    # a string is iterable as well, but it must stay in one piece
    if not isinstance(something, str):
        try:
            return list(something)
        except TypeError:
            # something is not an iterable, wrap the bare value below
            pass
    return [something]
@ -0,0 +1,74 @@
@@ -0,0 +1,74 @@
|
||||
from typing import Any, Iterator |
||||
|
||||
import pandas |
||||
|
||||
|
||||
def select(
    data: pandas.DataFrame, column: str, value: Any
) -> pandas.DataFrame:
    """selects a portion of a dataframe based on a value in a column

    Example:
        >>> print(data)
          category  value
        0      dog      1
        1      cat      2
        2    horse      3
        3      cat      4

        >>> print(select(data, "category", "cat"))
          category  value
        1      cat      2
        3      cat      4

    A missing value (None, NaN, pandas.NA, ...) never compares equal to
    anything, not even to itself. If such a value is requested, the rows
    where the column is missing are selected instead of returning an empty
    result.

    data:    a DataFrame to select from
    column:  name of a column in the dataframe
    value:   rows with this value in the column will be selected
    returns: a copy of the rows of the DataFrame that have the value in
             the column
    """
    # only scalars are checked for "missing": pandas.isna() on a list-like
    # would return an array, which has no single truth value
    if pandas.api.types.is_scalar(value) and pandas.isna(value):
        selector = data[column].isna()
    else:
        selector = data[column] == value
    return data.loc[selector].copy()
||||
|
||||
|
||||
def split(
    data: pandas.DataFrame, column: str
) -> Iterator[tuple[Any, pandas.DataFrame]]:
    """splits a data frame into one part per unique value of a column

    The result is an iterator of key-value-pairs: the key is one of the
    unique values found in the column, the value is what select() returns
    for this key, i.e. a copy of the rows holding that key in the column.

    Example:

        >>> print(data)
          category  value
        0      dog      1
        1      cat      2
        2    horse      3
        3      cat      4

        >>> result = dict(split(data, column="category"))

        >>> print(result["dog"])
          category  value
        0      dog      1

        >>> print(result["cat"])
          category  value
        1      cat      2
        3      cat      4

        >>> print(result["horse"])
          category  value
        2    horse      3

    data:   DataFrame to process
    column: column identifier to split on unique values
    yields: key-value-pairs of
            keys:   one unique value
            values: the rows of the dataframe that contain the unique value
    """
    # the unique values are looked up right away, so accessing an unknown
    # column fails on calling split() and not on the first iteration
    keys = data[column].unique()

    def _pairs() -> Iterator[tuple[Any, pandas.DataFrame]]:
        for key in keys:
            yield key, select(data, column, key)

    return _pairs()
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
import pytest |
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "provided, expected",
    [
        # a string stays in one piece
        ("abc", ["abc"]),
        # an iterable is expanded into its items
        (tuple("abc"), ["a", "b", "c"]),
        # iterating over a dict yields its keys
        ({"a": 1, "b": 2}, ["a", "b"]),
        # a value that is not iterable gets wrapped
        (1, [1]),
    ],
)
def test_helpers_ensure_list(provided, expected):
    """ensure_list() wraps single values and expands iterables"""
    from sensospot_tools.helpers import ensure_list

    assert ensure_list(provided) == expected
@ -0,0 +1,39 @@
@@ -0,0 +1,39 @@
|
||||
import pytest |
||||
|
||||
# Tab separated example data used by the fixture below.
# The separators are written as "\t" escapes: a literal tab character is
# not distinguishable from spaces and gets lost easily when the text is
# copied or reformatted, but the data is parsed with sep="\t".
CSV_DATA = """
category\tvalue
dog\t3
cat\t55
horse\t35
cat\t60
horse\t9
"""
||||
|
||||
|
||||
@pytest.fixture
def example():
    """provides the example data as a pandas DataFrame

    The whitespace surrounding CSV_DATA is stripped, the remaining text is
    parsed as tab separated values.
    """
    import io

    import pandas

    yield pandas.read_csv(io.StringIO(CSV_DATA.strip()), sep="\t")
||||
|
||||
|
||||
def test_selection_select(example):
    """select() returns only the rows that hold the requested value"""
    from sensospot_tools.selection import select

    horses = select(example, "category", "horse")

    assert horses["category"].tolist() == ["horse", "horse"]
    assert horses["value"].tolist() == [35, 9]
||||
|
||||
|
||||
def test_selection_split(example):
    """split() yields one part of the data per unique value"""
    from sensospot_tools.selection import split

    parts = dict(split(example, "category"))

    assert sorted(parts) == ["cat", "dog", "horse"]
    assert parts["cat"]["value"].tolist() == [55, 60]
    assert parts["dog"]["value"].tolist() == [3]
    assert parts["horse"]["value"].tolist() == [35, 9]
@ -1,41 +1,4 @@
@@ -1,41 +1,4 @@
|
||||
""" Stub file for testing the project |
||||
|
||||
There are three predefined ways to run tests: |
||||
|
||||
make test: |
||||
runs only unit tests, that are not marked with "fun" (for functional test) |
||||
in a random order. If a test failed before, only the failed tests will be |
||||
run. This is intended to be the default testing method while developing. |
||||
|
||||
make testall: |
||||
runs unit tests and functional tests in random order. Will give a complete |
||||
overview of the test suite. |
||||
|
||||
make coverage: |
||||
runs only tests marked with "fun" (for functional tests) and generates a |
||||
coverage report for the test run. The idea is to check the test coverage |
||||
only on functional tests to see if a) everything is as much covered as |
||||
possible and b) to find dead code that is not called in end-to-end tests. |
||||
|
||||
all three test strategies will run "make lint" before to catch easily made |
||||
mistakes. |
||||
""" |
||||
|
||||
import pytest |
||||
|
||||
|
||||
def test_example_unittest():
    """example unit test - try importing the project

    will be run by 'make test' and 'make testall' but not 'make coverage'
    """
    # the import itself is the test, the imported name is not used
    import sensospot_tools  # noqa: F401
||||
|
||||
|
||||
@pytest.mark.functional
def test_example_functional_test():
    """example functional test

    will be run by 'make coverage' and 'make testall' but not 'make test'
    """
    assert True
||||
def test_api():
    """test if the provided functionality is importable"""
    # both names are imported from the top level of the package;
    # the imports themselves are the test, the names are not used
    from sensospot_tools import split  # noqa: F401
    from sensospot_tools import select  # noqa: F401
||||
|
Loading…
Reference in new issue