Holger Frey
2 years ago
7 changed files with 167 additions and 45 deletions
@ -0,0 +1,29 @@ |
|||||||
|
from typing import Any |
||||||
|
|
||||||
|
|
||||||
|
def ensure_list(something: Any) -> list[Any]:
    """ensures the provided value is a list or encapsulated in a list

    This is intended to be used where column names should be provided as a
    list, but could also be provided as a single column name.

    >>> ensure_list("abc")
    ['abc']

    >>> ensure_list(("a", "b"))
    ['a', 'b']

    >>> ensure_list(b"abc")
    [b'abc']

    >>> ensure_list(1)
    [1]

    something:  the value to convert to a list or to wrap in a list
    returns:    a new list; iterables are expanded into their items (a dict
                gives its keys), everything else becomes the single item
    """
    # text and byte strings are iterables, but they represent one single
    # value, so they are wrapped instead of being split into their parts
    if isinstance(something, (str, bytes)):
        return [something]
    try:
        return list(something)
    except TypeError:
        # something is not an iterable
        return [something]
@ -0,0 +1,74 @@ |
|||||||
|
from typing import Any, Iterator |
||||||
|
|
||||||
|
import pandas |
||||||
|
|
||||||
|
|
||||||
|
def select(
    data: pandas.DataFrame, column: str, value: Any
) -> pandas.DataFrame:
    """selects a portion of a dataframe based by a value in a column

    Example:
        >>> print(data)
          category  value
        0      dog      1
        1      cat      2
        2    horse      3
        3      cat      4

        >>> print(select(data, "category", "cat"))
          category  value
        1      cat      2
        3      cat      4

    A missing value (None, NaN, pandas.NA, ...) as `value` selects the rows
    where the column entry is missing.

    data:    a data DataFrame to select from
    column:  name of a column in a dataframe
    value:   rows with this value in the column will be selected
    returns: a copy of the DataFrame that has the value in the column
    """
    entries = data[column]
    if pandas.api.types.is_scalar(value) and pandas.isna(value):
        # missing values never compare equal to anything, not even to
        # themselves, so an equality test would silently select nothing
        selector = entries.isna()
    else:
        selector = entries == value
    # copy, so that changes to the selection never touch the source frame
    return data.loc[selector].copy()
||||||
|
|
||||||
|
|
||||||
|
def split(
    data: pandas.DataFrame, column: str
) -> Iterator[tuple[Any, pandas.DataFrame]]:
    """iterates over the parts of a data frame, one part per unique value

    Every item of the returned iterator is a pair: first the unique value
    found in the column, second the part of the data frame that `select`
    returns for this value. The pairs can be passed directly to `dict()`.

    Example:
        >>> print(data)
          category  value
        0      dog      1
        1      cat      2
        2    horse      3
        3      cat      4

        >>> result = dict(split(data, column="category"))

        >>> print(result["dog"])
          category  value
        0      dog      1

        >>> print(result["cat"])
          category  value
        1      cat      2
        3      cat      4

        >>> print(result["horse"])
          category  value
        2    horse      3

    data:    DataFrame to process
    column:  column identifier to split on unique values
    yields:  key-value-pairs of
                keys: one unique value
                values: part of the dataframe selected for the unique value
    """
    # the unique values are looked up right away (the outermost iterable of
    # a generator expression is evaluated immediately), the parts are only
    # selected while iterating
    return (
        (key, select(data, column, key)) for key in data[column].unique()
    )
@ -0,0 +1,18 @@ |
|||||||
|
import pytest |
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "provided, expected",
    [
        ("abc", ["abc"]),
        (("a", "b", "c"), ["a", "b", "c"]),
        ({"a": 1, "b": 2}, ["a", "b"]),
        (1, [1]),
    ],
)
def test_helpers_ensure_list(provided, expected):
    """a string, a tuple, a dict and a number all end up as the right list"""
    from sensospot_tools.helpers import ensure_list

    assert ensure_list(provided) == expected
@ -0,0 +1,39 @@ |
|||||||
|
import pytest |
||||||
|
|
||||||
|
# Example table for the selection tests. The columns are separated by tabs;
# they are written as explicit "\t" escapes because literal tab characters
# are invisible and easily turned into spaces by editors and formatters,
# which would break parsing with sep="\t". The surrounding newlines are
# removed with .strip() before the data is parsed.
CSV_DATA = (
    "\n"
    "category\tvalue\n"
    "dog\t3\n"
    "cat\t55\n"
    "horse\t35\n"
    "cat\t60\n"
    "horse\t9\n"
)
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def example():
    """provides the tab separated CSV_DATA table parsed into a DataFrame"""
    import io

    import pandas

    yield pandas.read_csv(io.StringIO(CSV_DATA.strip()), sep="\t")
||||||
|
|
||||||
|
|
||||||
|
def test_selection_select(example):
    """select returns only the rows of the requested category, in order"""
    from sensospot_tools.selection import select

    horses = select(example, "category", "horse")

    assert horses["category"].tolist() == ["horse", "horse"]
    assert horses["value"].tolist() == [35, 9]
||||||
|
|
||||||
|
|
||||||
|
def test_selection_split(example):
    """split gives one part per category, each with the matching values"""
    from sensospot_tools.selection import split

    parts = dict(split(example, "category"))

    assert sorted(parts) == ["cat", "dog", "horse"]
    assert parts["cat"]["value"].tolist() == [55, 60]
    assert parts["dog"]["value"].tolist() == [3]
    assert parts["horse"]["value"].tolist() == [35, 9]
@ -1,41 +1,4 @@ |
|||||||
""" Stub file for testing the project |
def test_api(): |
||||||
|
"""test if the provided functionality is importable""" |
||||||
There are three predefined ways to run tests: |
from sensospot_tools import split # noqa: F401 |
||||||
|
from sensospot_tools import select # noqa: F401 |
||||||
make test: |
|
||||||
runs only unit tests, that are not marked with "fun" (for functional test) |
|
||||||
in a random order. If a test failed before, only the failed tests will be |
|
||||||
run. This is intended to be the default testing method while developing. |
|
||||||
|
|
||||||
make testall: |
|
||||||
runs unit tests and functional tests in random order. Will give a complete |
|
||||||
overview of the test suite. |
|
||||||
|
|
||||||
make coverage: |
|
||||||
runs only tests marked with "fun" (for functional tests) and generates a |
|
||||||
coverage report for the test run. The idea is to check the test coverage |
|
||||||
only on functional tests to see if a) everything is as much covered as
|
||||||
possible and b) to find dead code that is not called in end-to-end tests. |
|
||||||
|
|
||||||
all three test strategies will run "make lint" before to catch easily made |
|
||||||
mistakes. |
|
||||||
""" |
|
||||||
|
|
||||||
import pytest |
|
||||||
|
|
||||||
|
|
||||||
def test_example_unittest():
    """example unittest - try importing the project

    The test passes if the import does not raise an error.

    will be run by 'make test' and 'make testall' but not 'make coverage'
    """
    import sensospot_tools  # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.functional
def test_example_functional_test():
    """example functional test - a placeholder that always passes

    will be run by 'make coverage' and 'make testall' but not 'make test'
    """
    assert True
|
||||||
|
Loading…
Reference in new issue