Holger Frey
1 year ago
7 changed files with 63 additions and 205 deletions
@ -1,60 +0,0 @@
@@ -1,60 +0,0 @@
|
||||
from __future__ import annotations

from collections.abc import Iterator
from typing import Any

import pandas as pd
||||
|
||||
|
||||
def _iter_uniques(
    data: pd.DataFrame,
    *on: Any,
    _prev_values: tuple[Any, ...] | None = None,
) -> Iterator[tuple[Any, ...]]:
    """Recursively split a data frame on the unique values of columns.

    This is the recursive worker behind `iter_uniques`. It splits `data` on
    the first column label in `on` and recurses into every partial frame
    with the remaining labels.

    Args:
        data: the data frame to split.
        *on: one or more column labels, applied from left to right.
        _prev_values: values selected by the outer recursion levels;
            callers outside of the recursion leave this at `None`.

    Yields:
        Tuples whose _last_ element is the partial data frame (a copy) and
        whose preceding elements are the values used to select it, in the
        order of the labels in `on`.

    Raises:
        ValueError: if no column label is given.
    """
    if not on:
        raise ValueError("at least one column is required to split on")
    if _prev_values is None:
        _prev_values = ()
    current_column, *rest = on
    column = data[current_column]
    for current_value in column.unique():
        # `unique()` reports missing values, but a missing value never
        # compares equal to anything, so `==` would select no rows at all.
        if pd.api.types.is_scalar(current_value) and pd.isna(current_value):
            selection = column.isna()
        else:
            selection = column == current_value
        selected = data.loc[selection]
        values = (*_prev_values, current_value)
        if rest:
            # intermediate frames are only read by the recursion, no copy needed
            yield from _iter_uniques(selected, *rest, _prev_values=values)
        else:
            # hand out a copy so that callers can modify the partial frame
            yield (*values, selected.copy())


def iter_uniques(data: pd.DataFrame, *on: Any) -> Iterator[tuple[Any, ...]]:
    """Split a data frame on the unique values of one or more columns.

    Returns a generator of tuples with at least two elements.
    The _last_ element is the resulting partial data frame,
    the element(s) before are the values used to split up the original data.

    Example:

        for well, pos, partial_data in iter_uniques(full_data, "Well", "Pos"):
            # `well` is one of the unique values in full_data["Well"]
            # `pos` is one of the unique values of "Pos" within that well
            # partial_data is a data frame, containing values for this well and pos

    Args:
        data: the data frame to split.
        *on: one or more column labels, applied from left to right.

    Raises:
        ValueError: if no column label is given (raised on first iteration).
    """
    yield from _iter_uniques(data, *on)
||||
|
||||
|
||||
def select(data: pd.DataFrame, column: str, value: Any) -> pd.DataFrame:
    """Return a copy of the rows of `data` where `column` equals `value`."""
    return data.loc[data[column] == value].copy()
@ -1,48 +0,0 @@
@@ -1,48 +0,0 @@
|
||||
import pandas as pd |
||||
import pytest |
||||
|
||||
|
||||
@pytest.fixture()
def example_data():
    """Provide a small three-row frame with the columns "A", "B" and "C"."""
    columns = {
        "A": [1, 2, 2],
        "B": [3, 4, 3],
        "C": ["x", "y", "z"],
    }
    return pd.DataFrame(columns)
||||
|
||||
|
||||
def test_split_uniques_one_column(example_data):
    """Splitting on "A" yields one (value, frame) tuple per unique value."""
    from conda_helpers import iter_uniques

    chunks = list(iter_uniques(example_data, "A"))

    assert len(chunks) == 2
    assert isinstance(chunks[0], tuple)

    # expected value of "A" and the "C" entries of the matching partial frame
    expected = [
        (1, ["x"]),
        (2, ["y", "z"]),
    ]
    for (a_value, frame), (wanted_a, wanted_c) in zip(chunks, expected):
        assert a_value == wanted_a
        assert list(frame["C"]) == wanted_c
||||
|
||||
|
||||
def test_split_uniques_multiple_columns(example_data):
    """Splitting on "B" then "A" yields (b, a, frame) tuples in split order."""
    from conda_helpers import iter_uniques

    chunks = list(iter_uniques(example_data, "B", "A"))

    assert len(chunks) == 3
    assert isinstance(chunks[0], tuple)

    # expected values of "B" and "A" plus the "C" entries of the partial frame
    expected = [
        (3, 1, ["x"]),
        (3, 2, ["z"]),
        (4, 2, ["y"]),
    ]
    for (b_value, a_value, frame), (wanted_b, wanted_a, wanted_c) in zip(
        chunks, expected
    ):
        assert b_value == wanted_b
        assert a_value == wanted_a
        assert list(frame["C"]) == wanted_c
Loading…
Reference in new issue