Holger Frey
1 year ago
7 changed files with 63 additions and 205 deletions
@ -1,60 +0,0 @@ |
|||||||
from __future__ import annotations

from collections.abc import Iterator
from typing import Any

import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def _is_missing(value: Any) -> bool:
    """Return True if *value* is a scalar missing marker (NaN, None, ...)."""
    # `pd.isna` returns an array for non-scalars, so guard with `is_scalar`
    # to keep the result a plain boolean.
    return bool(pd.api.types.is_scalar(value) and pd.isna(value))


def _iter_uniques(
    data: pd.DataFrame,
    *on: Any,
    _prev_values: None | tuple[Any, ...] = None,
) -> Iterator[tuple[Any, ...]]:
    """Recursively split a data frame on the unique values of columns.

    This is the worker behind `iter_uniques`; `_prev_values` carries the
    values already selected by the outer recursion levels.

    Yields tuples with at least two elements. The _last_ element is the
    resulting partial data frame (a copy), the element(s) before are the
    values used to split up the original data, in the order of `on`.

    Rows with a missing value (NaN / None) in a split column are yielded
    as one group of their own instead of an empty data frame.

    Raises:
        ValueError: if no column to split on was given.
    """
    if not on:
        msg = "at least one column is required to split the data on"
        raise ValueError(msg)
    if _prev_values is None:
        _prev_values = ()

    current_column, *rest = on
    column = data[current_column]
    missing_done = False

    for current_value in column.unique():
        if _is_missing(current_value):
            # `unique()` reports missing values, but `== NaN` never matches;
            # select them with `isna()` and only once, even if the column
            # holds several kinds of missing markers.
            if missing_done:
                continue
            missing_done = True
            selection = column.isna()
        else:
            selection = column == current_value

        selected = data.loc[selection].copy()
        values = (*_prev_values, current_value)
        if rest:
            yield from _iter_uniques(selected, *rest, _prev_values=values)
        else:
            yield (*values, selected)


def iter_uniques(data: pd.DataFrame, *on: Any) -> Iterator[tuple[Any, ...]]:
    """Split a data frame on the unique values of one or more columns.

    Returns a generator of tuples with at least two elements.
    The _last_ element is the resulting partial data frame,
    the element(s) before are the values used to split up the original data.

    The splitting is nested: the data is first split on the unique values
    of the first column, each part is then split on the second column and
    so on. Rows with a missing value (NaN / None) in a split column form
    one group of their own.

    Example:

        for well, pos, partial_data in iter_uniques(full_data, "Well", "Pos"):
            # `well` is one of the unique values in full_data["Well"]
            # `pos` is one of the unique values in full_data["Pos"]
            # partial_data is a data frame, containing values for this
            # well and pos

    Raises:
        ValueError: if no column to split on was given.
    """
    yield from _iter_uniques(data, *on)
|
||||||
|
|
||||||
|
|
||||||
def select(data: pd.DataFrame, column: str, value: Any) -> pd.DataFrame:
    """Return a copy of the rows of `data` where `column` equals `value`.

    A missing `value` (NaN / None) selects the rows where `column` is
    missing, since a plain equality comparison never matches NaN.
    The result is a copy, so changing it does not touch `data`.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        selection = data[column].isna()
    else:
        selection = data[column] == value
    return data.loc[selection].copy()
|
@ -1,48 +0,0 @@ |
|||||||
import pandas as pd |
|
||||||
import pytest |
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
def example_data():
    """Provide a small three-row data frame with the columns A, B and C."""
    columns = {
        "A": [1, 2, 2],
        "B": [3, 4, 3],
        "C": ["x", "y", "z"],
    }
    return pd.DataFrame(columns)
|
||||||
|
|
||||||
|
|
||||||
def test_split_uniques_one_column(example_data):
    """Splitting on column A yields one (value, frame) tuple per unique value."""
    from conda_helpers import iter_uniques

    chunks = list(iter_uniques(example_data, "A"))

    assert len(chunks) == 2
    assert isinstance(chunks[0], tuple)

    # expected split value and the "C" entries of the matching partial frame
    expected = [(1, ["x"]), (2, ["y", "z"])]
    for (a_value, data), (wanted_a, wanted_c) in zip(chunks, expected):
        assert a_value == wanted_a
        assert list(data["C"]) == wanted_c
|
||||||
|
|
||||||
|
|
||||||
def test_split_uniques_multiple_columns(example_data):
    """Splitting on B, then A yields (b, a, frame) tuples in nested order."""
    from conda_helpers import iter_uniques

    chunks = list(iter_uniques(example_data, "B", "A"))

    assert len(chunks) == 3
    assert isinstance(chunks[0], tuple)

    # expected split values and the "C" entries of the matching partial frame
    expected = [
        (3, 1, ["x"]),
        (3, 2, ["z"]),
        (4, 2, ["y"]),
    ]
    for chunk, (wanted_b, wanted_a, wanted_c) in zip(chunks, expected):
        b_value, a_value, data = chunk
        assert b_value == wanted_b
        assert a_value == wanted_a
        assert list(data["C"]) == wanted_c
|
Loading…
Reference in new issue