From 2547d8ea2a922c8beef9be38343ac0c8b430272a Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Thu, 6 Jul 2023 11:10:51 +0200 Subject: [PATCH] modified the function signature of `split_uniques()` To specify multiple columns, you add them directly to the function call instead of using a container. OLD: split_uniques(data, ["A", "B"]) NEW: split_uniques(data, "A", "B") This removes the necessity to differentiate between a single string and other containers. --- README.md | 6 +++--- split_uniques.py | 17 +++++++---------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8db61bd..981e6b0 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ df = pd.DataFrame({ "C": ["x", "y", "z"] }) -result = list(split_uniques(df, ["B"])) +result = list(split_uniques(df, "B")) assert len(result) == 2 @@ -66,6 +66,6 @@ This construct might look a little bit weird, but it makes it easy to use the function in a loop definition: ```python -for well, probe, partial_data in split_uniques(full_data, ["Well", "Probe"]): - ... 
+for well, probe, partial_data in split_uniques(full_data, "Well", "Probe"): + # partial data only contains values for one well and one probe ``` diff --git a/split_uniques.py b/split_uniques.py index 783645c..9ac77f7 100644 --- a/split_uniques.py +++ b/split_uniques.py @@ -1,11 +1,11 @@ import pandas as pd import pytest -from typing import Iterable, Any +from typing import Any def split_uniques( - data: pd.DataFrame, on: str | Iterable[str], *, _prev_values: tuple[Any] = None + data: pd.DataFrame, *on: tuple[Any], _prev_values: tuple[Any] = None ) -> tuple[Any, ..., pd.DataFrame]: """Splits a data frame on uniques values in a column @@ -15,14 +15,12 @@ def split_uniques( Example: - for well, pos, partial_data in split_uniques(full_data, ["Well", "Pos"]): + for well, pos, partial_data in split_uniques(full_data, "Well", "Pos"): # `well` is one of the unique values in full_data["Well"] # `pos` is one of the unique values in full_data["Pos"] # parital_data is a data frame, containing values for this well and pos """ - if isinstance(on, str): - on = [on] if _prev_values is None: _prev_values = tuple() current_column, *rest = on @@ -31,7 +29,7 @@ def split_uniques( selected = data.loc[selection].copy() values = _prev_values + (current_value,) if rest: - yield from split_uniques(selected, rest, _prev_values=values) + yield from split_uniques(selected, *rest, _prev_values=values) else: yield *values, selected @@ -44,9 +42,8 @@ def example_data(): return pd.DataFrame({"A": [1, 2, 2], "B": [3, 4, 3], "C": ["x", "y", "z"]}) -@pytest.mark.parametrize("on", ["A", ["A"]]) -def test_split_uniques_one_column(example_data, on): - result = list(split_uniques(example_data, on)) +def test_split_uniques_one_column(example_data): + result = list(split_uniques(example_data, "A")) assert len(result) == 2 assert isinstance(result[0], tuple) @@ -61,7 +58,7 @@ def test_split_uniques_one_column(example_data, on): def test_split_uniques_multiple_columns(example_data): - result = 
list(split_uniques(example_data, ["B", "A"])) + result = list(split_uniques(example_data, "B", "A")) assert len(result) == 3 assert isinstance(result[0], tuple)