From 2547d8ea2a922c8beef9be38343ac0c8b430272a Mon Sep 17 00:00:00 2001
From: Holger Frey <frey@imtek.de>
Date: Thu, 6 Jul 2023 11:10:51 +0200
Subject: [PATCH] modified the function signature of `split_uniques()`

To specify multiple columns, you add them directly to the function call instead of using a container.

OLD: split_uniques(data, ["A", "B"])

NEW: split_uniques(data, "A", "B")

This removes the need to differentiate between a single string and other containers.
---
 README.md        |  6 +++---
 split_uniques.py | 17 +++++++----------
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 8db61bd..981e6b0 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ df = pd.DataFrame({
         "C": ["x", "y", "z"]
     })
 
-result = list(split_uniques(df, ["B"]))
+result = list(split_uniques(df, "B"))
 
 assert len(result) == 2
 
@@ -66,6 +66,6 @@ This construct might look a little bit weird, but it makes it easy to use the
 function in a loop definition:
 
 ```python
-for well, probe, partial_data in split_uniques(full_data, ["Well", "Probe"]):
-    ...
+for well, probe, partial_data in split_uniques(full_data, "Well", "Probe"):
+    # partial data only contains values for one well and one probe
 ```
diff --git a/split_uniques.py b/split_uniques.py
index 783645c..9ac77f7 100644
--- a/split_uniques.py
+++ b/split_uniques.py
@@ -1,11 +1,11 @@
 import pandas as pd
 import pytest
 
-from typing import Iterable, Any
+from typing import Any
 
 
 def split_uniques(
-    data: pd.DataFrame, on: str | Iterable[str], *, _prev_values: tuple[Any] = None
+    data: pd.DataFrame, *on: tuple[Any], _prev_values: tuple[Any] = None
 ) -> tuple[Any, ..., pd.DataFrame]:
     """Splits a data frame on uniques values in a column
 
@@ -15,14 +15,12 @@ def split_uniques(
 
     Example:
 
-    for well, pos, partial_data in split_uniques(full_data, ["Well", "Pos"]):
+    for well, pos, partial_data in split_uniques(full_data, "Well", "Pos"):
         # `well` is one of the unique values in full_data["Well"]
         # `pos` is one of the unique values in full_data["Pos"]
         # parital_data is a data frame, containing values for this well and pos
 
     """
-    if isinstance(on, str):
-        on = [on]
     if _prev_values is None:
         _prev_values = tuple()
     current_column, *rest = on
@@ -31,7 +29,7 @@ def split_uniques(
         selected = data.loc[selection].copy()
         values = _prev_values + (current_value,)
         if rest:
-            yield from split_uniques(selected, rest, _prev_values=values)
+            yield from split_uniques(selected, *rest, _prev_values=values)
         else:
             yield *values, selected
 
@@ -44,9 +42,8 @@ def example_data():
     return pd.DataFrame({"A": [1, 2, 2], "B": [3, 4, 3], "C": ["x", "y", "z"]})
 
 
-@pytest.mark.parametrize("on", ["A", ["A"]])
-def test_split_uniques_one_column(example_data, on):
-    result = list(split_uniques(example_data, on))
+def test_split_uniques_one_column(example_data):
+    result = list(split_uniques(example_data, "A"))
 
     assert len(result) == 2
     assert isinstance(result[0], tuple)
@@ -61,7 +58,7 @@ def test_split_uniques_one_column(example_data, on):
 
 
 def test_split_uniques_multiple_columns(example_data):
-    result = list(split_uniques(example_data, ["B", "A"]))
+    result = list(split_uniques(example_data, "B", "A"))
 
     assert len(result) == 3
     assert isinstance(result[0], tuple)