@ -1,11 +1,11 @@
import pandas as pd
import pandas as pd
import pytest
import pytest
from typing import Iterable , Any
from typing import Any
def split_uniques (
def split_uniques (
data : pd . DataFrame , on : str | Iterable [ str ] , * , _prev_values : tuple [ Any ] = None
data : pd . DataFrame , * on : tuple [ Any ] , _prev_values : tuple [ Any ] = None
) - > tuple [ Any , . . . , pd . DataFrame ] :
) - > tuple [ Any , . . . , pd . DataFrame ] :
""" Splits a data frame on unique values in a column
""" Splits a data frame on unique values in a column
@ -15,14 +15,12 @@ def split_uniques(
Example :
Example :
for well , pos , partial_data in split_uniques ( full_data , [ " Well " , " Pos " ] ) :
for well , pos , partial_data in split_uniques ( full_data , " Well " , " Pos " ) :
# `well` is one of the unique values in full_data["Well"]
# `well` is one of the unique values in full_data["Well"]
# `pos` is one of the unique values in full_data["Pos"]
# `pos` is one of the unique values in full_data["Pos"]
# `partial_data` is a data frame, containing values for this well and pos
# `partial_data` is a data frame, containing values for this well and pos
"""
"""
if isinstance ( on , str ) :
on = [ on ]
if _prev_values is None :
if _prev_values is None :
_prev_values = tuple ( )
_prev_values = tuple ( )
current_column , * rest = on
current_column , * rest = on
@ -31,7 +29,7 @@ def split_uniques(
selected = data . loc [ selection ] . copy ( )
selected = data . loc [ selection ] . copy ( )
values = _prev_values + ( current_value , )
values = _prev_values + ( current_value , )
if rest :
if rest :
yield from split_uniques ( selected , rest , _prev_values = values )
yield from split_uniques ( selected , * rest , _prev_values = values )
else :
else :
yield * values , selected
yield * values , selected
@ -44,9 +42,8 @@ def example_data():
return pd . DataFrame ( { " A " : [ 1 , 2 , 2 ] , " B " : [ 3 , 4 , 3 ] , " C " : [ " x " , " y " , " z " ] } )
return pd . DataFrame ( { " A " : [ 1 , 2 , 2 ] , " B " : [ 3 , 4 , 3 ] , " C " : [ " x " , " y " , " z " ] } )
@pytest . mark . parametrize ( " on " , [ " A " , [ " A " ] ] )
def test_split_uniques_one_column ( example_data ) :
def test_split_uniques_one_column ( example_data , on ) :
result = list ( split_uniques ( example_data , " A " ) )
result = list ( split_uniques ( example_data , on ) )
assert len ( result ) == 2
assert len ( result ) == 2
assert isinstance ( result [ 0 ] , tuple )
assert isinstance ( result [ 0 ] , tuple )
@ -61,7 +58,7 @@ def test_split_uniques_one_column(example_data, on):
def test_split_uniques_multiple_columns ( example_data ) :
def test_split_uniques_multiple_columns ( example_data ) :
result = list ( split_uniques ( example_data , [ " B " , " A " ] ) )
result = list ( split_uniques ( example_data , " B " , " A " ) )
assert len ( result ) == 3
assert len ( result ) == 3
assert isinstance ( result [ 0 ] , tuple )
assert isinstance ( result [ 0 ] , tuple )