You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
783 B
31 lines
783 B
2 years ago
|
import pandas as pd
|
||
|
from typing import Iterable, NamedTuple
|
||
|
|
||
|
# Mapping of column name -> the value that column takes within one split group.
# NOTE: the subscript must be ``dict[str, str]``; ``dict[str:str]`` passes a
# single ``slice`` object as the type argument, which is not a valid key/value
# parametrisation.
SplitUniqueKeys = dict[str, str]
|
||
|
|
||
|
|
||
|
class SplitUniqueResult(NamedTuple):
    """A single group emitted by ``split``: its selecting keys plus its rows."""

    # Column name -> value pairs that identify this group.
    keys: SplitUniqueKeys
    # The rows of the input frame that match every pair in ``keys``.
    data: pd.DataFrame
|
||
|
|
||
|
|
||
|
def split(
    data: pd.DataFrame,
    columns: str | Iterable[str],
    *,
    prevkeys: SplitUniqueKeys | None = None,
) -> Iterable[SplitUniqueResult]:
    """Lazily partition *data* by the unique values of one or more columns.

    The first column in *columns* is split on its unique values; each
    resulting subset is then split recursively on the remaining columns.
    One ``SplitUniqueResult`` is yielded per leaf group, i.e. per combination
    of values that actually occurs in the data.

    Args:
        data: The frame to partition.
        columns: A single column name, or an iterable of column names applied
            in order (outermost split first).
        prevkeys: Column/value pairs already fixed by an enclosing split.
            Used by the recursion; callers normally leave it as ``None``.

    Yields:
        ``SplitUniqueResult(keys, data)`` where ``keys`` maps every split
        column to its value for the group and ``data`` is an independent copy
        of the matching rows.

    Raises:
        ValueError: If *columns* is empty (raised on first iteration, since
            this is a generator).
    """
    # Materialise so one-shot iterables can be checked for emptiness.
    columns = [columns] if isinstance(columns, str) else list(columns)
    if not columns:
        raise ValueError("split() requires at least one column name")
    if prevkeys is None:
        prevkeys = {}

    current, *rest = columns
    column = data[current]
    null_group_done = False

    for value in column.unique():
        if pd.api.types.is_scalar(value) and pd.isna(value):
            # A null never compares equal to anything (including itself), so
            # ``column == value`` would select no rows and silently drop them.
            # Select with isna() instead, and emit the null group only once
            # even if unique() reports several null flavours (None, NaN, ...).
            if null_group_done:
                continue
            null_group_done = True
            selection = column.isna()
        else:
            selection = column == value

        # Copy so that consumers can modify a group without touching ``data``.
        selected = data.loc[selection].copy()
        keys = prevkeys | {current: value}

        if rest:
            yield from split(selected, rest, prevkeys=keys)
        else:
            yield SplitUniqueResult(keys, selected)
|