You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
783 B
30 lines
783 B
import pandas as pd |
|
from typing import Iterable, NamedTuple |
|
|
|
# Mapping from a column name to the value selected in that column.
# NOTE(review): values are annotated as ``str``, but ``split`` stores whatever
# the column holds (numbers, timestamps, ...) — confirm the intended value type.
SplitUniqueKeys = dict[str, str]
|
|
|
|
|
class SplitUniqueResult(NamedTuple):
    """One group produced by ``split``: the selecting keys and the matching rows."""

    # Column name -> value that every row of ``data`` has in that column.
    keys: SplitUniqueKeys
    # The rows of the source frame that matched ``keys``.
    data: pd.DataFrame
|
|
|
|
|
def split(
    data: pd.DataFrame,
    columns: str | Iterable[str],
    *,
    prevkeys: SplitUniqueKeys | None = None,
) -> Iterable[SplitUniqueResult]:
    """Yield one sub-frame per unique combination of values in *columns*.

    The frame is partitioned on the first column, and each partition is then
    split recursively on the remaining columns. Groups are produced in the
    order in which their values first appear in the data.

    Args:
        data: Frame to partition.
        columns: A single column name or an iterable of column names.
        prevkeys: Keys already fixed by an outer call; they are merged into
            the keys of every yielded result. Mainly used by the recursion.

    Yields:
        ``SplitUniqueResult(keys, data)`` where ``keys`` maps each column
        name to its selected value and ``data`` is an independent copy of
        the matching rows.

    Raises:
        ValueError: If *columns* is empty (raised when iteration starts,
            because this is a generator).
    """
    # Materialise once so one-shot iterables work and emptiness can be tested.
    columns = [columns] if isinstance(columns, str) else list(columns)
    if not columns:
        raise ValueError("split() needs at least one column name")
    if prevkeys is None:
        prevkeys = {}

    current, *rest = columns
    column = data[current]
    missing_done = False
    for value in column.unique():
        if pd.api.types.is_scalar(value) and pd.isna(value):
            # ``column == NaN`` never matches, so missing values need isna().
            # Several missing markers (None, NaN, NA) all select the same
            # rows; emit that group only once, under the first marker seen.
            if missing_done:
                continue
            missing_done = True
            selection = column.isna()
        else:
            selection = column == value
        selected = data.loc[selection].copy()
        keys = prevkeys | {current: value}
        if rest:
            yield from split(selected, rest, prevkeys=keys)
        else:
            yield SplitUniqueResult(keys, selected)
|
|
|