|
|
@ -34,13 +34,13 @@ def select( |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split( |
|
|
|
def split( |
|
|
|
data: pandas.DataFrame, column: str |
|
|
|
data: pandas.DataFrame, *on: tuple[Any] |
|
|
|
) -> Iterator[tuple[Any, pandas.DataFrame]]: |
|
|
|
) -> Iterator[tuple[Any, pandas.DataFrame]]: |
|
|
|
"""Splits a data frame on unique values in a column |
|
|
|
"""Splits a data frame on unique values in columns |
|
|
|
|
|
|
|
|
|
|
|
returns an iterator where each result is key-value-pair. The key is the |
|
|
|
Returns a generator of tuples with at least two elements. |
|
|
|
unique value used for the split, the value is a slice of the dataframe |
|
|
|
The _last_ element is the resulting partial data frame, |
|
|
|
selected by the unique value contained in the column |
|
|
|
the element(s) before are the values used to split up the original data. |
|
|
|
|
|
|
|
|
|
|
|
Examples: |
|
|
|
Examples: |
|
|
|
|
|
|
|
|
|
|
@ -62,12 +62,55 @@ def split( |
|
|
|
category value |
|
|
|
category value |
|
|
|
2 horse 3 |
|
|
|
2 horse 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>>> for well, pos, partial in split_uniques(full_data, "Well", "Pos"): |
|
|
|
|
|
|
|
# `well` is one of the unique values in full_data["Well"] |
|
|
|
|
|
|
|
# `pos` is one of the unique values in full_data["Pos"] |
|
|
|
|
|
|
|
# `partial` is a slice of full_data for this well and pos |
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
Args: |
|
|
|
data: DataFrame to process |
|
|
|
data: DataFrame to process |
|
|
|
column: column identifier to split on unique values |
|
|
|
*on: one or multiple column identifiers to split on unique values |
|
|
|
|
|
|
|
Yields: |
|
|
|
|
|
|
|
a tuple with the unique values as key(s) and the resulting data frame |
|
|
|
|
|
|
|
as last object |
|
|
|
|
|
|
|
""" |
|
|
|
|
|
|
|
yield from _iter_uniques(data, *on) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _iter_uniques( |
|
|
|
|
|
|
|
data: pandas.DataFrame, |
|
|
|
|
|
|
|
*on: tuple[Any], |
|
|
|
|
|
|
|
_prev_values: None | tuple[Any] = None, |
|
|
|
|
|
|
|
) -> tuple[Any, ..., pandas.DataFrame]: |
|
|
|
|
|
|
|
"""Splits a data frame on unique values in columns |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns a generator of tuples with at least two elements. |
|
|
|
|
|
|
|
The _last_ element is the resulting partial data frame, |
|
|
|
|
|
|
|
the element(s) before are the values used to split up the original data. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Example: |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>>> for well, pos, partial in split_uniques(full_data, "Well", "Pos"): |
|
|
|
|
|
|
|
# `well` is one of the unique values in full_data["Well"] |
|
|
|
|
|
|
|
# `pos` is one of the unique values in full_data["Pos"] |
|
|
|
|
|
|
|
# `partial` is a slice of full_data for this well and pos |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args: |
|
|
|
|
|
|
|
data: pandas DataFrame to process |
|
|
|
|
|
|
|
*on: one or multiple column names to split on unique values |
|
|
|
|
|
|
|
_prev_values: cache of unique values for recursion |
|
|
|
Yields: |
|
|
|
Yields: |
|
|
|
key-value-pairs of one unique value of the column as key and the |
|
|
|
a tuple with the unique values as key(s) and the resulting data frame |
|
|
|
corresponding slice of the dataframe as value |
|
|
|
as last object |
|
|
|
""" |
|
|
|
""" |
|
|
|
unique_values = data[column].unique() |
|
|
|
if _prev_values is None: |
|
|
|
return ((value, select(data, column, value)) for value in unique_values) |
|
|
|
_prev_values = () |
|
|
|
|
|
|
|
current_column, *rest = on |
|
|
|
|
|
|
|
for current_value in data[current_column].unique(): |
|
|
|
|
|
|
|
selected = select(data, current_column, current_value) |
|
|
|
|
|
|
|
values = (*_prev_values, current_value) |
|
|
|
|
|
|
|
if rest: |
|
|
|
|
|
|
|
yield from _iter_uniques(selected, *rest, _prev_values=values) |
|
|
|
|
|
|
|
else: |
|
|
|
|
|
|
|
yield *values, selected |
|
|
|