diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py index b50d95b..3b19349 100644 --- a/sensospot_data/__init__.py +++ b/sensospot_data/__init__.py @@ -12,8 +12,6 @@ import click from .utils import ( # noqa: F401 split, - aggregate, - add_aggregate, apply_exposure_map, ) from .parser import parse_file, parse_folder # noqa: F401 diff --git a/sensospot_data/columns.py b/sensospot_data/columns.py index e295ec1..412c65d 100644 --- a/sensospot_data/columns.py +++ b/sensospot_data/columns.py @@ -97,7 +97,3 @@ RAW_DATA_NORMALIZATION_MAP = { RAW_DATA_BKG_SUM: CALC_NORMALIZED_BKG_SUM, RAW_DATA_SPOT_SUM: CALC_NORMALIZED_SPOT_SUM, } - - -# Pfefix for aggregated data -AGGREGATION_PREFIX = "Aggregated" diff --git a/sensospot_data/utils.py b/sensospot_data/utils.py index 17dedf5..632fe79 100644 --- a/sensospot_data/utils.py +++ b/sensospot_data/utils.py @@ -3,7 +3,6 @@ from collections.abc import Mapping, Sequence import pandas from .columns import ( - AGGREGATION_PREFIX, META_DATA_WELL_ROW, META_DATA_EXPOSURE_ID, META_DATA_WELL_COLUMN, @@ -116,52 +115,3 @@ def apply_exposure_map(data_frame, exposure_map=None): left_on=META_DATA_EXPOSURE_ID, right_index=True, ) - - -def aggregate( - data_frame, column, method, on=DEFAULT_AGGREGATION_INDEX, new_name=None -): - """returns the aggregates of one data frame column - - data_frame: pandas data frame with the data to aggregate - column: column name to aggregate - method: method of aggregation - on: list of coulumns to group by, defaults to - - Exposure.Id - - Well.Column - - Well.Row - new_name: the name of the aggregate column - if set to None, a prefix will be added to the original name - """ - if new_name is None: - method_as_name = method.title() - new_name = f"{AGGREGATION_PREFIX}.{method_as_name}.{column}" - grouped = data_frame.groupby(on) - aggregated_data = grouped.agg({column: method}) - aggregated_data.columns = [new_name] - return aggregated_data - - -def add_aggregate( - data_frame, column, method, on=DEFAULT_AGGREGATION_INDEX, new_name=None -): - """aggregates one column in a data frame and - adds the resulting column to the data frame - - data_frame: pandas data frame with the data to aggregate - column: column name to aggregate - method: method of aggregation - on: list of coulumns to group by, defaults to - - Exposure.Id - - Well.Column - - Well.Row - new_name: the name of the aggregate column, - if set to None, a prefix will be added to the original name - """ - aggregated_data = aggregate(data_frame, column, method, on, new_name) - return data_frame.merge( - aggregated_data, - how="left", - left_on=on, - right_index=True, - ) diff --git a/tests/test_sensovation_data.py b/tests/test_sensovation_data.py index 79a739b..267a738 100644 --- a/tests/test_sensovation_data.py +++ b/tests/test_sensovation_data.py @@ -6,10 +6,8 @@ def test_import_api(): from sensospot_data import run # noqa: F401 from sensospot_data import blend # noqa: F401 from sensospot_data import split # noqa: F401 - from sensospot_data import aggregate # noqa: F401 from sensospot_data import create_xdr # noqa: F401 from sensospot_data import parse_file # noqa: F401 from sensospot_data import parse_folder # noqa: F401 - from sensospot_data import add_aggregate # noqa: F401 from sensospot_data import normalize_values # noqa: F401 from sensospot_data import apply_exposure_map # noqa: F401 diff --git a/tests/test_utils.py b/tests/test_utils.py index c169828..e4e1a4f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -178,74 +178,3 @@ def test_apply_exposure_map_from_parameters_raises_error( assert str(excinfo.value).startswith("Exposure Map: measurement") - -def test_aggregate_defaults(normalization_data_frame): - from sensospot_data.utils import aggregate - - normalization_data_frame.rename( - columns={"Exposure.Time": "Exposure.Id"}, inplace=True - ) - - result = aggregate(normalization_data_frame, "Value", "median") - - assert result.columns == ["Aggregated.Median.Value"] - assert result.index.names == ["Exposure.Id", "Well.Row", "Well.Column"] - assert list(result["Aggregated.Median.Value"]) == [ - 3, - 30, - 300, - 2, - 20, - 200, - 1, - 10, - 100, - ] - - -def test_aggregate_on(normalization_data_frame): - from sensospot_data.utils import aggregate - - result = aggregate( - normalization_data_frame, "Value", "mean", on="Exposure.Time" - ) - - assert result.columns == ["Aggregated.Mean.Value"] - assert result.index.names == ["Exposure.Time"] - assert list(result["Aggregated.Mean.Value"]) == [111, 74, 37] - - -def test_aggregate_new_name(normalization_data_frame): - from sensospot_data.utils import aggregate - - result = aggregate( - normalization_data_frame, - "Value", - "mean", - on="Exposure.Time", - new_name="Foo", - ) - - assert result.columns == ["Foo"] - assert result.index.names == ["Exposure.Time"] - assert list(result["Foo"]) == [111, 74, 37] - - -def test_add_aggregate_new_name(normalization_data_frame): - from sensospot_data.utils import add_aggregate - - result = add_aggregate( - normalization_data_frame, - "Value", - "mean", - on="Exposure.Time", - new_name="Foo", - ) - - assert "Foo" in result.columns - assert len(result.columns) == len(normalization_data_frame.columns) + 1 - assert result.index.names == [None] - - for exp, val in [(10, 111), (25, 74), (50, 37)]: - mask = result["Exposure.Time"] == exp - assert result.loc[mask, "Foo"].unique() == [val]