From dc8851d40e28d233b10b3ea6bf7abe9aef7d9175 Mon Sep 17 00:00:00 2001
From: Holger Frey <frey@imtek.de>
Date: Tue, 19 Oct 2021 10:44:17 +0200
Subject: [PATCH] Remove aggregate utils; they don't belong in this library

---
 sensospot_data/__init__.py     |  2 -
 sensospot_data/columns.py      |  4 --
 sensospot_data/utils.py        | 50 ------------------------
 tests/test_sensovation_data.py |  2 -
 tests/test_utils.py            | 71 ----------------------------------
 5 files changed, 129 deletions(-)

diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py
index b50d95b..3b19349 100644
--- a/sensospot_data/__init__.py
+++ b/sensospot_data/__init__.py
@@ -12,8 +12,6 @@ import click
 
 from .utils import (  # noqa: F401
     split,
-    aggregate,
-    add_aggregate,
     apply_exposure_map,
 )
 from .parser import parse_file, parse_folder  # noqa: F401
diff --git a/sensospot_data/columns.py b/sensospot_data/columns.py
index e295ec1..412c65d 100644
--- a/sensospot_data/columns.py
+++ b/sensospot_data/columns.py
@@ -97,7 +97,3 @@ RAW_DATA_NORMALIZATION_MAP = {
     RAW_DATA_BKG_SUM: CALC_NORMALIZED_BKG_SUM,
     RAW_DATA_SPOT_SUM: CALC_NORMALIZED_SPOT_SUM,
 }
-
-
-# Pfefix for aggregated data
-AGGREGATION_PREFIX = "Aggregated"
diff --git a/sensospot_data/utils.py b/sensospot_data/utils.py
index 17dedf5..632fe79 100644
--- a/sensospot_data/utils.py
+++ b/sensospot_data/utils.py
@@ -3,7 +3,6 @@ from collections.abc import Mapping, Sequence
 import pandas
 
 from .columns import (
-    AGGREGATION_PREFIX,
     META_DATA_WELL_ROW,
     META_DATA_EXPOSURE_ID,
     META_DATA_WELL_COLUMN,
@@ -116,52 +115,3 @@ def apply_exposure_map(data_frame, exposure_map=None):
         left_on=META_DATA_EXPOSURE_ID,
         right_index=True,
     )
-
-
-def aggregate(
-    data_frame, column, method, on=DEFAULT_AGGREGATION_INDEX, new_name=None
-):
-    """returns the aggregates of one data frame column
-
-    data_frame: pandas data frame with the data to aggregate
-    column:     column name to aggregate
-    method:     method of aggregation
-    on:         list of coulumns to group by, defaults to
-                - Exposure.Id
-                - Well.Column
-                - Well.Row
-    new_name:   the name of the aggregate column
-                if set to None, a prefix will be added to the original name
-    """
-    if new_name is None:
-        method_as_name = method.title()
-        new_name = f"{AGGREGATION_PREFIX}.{method_as_name}.{column}"
-    grouped = data_frame.groupby(on)
-    aggregated_data = grouped.agg({column: method})
-    aggregated_data.columns = [new_name]
-    return aggregated_data
-
-
-def add_aggregate(
-    data_frame, column, method, on=DEFAULT_AGGREGATION_INDEX, new_name=None
-):
-    """aggregates one column in a data frame and
-        adds the resulting column to the data frame
-
-    data_frame: pandas data frame with the data to aggregate
-    column:     column name to aggregate
-    method:     method of aggregation
-    on:         list of coulumns to group by, defaults to
-                - Exposure.Id
-                - Well.Column
-                - Well.Row
-    new_name:   the name of the aggregate column,
-                if set to None, a prefix will be added to the original name
-    """
-    aggregated_data = aggregate(data_frame, column, method, on, new_name)
-    return data_frame.merge(
-        aggregated_data,
-        how="left",
-        left_on=on,
-        right_index=True,
-    )
diff --git a/tests/test_sensovation_data.py b/tests/test_sensovation_data.py
index 79a739b..267a738 100644
--- a/tests/test_sensovation_data.py
+++ b/tests/test_sensovation_data.py
@@ -6,10 +6,8 @@ def test_import_api():
     from sensospot_data import run  # noqa: F401
     from sensospot_data import blend  # noqa: F401
     from sensospot_data import split  # noqa: F401
-    from sensospot_data import aggregate  # noqa: F401
     from sensospot_data import create_xdr  # noqa: F401
     from sensospot_data import parse_file  # noqa: F401
     from sensospot_data import parse_folder  # noqa: F401
-    from sensospot_data import add_aggregate  # noqa: F401
     from sensospot_data import normalize_values  # noqa: F401
     from sensospot_data import apply_exposure_map  # noqa: F401
diff --git a/tests/test_utils.py b/tests/test_utils.py
index c169828..e4e1a4f 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -178,74 +178,3 @@ def test_apply_exposure_map_from_parameters_raises_error(
 
     assert str(excinfo.value).startswith("Exposure Map: measurement")
 
-
-def test_aggregate_defaults(normalization_data_frame):
-    from sensospot_data.utils import aggregate
-
-    normalization_data_frame.rename(
-        columns={"Exposure.Time": "Exposure.Id"}, inplace=True
-    )
-
-    result = aggregate(normalization_data_frame, "Value", "median")
-
-    assert result.columns == ["Aggregated.Median.Value"]
-    assert result.index.names == ["Exposure.Id", "Well.Row", "Well.Column"]
-    assert list(result["Aggregated.Median.Value"]) == [
-        3,
-        30,
-        300,
-        2,
-        20,
-        200,
-        1,
-        10,
-        100,
-    ]
-
-
-def test_aggregate_on(normalization_data_frame):
-    from sensospot_data.utils import aggregate
-
-    result = aggregate(
-        normalization_data_frame, "Value", "mean", on="Exposure.Time"
-    )
-
-    assert result.columns == ["Aggregated.Mean.Value"]
-    assert result.index.names == ["Exposure.Time"]
-    assert list(result["Aggregated.Mean.Value"]) == [111, 74, 37]
-
-
-def test_aggregate_new_name(normalization_data_frame):
-    from sensospot_data.utils import aggregate
-
-    result = aggregate(
-        normalization_data_frame,
-        "Value",
-        "mean",
-        on="Exposure.Time",
-        new_name="Foo",
-    )
-
-    assert result.columns == ["Foo"]
-    assert result.index.names == ["Exposure.Time"]
-    assert list(result["Foo"]) == [111, 74, 37]
-
-
-def test_add_aggregate_new_name(normalization_data_frame):
-    from sensospot_data.utils import add_aggregate
-
-    result = add_aggregate(
-        normalization_data_frame,
-        "Value",
-        "mean",
-        on="Exposure.Time",
-        new_name="Foo",
-    )
-
-    assert "Foo" in result.columns
-    assert len(result.columns) == len(normalization_data_frame.columns) + 1
-    assert result.index.names == [None]
-
-    for exp, val in [(10, 111), (25, 74), (50, 37)]:
-        mask = result["Exposure.Time"] == exp
-        assert result.loc[mask, "Foo"].unique() == [val]