Browse Source

removed aggregate utils, they don't belong in this library

xmlparsing
Holger Frey 3 years ago
parent
commit
dc8851d40e
  1. 2
      sensospot_data/__init__.py
  2. 4
      sensospot_data/columns.py
  3. 50
      sensospot_data/utils.py
  4. 2
      tests/test_sensovation_data.py
  5. 71
      tests/test_utils.py

2
sensospot_data/__init__.py

@ -12,8 +12,6 @@ import click @@ -12,8 +12,6 @@ import click
from .utils import ( # noqa: F401
split,
aggregate,
add_aggregate,
apply_exposure_map,
)
from .parser import parse_file, parse_folder # noqa: F401

4
sensospot_data/columns.py

@ -97,7 +97,3 @@ RAW_DATA_NORMALIZATION_MAP = { @@ -97,7 +97,3 @@ RAW_DATA_NORMALIZATION_MAP = {
RAW_DATA_BKG_SUM: CALC_NORMALIZED_BKG_SUM,
RAW_DATA_SPOT_SUM: CALC_NORMALIZED_SPOT_SUM,
}
# Prefix for aggregated data
AGGREGATION_PREFIX = "Aggregated"

50
sensospot_data/utils.py

@ -3,7 +3,6 @@ from collections.abc import Mapping, Sequence @@ -3,7 +3,6 @@ from collections.abc import Mapping, Sequence
import pandas
from .columns import (
AGGREGATION_PREFIX,
META_DATA_WELL_ROW,
META_DATA_EXPOSURE_ID,
META_DATA_WELL_COLUMN,
@ -116,52 +115,3 @@ def apply_exposure_map(data_frame, exposure_map=None): @@ -116,52 +115,3 @@ def apply_exposure_map(data_frame, exposure_map=None):
left_on=META_DATA_EXPOSURE_ID,
right_index=True,
)
def aggregate(
    data_frame, column, method, on=DEFAULT_AGGREGATION_INDEX, new_name=None
):
    """returns the aggregates of one data frame column

    data_frame: pandas data frame with the data to aggregate
    column:     column name to aggregate
    method:     method of aggregation, either the name of an aggregation
                as a string (e.g. "median") or a callable
    on:         column or list of columns to group by, defaults to
                DEFAULT_AGGREGATION_INDEX (Exposure.Id, Well.Row
                and Well.Column)
    new_name:   the name of the aggregate column
                if set to None, a prefix will be added to the original name

    returns a new data frame, indexed by the group-by columns, that holds
    a single column named `new_name`
    """
    if new_name is None:
        # A callable has no ``title()`` method, so fall back to its function
        # name (or its string representation) to build the column name.
        if isinstance(method, str):
            method_label = method
        else:
            method_label = getattr(method, "__name__", str(method))
        method_as_name = method_label.title()
        new_name = f"{AGGREGATION_PREFIX}.{method_as_name}.{column}"

    grouped = data_frame.groupby(on)
    aggregated_data = grouped.agg({column: method})
    # only one column was aggregated, so exactly one name has to be set
    aggregated_data.columns = [new_name]
    return aggregated_data
def add_aggregate(
    data_frame, column, method, on=DEFAULT_AGGREGATION_INDEX, new_name=None
):
    """aggregates one column in a data frame and
    adds the resulting column to the data frame

    data_frame: pandas data frame with the data to aggregate
    column:     column name to aggregate
    method:     method of aggregation
    on:         column or list of columns to group by, defaults to
                DEFAULT_AGGREGATION_INDEX (Exposure.Id, Well.Row
                and Well.Column)
    new_name:   the name of the aggregate column,
                if set to None, a prefix will be added to the original name

    returns the result of a left merge of the data frame with the
    aggregated column
    """
    extra_column = aggregate(
        data_frame, column, method, on=on, new_name=new_name
    )
    # the aggregate is indexed by the group-by columns, so the rows are
    # matched from the columns on the left to the index on the right
    merged = data_frame.merge(
        extra_column, left_on=on, right_index=True, how="left"
    )
    return merged

2
tests/test_sensovation_data.py

@ -6,10 +6,8 @@ def test_import_api(): @@ -6,10 +6,8 @@ def test_import_api():
from sensospot_data import run # noqa: F401
from sensospot_data import blend # noqa: F401
from sensospot_data import split # noqa: F401
from sensospot_data import aggregate # noqa: F401
from sensospot_data import create_xdr # noqa: F401
from sensospot_data import parse_file # noqa: F401
from sensospot_data import parse_folder # noqa: F401
from sensospot_data import add_aggregate # noqa: F401
from sensospot_data import normalize_values # noqa: F401
from sensospot_data import apply_exposure_map # noqa: F401

71
tests/test_utils.py

@ -178,74 +178,3 @@ def test_apply_exposure_map_from_parameters_raises_error( @@ -178,74 +178,3 @@ def test_apply_exposure_map_from_parameters_raises_error(
assert str(excinfo.value).startswith("Exposure Map: measurement")
def test_aggregate_defaults(normalization_data_frame):
    """aggregate() with default arguments groups by the default index"""
    from sensospot_data.utils import aggregate

    # the default group-by columns include "Exposure.Id", which the
    # fixture only provides under the name "Exposure.Time"
    normalization_data_frame.rename(
        columns={"Exposure.Time": "Exposure.Id"}, inplace=True
    )
    expected_column = "Aggregated.Median.Value"
    expected_medians = [3, 30, 300, 2, 20, 200, 1, 10, 100]

    medians = aggregate(normalization_data_frame, "Value", "median")

    assert medians.columns == [expected_column]
    assert medians.index.names == ["Exposure.Id", "Well.Row", "Well.Column"]
    assert list(medians[expected_column]) == expected_medians
def test_aggregate_on(normalization_data_frame):
    """aggregate() uses an explicitly given group-by column as index"""
    from sensospot_data.utils import aggregate

    expected_column = "Aggregated.Mean.Value"

    means = aggregate(
        data_frame=normalization_data_frame,
        column="Value",
        method="mean",
        on="Exposure.Time",
    )

    assert means.columns == [expected_column]
    assert means.index.names == ["Exposure.Time"]
    assert list(means[expected_column]) == [111, 74, 37]
def test_aggregate_new_name(normalization_data_frame):
    """aggregate() names the result column after `new_name` if given"""
    from sensospot_data.utils import aggregate

    custom_name = "Foo"
    arguments = {"on": "Exposure.Time", "new_name": custom_name}

    means = aggregate(normalization_data_frame, "Value", "mean", **arguments)

    assert means.columns == [custom_name]
    assert means.index.names == ["Exposure.Time"]
    assert list(means[custom_name]) == [111, 74, 37]
def test_add_aggregate_new_name(normalization_data_frame):
    """add_aggregate() appends the aggregated column to the data frame"""
    from sensospot_data.utils import add_aggregate

    extended = add_aggregate(
        normalization_data_frame,
        "Value",
        "mean",
        on="Exposure.Time",
        new_name="Foo",
    )

    # exactly one column is added and the index stays unnamed
    assert "Foo" in extended.columns
    assert len(extended.columns) == len(normalization_data_frame.columns) + 1
    assert extended.index.names == [None]

    # every row of one exposure time carries the same aggregated value
    expected_means = {10: 111, 25: 74, 50: 37}
    for exposure_time, mean_value in expected_means.items():
        rows = extended["Exposure.Time"] == exposure_time
        assert extended.loc[rows, "Foo"].unique() == [mean_value]

Loading…
Cancel
Save