
added aggregation utility methods

xmlparsing · Holger Frey, 4 years ago
commit 6a00ac943d
Changed files:
  1. sensospot_data/__init__.py (9 changed lines)
  2. sensospot_data/columns.py (4 changed lines)
  3. sensospot_data/utils.py (58 changed lines)
  4. tests/test_sensovation_data.py (2 changed lines)
  5. tests/test_utils.py (72 changed lines)

sensospot_data/__init__.py (9 changed lines)

@@ -3,14 +3,19 @@
 Parsing the numerical output from Sensovations Sensospot image analysis.
 """
 
-__version__ = "0.5.0"
+__version__ = "0.5.1"
 
 from pathlib import Path
 
 import click
 
-from .utils import split_data_frame, apply_exposure_map  # noqa: F401
+from .utils import (  # noqa: F401
+    aggregate,
+    add_aggregate,
+    split_data_frame,
+    apply_exposure_map,
+)
 from .parser import parse_file, parse_folder  # noqa: F401
 from .parameters import ExposureInfo  # noqa: F401
 from .dynamic_range import blend, create_xdr, normalize_values  # noqa: F401
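With the widened export list, the two new helpers can be imported directly from the package, which the updated import test further down also checks; a minimal sketch:

    from sensospot_data import aggregate, add_aggregate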

sensospot_data/columns.py (4 changed lines)

@@ -97,3 +97,7 @@ RAW_DATA_NORMALIZATION_MAP = {
     RAW_DATA_BKG_SUM: CALC_NORMALIZED_BKG_SUM,
     RAW_DATA_SPOT_SUM: CALC_NORMALIZED_SPOT_SUM,
 }
+
+
+# Prefix for aggregated data
+AGGREGATION_PREFIX = "Aggregated"
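The prefix is combined with the aggregation method and the original column name when utils.aggregate builds a default column name; a small illustration of the naming scheme (column "Value" and method "median" are the ones used in the tests below):

    # pattern used in utils.aggregate: f"{AGGREGATION_PREFIX}.{method.title()}.{column}"
    new_name = "Aggregated.Median.Value"  # prefix "Aggregated", method "median", column "Value"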

sensospot_data/utils.py (58 changed lines)

@@ -3,13 +3,22 @@ from collections.abc import Mapping, Sequence
 import pandas
 
 from .columns import (
+    AGGREGATION_PREFIX,
+    META_DATA_WELL_ROW,
     META_DATA_EXPOSURE_ID,
+    META_DATA_WELL_COLUMN,
     SETTINGS_EXPOSURE_TIME,
     META_DATA_PARAMETERS_TIME,
     SETTINGS_EXPOSURE_CHANNEL,
     META_DATA_PARAMETERS_CHANNEL,
 )
 
+DEFAULT_AGGREGATION_COLUMNS = [
+    META_DATA_EXPOSURE_ID,
+    META_DATA_WELL_ROW,
+    META_DATA_WELL_COLUMN,
+]
+
 
 def split_data_frame(data_frame, column):
     """ splits a data frame on unique column values """
@@ -107,3 +116,52 @@ def apply_exposure_map(data_frame, exposure_map=None):
         left_on=META_DATA_EXPOSURE_ID,
         right_index=True,
     )
+
+
+def aggregate(
+    data_frame, column, method, on=DEFAULT_AGGREGATION_COLUMNS, new_name=None
+):
+    """returns the aggregates of one data frame column
+
+    data_frame: pandas data frame with the data to aggregate
+    column: column name to aggregate
+    method: method of aggregation
+    on: list of columns to group by, defaults to
+        - Exposure.Id
+        - Well.Column
+        - Well.Row
+    new_name: the name of the aggregate column;
+        if set to None, a prefix will be added to the original name
+    """
+    if new_name is None:
+        method_as_name = method.title()
+        new_name = f"{AGGREGATION_PREFIX}.{method_as_name}.{column}"
+    grouped = data_frame.groupby(on)
+    aggregated_data = grouped.agg({column: method})
+    aggregated_data.columns = [new_name]
+    return aggregated_data
+
+
+def add_aggregate(
+    data_frame, column, method, on=DEFAULT_AGGREGATION_COLUMNS, new_name=None
+):
+    """aggregates one column in a data frame and
+    adds the resulting column to the data frame
+
+    data_frame: pandas data frame with the data to aggregate
+    column: column name to aggregate
+    method: method of aggregation
+    on: list of columns to group by, defaults to
+        - Exposure.Id
+        - Well.Column
+        - Well.Row
+    new_name: the name of the aggregate column;
+        if set to None, a prefix will be added to the original name
+    """
+    aggregated_data = aggregate(data_frame, column, method, on, new_name)
+    return data_frame.merge(
+        aggregated_data,
+        how="left",
+        left_on=on,
+        right_index=True,
+    )
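To show how the two helpers fit together, here is a brief usage sketch; the four-row DataFrame is invented for illustration, only the column names follow the default grouping columns above:

    import pandas

    from sensospot_data.utils import aggregate, add_aggregate

    # made-up example data: one exposure, two wells, two spots per well
    data = pandas.DataFrame(
        {
            "Exposure.Id": [1, 1, 1, 1],
            "Well.Row": ["A", "A", "B", "B"],
            "Well.Column": [1, 1, 1, 1],
            "Value": [10, 20, 30, 50],
        }
    )

    # one row per (Exposure.Id, Well.Row, Well.Column) group, with a single
    # column "Aggregated.Median.Value" (15 for well A, 40 for well B)
    medians = aggregate(data, "Value", "median")

    # same aggregation, merged back onto every original row as an extra column
    with_median = add_aggregate(data, "Value", "median")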

tests/test_sensovation_data.py (2 changed lines)

@@ -5,9 +5,11 @@ def test_import_api():
     from sensospot_data import ExposureInfo  # noqa: F401
     from sensospot_data import run  # noqa: F401
     from sensospot_data import blend  # noqa: F401
+    from sensospot_data import aggregate  # noqa: F401
     from sensospot_data import create_xdr  # noqa: F401
     from sensospot_data import parse_file  # noqa: F401
     from sensospot_data import parse_folder  # noqa: F401
+    from sensospot_data import add_aggregate  # noqa: F401
     from sensospot_data import normalize_values  # noqa: F401
     from sensospot_data import split_data_frame  # noqa: F401
     from sensospot_data import apply_exposure_map  # noqa: F401

tests/test_utils.py (72 changed lines)

@@ -177,3 +177,75 @@ def test_apply_exposure_map_from_parameters_raises_error(
         apply_exposure_map(data_frame_without_params, None)
 
     assert str(excinfo.value).startswith("Exposure Map: measurement")
+
+
+def test_aggregate_defaults(normalization_data_frame):
+    from sensospot_data.utils import aggregate
+
+    normalization_data_frame.rename(
+        columns={"Exposure.Time": "Exposure.Id"}, inplace=True
+    )
+
+    result = aggregate(normalization_data_frame, "Value", "median")
+
+    assert result.columns == ["Aggregated.Median.Value"]
+    assert result.index.names == ["Exposure.Id", "Well.Row", "Well.Column"]
+    assert list(result["Aggregated.Median.Value"]) == [
+        3,
+        30,
+        300,
+        2,
+        20,
+        200,
+        1,
+        10,
+        100,
+    ]
+
+
+def test_aggregate_on(normalization_data_frame):
+    from sensospot_data.utils import aggregate
+
+    result = aggregate(
+        normalization_data_frame, "Value", "mean", on="Exposure.Time"
+    )
+
+    assert result.columns == ["Aggregated.Mean.Value"]
+    assert result.index.names == ["Exposure.Time"]
+    assert list(result["Aggregated.Mean.Value"]) == [111, 74, 37]
+
+
+def test_aggregate_new_name(normalization_data_frame):
+    from sensospot_data.utils import aggregate
+
+    result = aggregate(
+        normalization_data_frame,
+        "Value",
+        "mean",
+        on="Exposure.Time",
+        new_name="Foo",
+    )
+
+    assert result.columns == ["Foo"]
+    assert result.index.names == ["Exposure.Time"]
+    assert list(result["Foo"]) == [111, 74, 37]
+
+
+def test_add_aggregate_new_name(normalization_data_frame):
+    from sensospot_data.utils import add_aggregate
+
+    result = add_aggregate(
+        normalization_data_frame,
+        "Value",
+        "mean",
+        on="Exposure.Time",
+        new_name="Foo",
+    )
+
+    assert "Foo" in result.columns
+    assert len(result.columns) == len(normalization_data_frame.columns) + 1
+    assert result.index.names == [None]
+
+    for exp, val in [(10, 111), (25, 74), (50, 37)]:
+        mask = result["Exposure.Time"] == exp
+        assert result.loc[mask, "Foo"].unique() == [val]
