From 9acf7d9c0a592978da450f826232120757b2e30c Mon Sep 17 00:00:00 2001
From: Holger Frey
Date: Thu, 6 Aug 2020 13:02:19 +0200
Subject: [PATCH] added measurement normalization

---
 .pre-commit-config.yaml               |   2 +-
 Makefile                              |   4 +-
 sensospot_data/__init__.py            |   1 +
 sensospot_data/columns.py             |  61 ++++++
 sensospot_data/normalisation.py       | 182 ++++++++++++++++
 sensospot_data/parameters.py          |  24 ++-
 sensospot_data/parser.py              |  34 +--
 tests/conftest.py                     |  63 ++++++
 tests/test_normalisation.py           | 294 ++++++++++++++++++++++++++
 tests/test_parameters.py              |   7 +-
 tests/test_parser.py                  |  47 ++--
 tests/test_sensovation_data_parser.py |   1 +
 12 files changed, 661 insertions(+), 59 deletions(-)
 create mode 100644 sensospot_data/columns.py
 create mode 100644 sensospot_data/normalisation.py
 mode change 100644 => 100755 sensospot_data/parser.py
 create mode 100644 tests/test_normalisation.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f60dd2b..74db94f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
       pass_filenames: false
     - id: flake8
      name: flake8
-      entry: flake8 --ignore E231 sensospot_data tests
+      entry: flake8 --ignore E231,W503 sensospot_data tests
      language: system
      pass_filenames: false
    - id: pytest
diff --git a/Makefile b/Makefile
index 515f59a..ace9ea1 100644
--- a/Makefile
+++ b/Makefile
@@ -54,10 +54,10 @@ lint: ## reformat with black and check style with flake8
 	isort -rc sensospot_data
 	isort -rc tests
 	black sensospot_data tests
-	flake8 --ignore E231 sensospot_data tests
+	flake8 --ignore E231,W503 sensospot_data tests

 test: ## run tests quickly with the default Python
-	pytest tests -x --disable-warnings -k "not app"
+	pytest tests -x --disable-warnings

 coverage: ## full test suite, check code coverage and open coverage report
 	pytest tests --cov=sensospot_data
diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py
index 1f921ea..38a6043 100644
--- a/sensospot_data/__init__.py
+++ b/sensospot_data/__init__.py
@@ -15,3 +15,4 @@ from .parser import (  # noqa: F401
     process_folder,
     parse_multiple_files,
 )
+from .normalisation import normalize_measurement  # noqa: F401
diff --git a/sensospot_data/columns.py b/sensospot_data/columns.py
new file mode 100644
index 0000000..4214980
--- /dev/null
+++ b/sensospot_data/columns.py
@@ -0,0 +1,61 @@
+""" Column name definitions """
+
+# original, unmodified column names
+COL_NAME_POS_X = "Pos.X"
+COL_NAME_POS_Y = "Pos.Y"
+COL_NAME_BKG_MEAN = "Bkg.Mean"
+COL_NAME_SPOT_MEAN = "Spot.Mean"
+COL_NAME_BKG_MEDIAN = "Bkg.Median"
+COL_NAME_SPOT_MEDIAN = "Spot.Median"
+COL_NAME_BKG_STDDEV = "Bkg.StdDev"
+COL_NAME_SPOT_STDDEV = "Spot.StdDev"
+COL_NAME_BKG_SUM = "Bkg.Sum"
+COL_NAME_SPOT_SUM = "Spot.Sum"
+COL_NAME_BKG_AREA = "Bkg.Area"
+COL_NAME_SPOT_AREA = "Spot.Area"
+COL_NAME_SPOT_SAT = "Spot.Sat. (%)"
+COL_NAME_POS_NOM_X = "Pos.Nom.X"
+COL_NAME_POS_NOM_Y = "Pos.Nom.Y"
+
+# replacement column names
+COL_NAME_POS_ID = "Pos.Id"
+COL_NAME_SPOT_FOUND = "Spot.Found"
+COL_NAME_SPOT_DIAMETER = "Spot.Diameter"
+
+# additional column
+COL_NAME_SPOT_OVERFLOW = "Spot.Overflow"
+
+# well information
+COL_NAME_WELL_ROW = "Well.Row"
+COL_NAME_WELL_COLUMN = "Well.Column"
+
+# parsed measurement parameter information
+COL_NAME_PARAMETERS_CHANNEL = "Parameters.Channel"
+COL_NAME_PARAMETERS_TIME = "Parameters.Time"
+
+# applied exposure info
+COL_NAME_EXPOSURE_ID = "Exposure.Id"
+COL_NAME_EXPOSURE_CHANNEL = "Exposure.Channel"
+COL_NAME_EXPOSURE_TIME = "Exposure.Time"
+
+# normalized columns
+COL_NAME_NORMALIZED_EXPOSURE_TIME = f"Normalized.{COL_NAME_EXPOSURE_TIME}"
+COL_NAME_NORMALIZED_BKG_MEAN = f"Normalized.{COL_NAME_BKG_MEAN}"
+COL_NAME_NORMALIZED_SPOT_MEAN = f"Normalized.{COL_NAME_SPOT_MEAN}"
+COL_NAME_NORMALIZED_BKG_MEDIAN = f"Normalized.{COL_NAME_BKG_MEDIAN}"
+COL_NAME_NORMALIZED_SPOT_MEDIAN = f"Normalized.{COL_NAME_SPOT_MEDIAN}"
+COL_NAME_NORMALIZED_BKG_STDDEV = f"Normalized.{COL_NAME_BKG_STDDEV}"
+COL_NAME_NORMALIZED_SPOT_STDDEV = f"Normalized.{COL_NAME_SPOT_STDDEV}"
+COL_NAME_NORMALIZED_BKG_SUM = f"Normalized.{COL_NAME_BKG_SUM}"
+COL_NAME_NORMALIZED_SPOT_SUM = f"Normalized.{COL_NAME_SPOT_SUM}"
+
+COLUMN_NORMALIZATION = {
+    COL_NAME_BKG_MEAN: COL_NAME_NORMALIZED_BKG_MEAN,
+    COL_NAME_SPOT_MEAN: COL_NAME_NORMALIZED_SPOT_MEAN,
+    COL_NAME_BKG_MEDIAN: COL_NAME_NORMALIZED_BKG_MEDIAN,
+    COL_NAME_SPOT_MEDIAN: COL_NAME_NORMALIZED_SPOT_MEDIAN,
+    COL_NAME_BKG_STDDEV: COL_NAME_NORMALIZED_BKG_STDDEV,
+    COL_NAME_SPOT_STDDEV: COL_NAME_NORMALIZED_SPOT_STDDEV,
+    COL_NAME_BKG_SUM: COL_NAME_NORMALIZED_BKG_SUM,
+    COL_NAME_SPOT_SUM: COL_NAME_NORMALIZED_SPOT_SUM,
+}
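Keeping the raw-to-normalized pairs in one dictionary lets the normalisation
code iterate over the pairs instead of hard-coding column names. A quick
interpreter sketch (for illustration only) of how the names above resolve:

    >>> from sensospot_data.columns import (
    ...     COLUMN_NORMALIZATION, COL_NAME_SPOT_MEAN,
    ... )
    >>> COLUMN_NORMALIZATION[COL_NAME_SPOT_MEAN]
    'Normalized.Spot.Mean'
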
diff --git a/sensospot_data/normalisation.py b/sensospot_data/normalisation.py
new file mode 100644
index 0000000..434b82b
--- /dev/null
+++ b/sensospot_data/normalisation.py
@@ -0,0 +1,182 @@
+import numpy
+
+from .columns import (
+    COL_NAME_POS_ID,
+    COL_NAME_WELL_ROW,
+    COL_NAME_SPOT_MEAN,
+    COL_NAME_EXPOSURE_ID,
+    COL_NAME_WELL_COLUMN,
+    COLUMN_NORMALIZATION,
+    COL_NAME_EXPOSURE_TIME,
+    COL_NAME_SPOT_OVERFLOW,
+    COL_NAME_PARAMETERS_TIME,
+    COL_NAME_EXPOSURE_CHANNEL,
+    COL_NAME_PARAMETERS_CHANNEL,
+    COL_NAME_NORMALIZED_EXPOSURE_TIME,
+)
+
+
+def _split_data_frame(data_frame, column):
+    """ splits a data frame on unique column values """
+    values = data_frame[column].unique()
+    masks = {value: (data_frame[column] == value) for value in values}
+    return {value: data_frame[mask] for value, mask in masks.items()}
+
+
+def _infer_exposure_from_parameters(data_frame):
+    """ infer the exposures from measurement parameters
+
+    will raise a ValueError if the parameters contain NaNs
+    """
+    df = data_frame  # shorthand for cleaner code
+
+    if (
+        df[COL_NAME_PARAMETERS_CHANNEL].hasnans
+        or df[COL_NAME_PARAMETERS_TIME].hasnans
+    ):
+        raise ValueError("Exposure Map: measurement parameters incomplete")
+
+    df[COL_NAME_EXPOSURE_CHANNEL] = df[COL_NAME_PARAMETERS_CHANNEL]
+    df[COL_NAME_EXPOSURE_TIME] = df[COL_NAME_PARAMETERS_TIME]
+    return df
+
+
+def apply_exposure_map(data_frame, exposure_map=None):
+    """ applies the parameters of an exposure map to the data frame
+
+    exposure map:
+        keys: must be the same as the exposure ids,
+        values: objects with at least time and channel attributes
+
+    if the exposure map is None, the values from the optionally parsed
+    measurement parameters are used.
+
+    will raise a ValueError if the provided exposure map does not match the
+    exposure ids.
+    """
+
+    if exposure_map is None:
+        return _infer_exposure_from_parameters(data_frame)
+
+    existing = set(data_frame[COL_NAME_EXPOSURE_ID].unique())
+    provided = set(exposure_map.keys())
+    if existing != provided:
+        raise ValueError(
+            f"Exposure Map differs from data frame: {provided} != {existing}"
+        )
+
+    data_frame[COL_NAME_EXPOSURE_CHANNEL] = numpy.nan
+    data_frame[COL_NAME_EXPOSURE_TIME] = numpy.nan
+    for exposure_id, exposure_info in exposure_map.items():
+        mask = data_frame[COL_NAME_EXPOSURE_ID] == exposure_id
+        data_frame.loc[mask, COL_NAME_EXPOSURE_CHANNEL] = exposure_info.channel
+        data_frame.loc[mask, COL_NAME_EXPOSURE_TIME] = exposure_info.time
+    return data_frame
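For illustration, any mapping whose values carry channel and time attributes
works as an exposure map; the new test suite uses a namedtuple for this. A
minimal sketch, assuming `data_frame` came out of `process_folder`:

    from collections import namedtuple

    ExposureSetting = namedtuple("ExposureSetting", ["channel", "time"])

    exposure_map = {
        1: ExposureSetting("Cy3", 100),
        2: ExposureSetting("Cy5", 15),
        3: ExposureSetting("Cy5", 150),
    }
    annotated = apply_exposure_map(data_frame, exposure_map)

The keys must match the Exposure.Id values in the frame exactly, otherwise
the ValueError described in the docstring is raised.
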
+
+def _check_overflow_limit(data_frame, column=COL_NAME_SPOT_MEAN, limit=0.5):
+    """ add overflow info, based on column and limit """
+    data_frame[COL_NAME_SPOT_OVERFLOW] = data_frame[column] > limit
+    return data_frame
+
+
+def reduce_overflow(data_frame, column=COL_NAME_SPOT_MEAN, limit=0.5):
+    """ reduces the data set per channel, eliminating overflowing spots """
+    data_frame = _check_overflow_limit(data_frame, column, limit)
+
+    split_frames = _split_data_frame(data_frame, COL_NAME_EXPOSURE_CHANNEL)
+
+    return {
+        channel_id: _reduce_overflow_in_channel(channel_frame)
+        for channel_id, channel_frame in split_frames.items()
+    }
+
+
+def _reduce_overflow_in_channel(channel_frame):
+    """ does the heavy lifting for reduce_overflow """
+
+    split_frames = _split_data_frame(channel_frame, COL_NAME_EXPOSURE_TIME)
+
+    if len(split_frames) == 1:
+        # shortcut, if there is only one exposure in the channel
+        return channel_frame
+
+    exposure_times = sorted(split_frames.keys(), reverse=True)
+    max_time, *rest_times = exposure_times
+
+    multi_index = [COL_NAME_WELL_ROW, COL_NAME_WELL_COLUMN, COL_NAME_POS_ID]
+    result_frame = split_frames[max_time].set_index(multi_index)
+
+    for next_time in rest_times:
+        mask = result_frame[COL_NAME_SPOT_OVERFLOW] == True  # noqa: E712
+        next_frame = split_frames[next_time].set_index(multi_index)
+        result_frame.loc[mask] = next_frame.loc[mask]
+
+    return result_frame.reset_index()
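The reduction walks the exposure times from longest to shortest: the longest
exposure provides the initial rows, and every spot still flagged as
overflowing is replaced by the row with the same well and position from the
next shorter exposure, until it no longer overflows or no shorter exposure is
left. A call sketch, reusing the hypothetical `annotated` frame from above:

    per_channel = reduce_overflow(annotated, column="Spot.Mean", limit=0.5)
    cy5_frame = per_channel["Cy5"]  # one overflow-reduced frame per channel
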
"Cy5") + values: target exposure time for normalization + + If normalization_map is None, the max exposure time per channel is used + """ + complete_map = _infer_normalization_map(split_data_frames) + if normalization_map is not None: + complete_map.update(normalization_map) + + return { + key: _normalize_exposure(frame, complete_map[key]) + for key, frame in split_data_frames.items() + } + + +def _normalize_exposure(channel_frame, normalized_time): + """ add time normalized values to a channel data frames """ + channel_frame[COL_NAME_NORMALIZED_EXPOSURE_TIME] = normalized_time + + for original_col, normalized_col in COLUMN_NORMALIZATION.items(): + channel_frame[normalized_col] = ( + channel_frame[original_col] / channel_frame[COL_NAME_EXPOSURE_TIME] + ) * channel_frame[COL_NAME_NORMALIZED_EXPOSURE_TIME] + + return channel_frame + + +def normalize_measurement( + data_frame, + exposure_map=None, + normalization_map=None, + overflow_column=COL_NAME_SPOT_MEAN, + overflow_limit=0.5, +): + """ augment normalize the measurement exposures + + exposure map: + keys: must be the same as the exposure ids, + values: objects with at least time and channel attributes + if the exposure map is None, the values from the optionally parsed + measurement parameters are used. + + normalization_map: + keys: channel identifier (e.g. "Cy5") + values: target exposure time for normalization + If normalization_map is None, the max exposure time per channel is used + """ + + exposure_data_frame = apply_exposure_map(data_frame, exposure_map) + split_data_frames = reduce_overflow( + exposure_data_frame, overflow_column, overflow_limit + ) + return normalize_exposure_time(split_data_frames, normalization_map) diff --git a/sensospot_data/parameters.py b/sensospot_data/parameters.py index a16e175..80ca6e2 100644 --- a/sensospot_data/parameters.py +++ b/sensospot_data/parameters.py @@ -9,6 +9,12 @@ from collections import namedtuple import numpy from defusedxml import ElementTree +from .columns import ( + COL_NAME_EXPOSURE_ID, + COL_NAME_PARAMETERS_TIME, + COL_NAME_PARAMETERS_CHANNEL, +) + MeasurementParams = namedtuple("MeasurementParams", ["channel", "time"]) @@ -53,22 +59,22 @@ def _get_measurement_params(folder): def _add_measurement_params(data_frame, params): """ adds measurement parameters to a data frame """ for exposure_id, info in params.items(): - mask = data_frame["Exposure.Id"] == exposure_id - data_frame.loc[mask, "Parameters.Channel"] = info.channel - data_frame.loc[mask, "Parameters.Time"] = info.time - data_frame["Parameters.Channel"] = data_frame["Parameters.Channel"].astype( - "category" - ) + mask = data_frame[COL_NAME_EXPOSURE_ID] == exposure_id + data_frame.loc[mask, COL_NAME_PARAMETERS_CHANNEL] = info.channel + data_frame.loc[mask, COL_NAME_PARAMETERS_TIME] = info.time + data_frame[COL_NAME_PARAMETERS_CHANNEL] = data_frame[ + COL_NAME_PARAMETERS_CHANNEL + ].astype("category") return data_frame def add_optional_measurement_parameters(data_frame, folder): """ adds measurement params to the data frame, if they could be parsed """ - data_frame["Parameters.Channel"] = numpy.nan - data_frame["Parameters.Time"] = numpy.nan + data_frame[COL_NAME_PARAMETERS_CHANNEL] = numpy.nan + data_frame[COL_NAME_PARAMETERS_TIME] = numpy.nan params = _get_measurement_params(folder) if params: - available_exposures = set(data_frame["Exposure.Id"].unique()) + available_exposures = set(data_frame[COL_NAME_EXPOSURE_ID].unique()) if available_exposures == set(params.keys()): return _add_measurement_params(data_frame, params) 
diff --git a/sensospot_data/parameters.py b/sensospot_data/parameters.py
index a16e175..80ca6e2 100644
--- a/sensospot_data/parameters.py
+++ b/sensospot_data/parameters.py
@@ -9,6 +9,12 @@ from collections import namedtuple
 import numpy
 from defusedxml import ElementTree

+from .columns import (
+    COL_NAME_EXPOSURE_ID,
+    COL_NAME_PARAMETERS_TIME,
+    COL_NAME_PARAMETERS_CHANNEL,
+)
+
 MeasurementParams = namedtuple("MeasurementParams", ["channel", "time"])


@@ -53,22 +59,22 @@ def _get_measurement_params(folder):
 def _add_measurement_params(data_frame, params):
     """ adds measurement parameters to a data frame """
     for exposure_id, info in params.items():
-        mask = data_frame["Exposure.Id"] == exposure_id
-        data_frame.loc[mask, "Parameters.Channel"] = info.channel
-        data_frame.loc[mask, "Parameters.Time"] = info.time
-    data_frame["Parameters.Channel"] = data_frame["Parameters.Channel"].astype(
-        "category"
-    )
+        mask = data_frame[COL_NAME_EXPOSURE_ID] == exposure_id
+        data_frame.loc[mask, COL_NAME_PARAMETERS_CHANNEL] = info.channel
+        data_frame.loc[mask, COL_NAME_PARAMETERS_TIME] = info.time
+    data_frame[COL_NAME_PARAMETERS_CHANNEL] = data_frame[
+        COL_NAME_PARAMETERS_CHANNEL
+    ].astype("category")
     return data_frame


 def add_optional_measurement_parameters(data_frame, folder):
     """ adds measurement params to the data frame, if they could be parsed """
-    data_frame["Parameters.Channel"] = numpy.nan
-    data_frame["Parameters.Time"] = numpy.nan
+    data_frame[COL_NAME_PARAMETERS_CHANNEL] = numpy.nan
+    data_frame[COL_NAME_PARAMETERS_TIME] = numpy.nan
     params = _get_measurement_params(folder)
     if params:
-        available_exposures = set(data_frame["Exposure.Id"].unique())
+        available_exposures = set(data_frame[COL_NAME_EXPOSURE_ID].unique())
         if available_exposures == set(params.keys()):
             return _add_measurement_params(data_frame, params)
     return data_frame
diff --git a/sensospot_data/parser.py b/sensospot_data/parser.py
old mode 100644
new mode 100755
index 809d715..c9d38e6
--- a/sensospot_data/parser.py
+++ b/sensospot_data/parser.py
@@ -9,6 +9,14 @@ from collections import namedtuple

 import pandas

+from .columns import (
+    COL_NAME_POS_ID,
+    COL_NAME_WELL_ROW,
+    COL_NAME_SPOT_FOUND,
+    COL_NAME_EXPOSURE_ID,
+    COL_NAME_WELL_COLUMN,
+    COL_NAME_SPOT_DIAMETER,
+)
 from .parameters import add_optional_measurement_parameters

 REGEX_WELL = re.compile(
@@ -21,9 +29,9 @@ REGEX_WELL = re.compile(
 COLUMNS_TO_DROP = ["Rect.", "Contour"]

 COLUMNS_RENAME_MAP = {
-    " ID ": "Pos.Id",
-    "Found": "Spot.Found",
-    "Dia.": "Spot.Diameter",
+    " ID ": COL_NAME_POS_ID,
+    "Found": COL_NAME_SPOT_FOUND,
+    "Dia.": COL_NAME_SPOT_DIAMETER,
 }

 CACHE_FILE_NAME = "raw_data.h5"
@@ -79,9 +87,9 @@ def parse_file(data_file):
     """ parses one data file and adds metadata to result """
     measurement_info = _extract_measurement_info(data_file)
     data_frame = _parse_csv(data_file)
-    data_frame["Well.Row"] = measurement_info.row
-    data_frame["Well.Column"] = measurement_info.column
-    data_frame["Exposure.Id"] = measurement_info.exposure
+    data_frame[COL_NAME_WELL_ROW] = measurement_info.row
+    data_frame[COL_NAME_WELL_COLUMN] = measurement_info.column
+    data_frame[COL_NAME_EXPOSURE_ID] = measurement_info.exposure
     return _cleanup_data_columns(data_frame)


@@ -93,7 +101,9 @@ def parse_multiple_files(file_list):
     data_frame = next(collection)
     for next_frame in collection:
         data_frame = data_frame.append(next_frame, ignore_index=True)
-    data_frame["Well.Row"] = data_frame["Well.Row"].astype("category")
+    data_frame[COL_NAME_WELL_ROW] = data_frame[COL_NAME_WELL_ROW].astype(
+        "category"
+    )
     return data_frame


@@ -107,10 +117,10 @@ def _list_csv_files(folder):

 def _sanity_check(data_frame):
     """ checks some basic constrains of a combined data frame """
-    field_rows = len(data_frame["Well.Row"].unique())
-    field_cols = len(data_frame["Well.Column"].unique())
-    exposures = len(data_frame["Exposure.Id"].unique())
-    spot_positions = len(data_frame["Pos.Id"].unique())
+    field_rows = len(data_frame[COL_NAME_WELL_ROW].unique())
+    field_cols = len(data_frame[COL_NAME_WELL_COLUMN].unique())
+    exposures = len(data_frame[COL_NAME_EXPOSURE_ID].unique())
+    spot_positions = len(data_frame[COL_NAME_POS_ID].unique())
     expected_rows = field_rows * field_cols * exposures * spot_positions
     if expected_rows != len(data_frame):
         raise ValueError("Measurements are missing")
@@ -125,7 +135,7 @@ def parse_folder(folder):
     return _sanity_check(data_frame)


-def process_folder(folder, exposures=None, use_cache=True):
+def process_folder(folder, use_cache=True):
     """ parses all csv files in a folder, adds some checks and more data """
     hdf5_path = folder / CACHE_FILE_NAME
     if use_cache:
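Note the signature change: `process_folder` drops the unused `exposures`
argument, since exposure information is now attached downstream by
`apply_exposure_map` (or inferred from the optional measurement parameters).
Callers pass only the folder and, optionally, `use_cache`:

    raw_data = process_folder(measurement_folder, use_cache=True)
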
diff --git a/tests/conftest.py b/tests/conftest.py
index 3858b31..735e87d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,6 +2,7 @@

 from pathlib import Path

+import pandas
 import pytest

 EXAMPLE_DIR_WO_PARAMS = "mtp_wo_parameters"
@@ -35,3 +36,65 @@ def dir_for_caching(tmpdir, example_file):
     dest = temp_path / example_file.name
     shutil.copy(example_file, dest)
     yield temp_path
+
+
+@pytest.fixture
+def normalization_data_frame():
+    from sensospot_data.columns import COLUMN_NORMALIZATION
+
+    overflow_test_values = [
+        (1, 1, 1, 50, 1, 0),
+        (1, 1, 2, 50, 1, 2),
+        (1, 1, 3, 50, 1, 2),
+        (1, 1, 4, 50, 1, 0),
+        (1, 1, 1, 25, 2, 0),
+        (1, 1, 2, 25, 2, 0),
+        (1, 1, 3, 25, 2, 2),
+        (1, 1, 4, 25, 2, 2),
+        (1, 1, 1, 10, 3, 0),
+        (1, 1, 2, 10, 3, 0),
+        (1, 1, 3, 10, 3, 2),
+        (1, 1, 4, 10, 3, 0),
+        (1, 2, 1, 50, 10, 0),
+        (1, 2, 2, 50, 10, 0),
+        (1, 2, 3, 50, 10, 0),
+        (1, 2, 4, 50, 10, 0),
+        (1, 2, 1, 25, 20, 0),
+        (1, 2, 2, 25, 20, 0),
+        (1, 2, 3, 25, 20, 2),
+        (1, 2, 4, 25, 20, 2),
+        (1, 2, 1, 10, 30, 0),
+        (1, 2, 2, 10, 30, 0),
+        (1, 2, 3, 10, 30, 2),
+        (1, 2, 4, 10, 30, 0),
+        (2, 1, 1, 50, 100, 0),
+        (2, 1, 2, 50, 100, 0),
+        (2, 1, 3, 50, 100, 0),
+        (2, 1, 4, 50, 100, 0),
+        (2, 1, 1, 25, 200, 0),
+        (2, 1, 2, 25, 200, 0),
+        (2, 1, 3, 25, 200, 2),
+        (2, 1, 4, 25, 200, 2),
+        (2, 1, 1, 10, 300, 0),
+        (2, 1, 2, 10, 300, 0),
+        (2, 1, 3, 10, 300, 2),
+        (2, 1, 4, 10, 300, 0),
+    ]
+    overflow_test_keys = [
+        "Well.Row",
+        "Well.Column",
+        "Pos.Id",
+        "Exposure.Time",
+        "Value",
+        "Saturation",
+    ]
+    overflow_test_data = [
+        dict(zip(overflow_test_keys, v)) for v in overflow_test_values
+    ]
+    data_frame = pandas.DataFrame(overflow_test_data)
+    data_frame["Exposure.Channel"] = "Cy5"
+
+    for value_column in COLUMN_NORMALIZATION.keys():
+        data_frame[value_column] = data_frame["Value"]
+
+    yield data_frame
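Each fixture tuple follows `overflow_test_keys`: (Well.Row, Well.Column,
Pos.Id, Exposure.Time, Value, Saturation). With an overflow limit of 1 on the
Saturation column, well (1, 1) exercises every fallback path of the overflow
reduction:

    Pos.Id 1: clean at 50            -> keeps Value 1
    Pos.Id 2: saturated at 50        -> falls back to the 25 row, Value 2
    Pos.Id 3: saturated at 50 and 25 -> falls back to the 10 row, Value 3
    Pos.Id 4: clean at 50            -> keeps Value 1

which matches the [1, 2, 3, 1, ...] expectations asserted in the tests below.
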
diff --git a/tests/test_normalisation.py b/tests/test_normalisation.py
new file mode 100644
index 0000000..631cebf
--- /dev/null
+++ b/tests/test_normalisation.py
@@ -0,0 +1,294 @@
+from collections import namedtuple
+
+import pandas
+import pytest
+
+from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
+
+ExposureSetting = namedtuple("ExposureSetting", ["channel", "time"])
+
+
+def test_split_data_frame(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import _split_data_frame
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)
+
+    result = _split_data_frame(data_frame, "Well.Row")
+
+    assert set(result.keys()) == set("ABC")
+    for key, value_df in result.items():
+        assert set(value_df["Well.Row"].unique()) == {key}
+
+
+def test_infer_exposure_from_parameters(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import _infer_exposure_from_parameters
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)
+    result = _infer_exposure_from_parameters(data_frame)
+
+    assert all(result["Exposure.Channel"] == result["Parameters.Channel"])
+    assert all(result["Exposure.Time"] == result["Parameters.Time"])
+
+
+def test_infer_exposure_from_parameters_raises_error(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import _infer_exposure_from_parameters
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WO_PARAMS)
+
+    with pytest.raises(ValueError) as excinfo:
+        _infer_exposure_from_parameters(data_frame)
+
+    assert str(excinfo.value).startswith("Exposure Map: measurement")
+
+
+def test_apply_exposure_map(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import apply_exposure_map
+
+    exposure_map = {
+        1: ExposureSetting("Cy3", 100),
+        2: ExposureSetting("Cy5", 15),
+        3: ExposureSetting("Cy5", 150),
+    }
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)
+    result = apply_exposure_map(data_frame, exposure_map)
+
+    for key, value in exposure_map.items():
+        mask = result["Exposure.Id"] == key
+        partial = result.loc[mask]
+        assert set(partial["Exposure.Channel"].unique()) == {value.channel}
+        assert set(partial["Exposure.Time"].unique()) == {value.time}
+
+
+def test_apply_exposure_map_raises_error(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import apply_exposure_map
+
+    exposure_map = {
+        1: ExposureSetting("Cy3", 100),
+        2: ExposureSetting("Cy5", 15),
+        "X": ExposureSetting("Cy5", 150),
+    }
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)
+
+    with pytest.raises(ValueError) as excinfo:
+        apply_exposure_map(data_frame, exposure_map)
+
+    assert str(excinfo.value).startswith("Exposure Map differs")
+
+
+def test_apply_exposure_map_from_parameters(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import apply_exposure_map
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)
+    result = apply_exposure_map(data_frame, None)
+
+    assert all(result["Exposure.Channel"] == result["Parameters.Channel"])
+    assert all(result["Exposure.Time"] == result["Parameters.Time"])
+
+
+def test_apply_exposure_map_from_parameters_raises_error(example_dir):
+    from sensospot_data.parser import process_folder
+    from sensospot_data.normalisation import apply_exposure_map
+
+    data_frame = process_folder(example_dir / EXAMPLE_DIR_WO_PARAMS)
+
+    with pytest.raises(ValueError) as excinfo:
+        apply_exposure_map(data_frame, None)
+
+    assert str(excinfo.value).startswith("Exposure Map: measurement")
+
+
+def test_check_overflow_limit_defaults():
+    from sensospot_data.normalisation import _check_overflow_limit
+
+    data_frame = pandas.DataFrame(data={"Spot.Mean": [0.1, 0.5, 0.6]})
+
+    result = _check_overflow_limit(data_frame)
+
+    assert list(result["Spot.Overflow"]) == [False, False, True]
+
+
+def test_check_overflow_limit_custom_limit():
+    from sensospot_data.normalisation import _check_overflow_limit
+
+    data_frame = pandas.DataFrame(data={"Spot.Sat": [4, 2, 3, 4]})
+
+    result = _check_overflow_limit(data_frame, "Spot.Sat", 2)
+
+    assert list(result["Spot.Overflow"]) == [True, False, True, True]
+
+
+def test_reduce_overflow_in_channel(normalization_data_frame):
+    from sensospot_data.normalisation import (
+        _reduce_overflow_in_channel,
+        _check_overflow_limit,
+    )
+
+    data_frame = _check_overflow_limit(
+        normalization_data_frame, "Saturation", 1
+    )
+    result = _reduce_overflow_in_channel(data_frame)
+
+    sorted_results = result.sort_values(
+        by=["Well.Row", "Well.Column", "Pos.Id"]
+    )
+
+    assert list(sorted_results["Value"]) == [
+        1,
+        2,
+        3,
+        1,
+        10,
+        10,
+        10,
+        10,
+        100,
+        100,
+        100,
+        100,
+    ]
+
+
+def test_reduce_overflow_in_channel_shortcut(normalization_data_frame):
+    from sensospot_data.normalisation import (
+        _reduce_overflow_in_channel,
+        _check_overflow_limit,
+    )
+
+    normalization_data_frame["Exposure.Time"] = 1
+
+    data_frame = _check_overflow_limit(
+        normalization_data_frame, "Saturation", 1
+    )
+    result = _reduce_overflow_in_channel(data_frame)
+
+    assert result is data_frame
+
+
+def test_reduce_overflow(normalization_data_frame):
+    from sensospot_data.normalisation import reduce_overflow
+
+    result = reduce_overflow(normalization_data_frame, "Saturation", 1)
+
+    assert "Cy5" in result
+
+    sorted_results = result["Cy5"].sort_values(
+        by=["Well.Row", "Well.Column", "Pos.Id"]
+    )
+
+    assert list(sorted_results["Value"]) == [
+        1,
+        2,
+        3,
+        1,
+        10,
+        10,
+        10,
+        10,
+        100,
+        100,
+        100,
+        100,
+    ]
+
+
+def test_infer_normalization_map(normalization_data_frame):
+    from sensospot_data.normalisation import (
+        _infer_normalization_map,
+        _split_data_frame,
+    )
+
+    normalization_data_frame.loc[5, "Exposure.Channel"] = "Cy3"
+    split_frames = _split_data_frame(
+        normalization_data_frame, "Exposure.Channel"
+    )
+
+    result = _infer_normalization_map(split_frames)
+
+    assert result == {"Cy3": 25, "Cy5": 50}
+
+
+def test_normalize_exposure(normalization_data_frame):
+    from sensospot_data.normalisation import (
+        _normalize_exposure,
+        reduce_overflow,
+    )
+    from sensospot_data.columns import COLUMN_NORMALIZATION
+
+    reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)
+    result = _normalize_exposure(reduced["Cy5"], 100)
+
+    sorted_results = result.sort_values(
+        by=["Well.Row", "Well.Column", "Pos.Id"]
+    )
+    expected_values = [2, 8, 30, 2, 20, 20, 20, 20, 200, 200, 200, 200]
+
+    for normalized_col in COLUMN_NORMALIZATION.values():
+        assert list(sorted_results[normalized_col]) == expected_values
+
+
+def test_normalize_exposure_time(normalization_data_frame):
+    from sensospot_data.normalisation import (
+        normalize_exposure_time,
+        reduce_overflow,
+    )
+
+    reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)
+    result = normalize_exposure_time(reduced, {"Cy5": 100, "Cy3": 0})
+
+    assert "Cy5" in result
+
+    sorted_results = result["Cy5"].sort_values(
+        by=["Well.Row", "Well.Column", "Pos.Id"]
+    )
+    expected_values = [2, 8, 30, 2, 20, 20, 20, 20, 200, 200, 200, 200]
+
+    assert list(sorted_results["Normalized.Spot.Mean"]) == expected_values
+
+
+def test_normalize_exposure_time_inferred_map(normalization_data_frame):
+    from sensospot_data.normalisation import (
+        normalize_exposure_time,
+        reduce_overflow,
+    )
+
+    reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)
+    result = normalize_exposure_time(reduced)
+
+    assert "Cy5" in result
+
+    sorted_results = result["Cy5"].sort_values(
+        by=["Well.Row", "Well.Column", "Pos.Id"]
+    )
+    expected_values = [1, 4, 15, 1, 10, 10, 10, 10, 100, 100, 100, 100]
+
+    assert list(sorted_results["Normalized.Spot.Mean"]) == expected_values
+
+
+def test_normalize_measurement(example_dir):
+    from sensospot_data.normalisation import normalize_measurement
+    from sensospot_data.parser import process_folder
+
+    sub_dir = example_dir / EXAMPLE_DIR_WITH_PARAMS
+    data_frame = process_folder(sub_dir)
+
+    exposure_map = {
+        1: ExposureSetting("Cy3", 100),
+        2: ExposureSetting("Cy5", 15),
+        3: ExposureSetting("Cy5", 150),
+    }
+    normalization_map = {"Cy5": 25}
+
+    result = normalize_measurement(data_frame, exposure_map, normalization_map)
+    cy3_df, cy5_df = result["Cy3"], result["Cy5"]
+
+    assert set(result.keys()) == {"Cy3", "Cy5"}
+    assert cy3_df["Normalized.Exposure.Time"].unique() == 100
+    assert cy5_df["Normalized.Exposure.Time"].unique() == 25
diff --git a/tests/test_parameters.py b/tests/test_parameters.py
index 7772958..460847c 100644
--- a/tests/test_parameters.py
+++ b/tests/test_parameters.py
@@ -113,13 +113,12 @@ def test_add_optional_measurement_parameters_without_params_file(
     exposure_df, example_dir
 ):
     from sensospot_data.parameters import add_optional_measurement_parameters
-    from pandas import isnull

     folder = example_dir / EXAMPLE_DIR_WO_PARAMS

     add_optional_measurement_parameters(exposure_df, folder)

     for exposure_id in range(1, 4):
         mask = exposure_df["Exposure.Id"] == exposure_id
-        example_row = exposure_df.loc[mask].iloc[0]
-        assert isnull(example_row["Parameters.Channel"])
-        assert isnull(example_row["Parameters.Time"])
+        one_exposure_data_frame = exposure_df.loc[mask]
+        assert one_exposure_data_frame["Parameters.Channel"].hasnans
+        assert one_exposure_data_frame["Parameters.Time"].hasnans
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 1e58f91..f5a02b0 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -65,7 +65,7 @@ def test_parse_csv_no_array(example_dir):


 @pytest.mark.parametrize(
-    "input, expected", [("", "."), ("..,", "."), (".,,", ","), ("..,,", "."),]
+    "input, expected", [("", "."), ("..,", "."), (".,,", ","), ("..,,", ".")]
 )
 def test_guess_decimal_separator_returns_correct_separator(input, expected):
     from sensospot_data.parser import _guess_decimal_separator
@@ -107,7 +107,7 @@ def test_well_regex_no_match(input):

 @pytest.mark.parametrize(
     "filename, expected",
-    [("A1_1.csv", ("A", 1, 1)), ("test/measurement_1_H12_2", ("H", 12, 2)),],
+    [("A1_1.csv", ("A", 1, 1)), ("test/measurement_1_H12_2", ("H", 12, 2))],
 )
 def test_extract_measurement_info_ok(filename, expected):
     from sensospot_data.parser import _extract_measurement_info
@@ -242,10 +242,7 @@ def test_parse_folder(example_dir):


 def test_sanity_check_ok(example_dir):
-    from sensospot_data.parser import (
-        _sanity_check,
-        parse_multiple_files,
-    )
+    from sensospot_data.parser import _sanity_check, parse_multiple_files

     sub_dir = example_dir / EXAMPLE_DIR_WO_PARAMS
     file_list = [
@@ -261,10 +258,7 @@ def test_sanity_check_ok(example_dir):


 def test_sanity_check_raises_value_error(example_dir):
-    from sensospot_data.parser import (
-        _sanity_check,
-        parse_multiple_files,
-    )
+    from sensospot_data.parser import _sanity_check, parse_multiple_files

     sub_dir = example_dir / EXAMPLE_DIR_WO_PARAMS
     file_list = [
@@ -279,11 +273,17 @@ def test_sanity_check_raises_value_error(example_dir):
         _sanity_check(data_frame)


+def test_get_cache_table_name():
+    from sensospot_data.parser import _get_cache_table_name
+    from sensospot_data import VERSION_TABLE_NAME
+
+    result = _get_cache_table_name()
+
+    assert result == VERSION_TABLE_NAME
+
+
 def test_process_folder_creates_cache(dir_for_caching):
-    from sensospot_data.parser import (
-        process_folder,
-        CACHE_FILE_NAME,
-    )
+    from sensospot_data.parser import process_folder, CACHE_FILE_NAME

     cache_path = dir_for_caching / CACHE_FILE_NAME
     assert not cache_path.is_file()
@@ -309,10 +309,7 @@ def test_process_folder_reads_from_cache(dir_for_caching, example_file):
 def test_process_folder_read_cache_fails_silently(
     dir_for_caching, exposure_df
 ):
-    from sensospot_data.parser import (
-        process_folder,
-        CACHE_FILE_NAME,
-    )
+    from sensospot_data.parser import process_folder, CACHE_FILE_NAME

     cache_path = dir_for_caching / CACHE_FILE_NAME
     exposure_df.to_hdf(cache_path, "unknown table")
@@ -322,15 +319,6 @@ def test_process_folder_read_cache_fails_silently(
     assert result["Well.Row"][0] == "A"


-def test_get_cache_table_name():
-    from sensospot_data.parser import _get_cache_table_name
-    from sensospot_data import VERSION_TABLE_NAME
-
-    result = _get_cache_table_name()
-
-    assert result == VERSION_TABLE_NAME
-
-
 def test_process_folder_read_cache_no_cache_arg(dir_for_caching, exposure_df):
     from sensospot_data.parser import (
         process_folder,
@@ -347,10 +335,7 @@ def test_process_folder_read_cache_no_cache_arg(dir_for_caching, exposure_df):


 def test_process_folder_writes_cache(dir_for_caching):
-    from sensospot_data.parser import (
-        process_folder,
-        CACHE_FILE_NAME,
-    )
+    from sensospot_data.parser import process_folder, CACHE_FILE_NAME

     process_folder(dir_for_caching, use_cache=True)
diff --git a/tests/test_sensovation_data_parser.py b/tests/test_sensovation_data_parser.py
index 14a48f3..c348d13 100644
--- a/tests/test_sensovation_data_parser.py
+++ b/tests/test_sensovation_data_parser.py
@@ -7,3 +7,4 @@ def test_import_api():
     from sensospot_data import parse_folder  # noqa: F401
     from sensospot_data import parse_multiple_files  # noqa: F401
     from sensospot_data import process_folder  # noqa: F401
+    from sensospot_data import normalize_measurement  # noqa: F401