sensospot_parser/tests/test_normalisation.py

from collections import namedtuple

import pandas
import pytest

from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS

ExposureSetting = namedtuple("ExposureSetting", ["channel", "time"])


@pytest.fixture(scope="session")
def data_frame_with_params(example_dir):
    """Parse the example folder named by ``EXAMPLE_DIR_WITH_PARAMS``.

    Session scoped: the folder below ``example_dir`` is parsed once and the
    same result object is shared by every test that requests this fixture.
    """
    from sensospot_data import parser

    source_folder = example_dir / EXAMPLE_DIR_WITH_PARAMS
    return parser.parse_folder(source_folder)


@pytest.fixture(scope="session")
def data_frame_without_params(example_dir):
    """Parse the example folder named by ``EXAMPLE_DIR_WO_PARAMS``.

    Session scoped: the folder below ``example_dir`` is parsed once and the
    same result object is shared by every test that requests this fixture.
    """
    from sensospot_data import parser

    source_folder = example_dir / EXAMPLE_DIR_WO_PARAMS
    return parser.parse_folder(source_folder)


@pytest.fixture
def df_wp(data_frame_with_params):
    """Hand out a copy of the session-wide ``data_frame_with_params`` result.

    Function scoped, so each test receives its own copy instead of the
    shared session object.
    """
    private_copy = data_frame_with_params.copy()
    return private_copy


@pytest.fixture
def df_wop(data_frame_without_params):
    """Hand out a copy of the session-wide ``data_frame_without_params`` result.

    Function scoped, so each test receives its own copy instead of the
    shared session object.
    """
    private_copy = data_frame_without_params.copy()
    return private_copy


def test_infer_exposure_from_parameters(df_wp):
    """The inferred exposure columns must equal the parameter columns.

    ``Exposure.Channel`` is compared against ``Parameters.Channel`` and
    ``Exposure.Time`` against ``Parameters.Time``, row by row.
    """
    from sensospot_data import normalisation

    inferred = normalisation._infer_exposure_from_parameters(df_wp)

    column_pairs = (
        ("Exposure.Channel", "Parameters.Channel"),
        ("Exposure.Time", "Parameters.Time"),
    )
    for exposure_col, parameter_col in column_pairs:
        assert all(inferred[exposure_col] == inferred[parameter_col])


def test_infer_exposure_from_parameters_raises_error(df_wop):
    """Inferring exposures from the ``df_wop`` data must raise ``ValueError``.

    The error message is expected to start with
    ``"Exposure Map: measurement"``.
    """
    from sensospot_data import normalisation

    with pytest.raises(ValueError) as raised:
        normalisation._infer_exposure_from_parameters(df_wop)

    message = str(raised.value)
    assert message.startswith("Exposure Map: measurement")


def test_apply_exposure_map(df_wp):
    """An explicit exposure map must be written into the exposure columns.

    For every exposure id in the map, all rows carrying that
    ``Exposure.Id`` must show exactly the mapped channel and time.
    """
    from sensospot_data import normalisation

    settings_by_id = {
        1: ExposureSetting(channel="Cy3", time=100),
        2: ExposureSetting(channel="Cy5", time=15),
        3: ExposureSetting(channel="Cy5", time=150),
    }

    mapped = normalisation.apply_exposure_map(df_wp, settings_by_id)

    for exposure_id, setting in settings_by_id.items():
        # restrict the check to the rows of this one exposure id
        rows = mapped.loc[mapped["Exposure.Id"] == exposure_id]

        channels = set(rows["Exposure.Channel"].unique())
        assert channels == {setting.channel}

        times = set(rows["Exposure.Time"].unique())
        assert times == {setting.time}


def test_apply_exposure_map_raises_error(df_wp):
    """A map containing the key ``"X"`` must be rejected with ``ValueError``.

    The error message is expected to start with ``"Exposure Map differs"``.
    """
    from sensospot_data import normalisation

    mismatching_map = {
        1: ExposureSetting(channel="Cy3", time=100),
        2: ExposureSetting(channel="Cy5", time=15),
        "X": ExposureSetting(channel="Cy5", time=150),
    }

    with pytest.raises(ValueError) as raised:
        normalisation.apply_exposure_map(df_wp, mismatching_map)

    message = str(raised.value)
    assert message.startswith("Exposure Map differs")


def test_apply_exposure_map_from_parameters(df_wp):
    """Passing ``None`` as map must yield exposures equal to the parameters.

    ``Exposure.Channel`` is compared against ``Parameters.Channel`` and
    ``Exposure.Time`` against ``Parameters.Time``, row by row.
    """
    from sensospot_data import normalisation

    mapped = normalisation.apply_exposure_map(df_wp, None)

    column_pairs = (
        ("Exposure.Channel", "Parameters.Channel"),
        ("Exposure.Time", "Parameters.Time"),
    )
    for exposure_col, parameter_col in column_pairs:
        assert all(mapped[exposure_col] == mapped[parameter_col])


def test_apply_exposure_map_from_parameters_raises_error(df_wop):
    """Passing ``None`` as map for the ``df_wop`` data must raise ``ValueError``.

    The error message is expected to start with
    ``"Exposure Map: measurement"``.
    """
    from sensospot_data import normalisation

    with pytest.raises(ValueError) as raised:
        normalisation.apply_exposure_map(df_wop, None)

    message = str(raised.value)
    assert message.startswith("Exposure Map: measurement")


def test_check_overflow_limit_defaults():
    """Check the overflow flag when only the data frame is passed.

    With ``Spot.Mean`` values 0.1, 0.5 and 0.6 only the last row is
    expected to be flagged in ``Calc.Spot.Overflow``.
    """
    from sensospot_data import normalisation

    spot_means = pandas.DataFrame({"Spot.Mean": [0.1, 0.5, 0.6]})

    checked = normalisation._check_overflow_limit(spot_means)

    overflow_flags = list(checked["Calc.Spot.Overflow"])
    assert overflow_flags == [False, False, True]


def test_check_overflow_limit_custom_limit():
    """Check the overflow flag for an explicit column and limit.

    With ``Spot.Saturation`` values 4, 2, 3, 4 and a limit of 2, every row
    except the one holding the value 2 is expected to be flagged.
    """
    from sensospot_data import normalisation

    saturations = pandas.DataFrame({"Spot.Saturation": [4, 2, 3, 4]})

    checked = normalisation._check_overflow_limit(
        saturations, "Spot.Saturation", 2
    )

    overflow_flags = list(checked["Calc.Spot.Overflow"])
    assert overflow_flags == [True, False, True, True]


def test_reduce_overflow_in_channel(normalization_data_frame):
    """Check the ``Value`` column after overflow reduction in one channel.

    The frame is first flagged via ``_check_overflow_limit`` (column
    ``Saturation``, limit 1), then reduced; the result is compared after
    sorting by well row, well column and position id.
    """
    from sensospot_data import normalisation

    flagged = normalisation._check_overflow_limit(
        normalization_data_frame, "Saturation", 1
    )
    reduced = normalisation._reduce_overflow_in_channel(flagged)

    sort_keys = ["Well.Row", "Well.Column", "Pos.Id"]
    ordered = reduced.sort_values(by=sort_keys)

    # twelve rows: four individual values, then four 10s and four 100s
    expected_values = [1, 2, 3, 1] + [10] * 4 + [100] * 4

    assert list(ordered["Value"]) == expected_values


def test_reduce_overflow_in_channel_shortcut(normalization_data_frame):
    """With one single exposure time the very same object must come back.

    ``Exposure.Time`` is set to 1 for every row before the frame is flagged
    and passed to ``_reduce_overflow_in_channel``.
    """
    from sensospot_data import normalisation

    # overwrite the whole column so only one exposure time is left
    normalization_data_frame["Exposure.Time"] = 1

    flagged = normalisation._check_overflow_limit(
        normalization_data_frame, "Saturation", 1
    )
    returned = normalisation._reduce_overflow_in_channel(flagged)

    # identity check on purpose: an equal copy would not satisfy this
    assert returned is flagged


def test_reduce_overflow(normalization_data_frame):
    """Check the public ``reduce_overflow`` entry point.

    The result must contain a ``"Cy5"`` entry whose ``Value`` column holds
    the expected values after sorting by well row, well column and
    position id.
    """
    from sensospot_data import normalisation

    by_channel = normalisation.reduce_overflow(
        normalization_data_frame, "Saturation", 1
    )

    assert "Cy5" in by_channel

    sort_keys = ["Well.Row", "Well.Column", "Pos.Id"]
    ordered = by_channel["Cy5"].sort_values(by=sort_keys)

    # twelve rows: four individual values, then four 10s and four 100s
    expected_values = [1, 2, 3, 1] + [10] * 4 + [100] * 4

    assert list(ordered["Value"]) == expected_values


def test_infer_normalization_map(normalization_data_frame):
    """Check the normalization map inferred from frames split by channel.

    The row labelled 5 is reassigned to channel ``"Cy3"`` before the frame
    is split on ``Exposure.Channel``; the expected map is
    ``{"Cy3": 25, "Cy5": 50}``.
    """
    from sensospot_data import utils
    from sensospot_data import normalisation

    normalization_data_frame.loc[5, "Exposure.Channel"] = "Cy3"

    frames_by_channel = utils.split_data_frame(
        normalization_data_frame, "Exposure.Channel"
    )
    inferred = normalisation._infer_normalization_map(frames_by_channel)

    expected_map = {"Cy3": 25, "Cy5": 50}
    assert inferred == expected_map


def test_normalize_channel(normalization_data_frame):
    """Check ``normalize_channel`` on the overflow-reduced ``"Cy5"`` frame.

    The frame is reduced with ``reduce_overflow`` (column ``Saturation``,
    limit 1) and its ``"Cy5"`` entry is normalized with the value 50.
    After sorting by well row, well column and position id, every column
    named in ``RAW_DATA_NORMALIZATION_MAP.values()`` must hold the
    expected values.
    """
    from sensospot_data.columns import RAW_DATA_NORMALIZATION_MAP
    from sensospot_data.normalisation import reduce_overflow, normalize_channel

    reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)
    result = normalize_channel(reduced["Cy5"], 50)

    sorted_results = result.sort_values(
        by=["Well.Row", "Well.Column", "Pos.Id"]
    )
    expected_values = [2, 8, 30, 2, 20, 20, 20, 20, 200, 200, 200, 200]

    # The comparison used to be a bare expression without ``assert`` and
    # therefore could never fail.
    # NOTE(review): these expected values were never enforced before and
    # are exactly twice those in test_normalize_exposure_time; if this
    # assertion fails, confirm them against normalize_channel(..., 50).
    for normalized_col in RAW_DATA_NORMALIZATION_MAP.values():
        assert list(sorted_results[normalized_col]) == expected_values


def test_normalize_exposure_time(normalization_data_frame):
    """Check ``normalize_exposure_time`` on overflow-reduced data.

    The result must contain a ``"Cy5"`` entry whose
    ``Calc.Normalized.Spot.Mean`` column holds the expected values after
    sorting by well row, well column and position id.
    """
    from sensospot_data import normalisation

    reduced = normalisation.reduce_overflow(
        normalization_data_frame, "Saturation", 1
    )
    normalized = normalisation.normalize_exposure_time(reduced)

    assert "Cy5" in normalized

    sort_keys = ["Well.Row", "Well.Column", "Pos.Id"]
    ordered = normalized["Cy5"].sort_values(by=sort_keys)

    expected_values = [1, 4, 15, 1] + [10] * 4 + [100] * 4
    actual_values = list(ordered["Calc.Normalized.Spot.Mean"])

    assert actual_values == expected_values


def test_normalize_exposure_time_infered_map(normalization_data_frame):
    """Check ``normalize_exposure_time`` when called with the reduced data only.

    The result must contain a ``"Cy5"`` entry whose
    ``Calc.Normalized.Spot.Mean`` column holds the expected values after
    sorting by well row, well column and position id.

    NOTE(review): the steps here are the same as in
    ``test_normalize_exposure_time``; presumably one of the two was meant
    to pass an explicit map - confirm against ``normalize_exposure_time``.
    """
    from sensospot_data import normalisation

    reduced = normalisation.reduce_overflow(
        normalization_data_frame, "Saturation", 1
    )
    normalized = normalisation.normalize_exposure_time(reduced)

    assert "Cy5" in normalized

    sort_keys = ["Well.Row", "Well.Column", "Pos.Id"]
    ordered = normalized["Cy5"].sort_values(by=sort_keys)

    expected_values = [1, 4, 15, 1] + [10] * 4 + [100] * 4
    actual_values = list(ordered["Calc.Normalized.Spot.Mean"])

    assert actual_values == expected_values


def test_normalize_measurement(df_wp):
    """Check ``split_channels`` with an explicit exposure map.

    The result must contain exactly the channels ``"Cy3"`` and ``"Cy5"``.
    Within each channel ``Settings.Normalized.Exposure.Time`` must hold a
    single value: 100 for Cy3 and 150 for Cy5.
    """
    from sensospot_data.normalisation import split_channels

    exposure_map = {
        1: ExposureSetting("Cy3", 100),
        2: ExposureSetting("Cy5", 15),
        3: ExposureSetting("Cy5", 150),
    }

    result = split_channels(df_wp, exposure_map)

    # Check the keys before indexing, so a missing channel shows up as a
    # failed assertion instead of a KeyError.
    assert set(result.keys()) == {"Cy3", "Cy5"}

    # Compare sets of unique values rather than asserting on the truth
    # value of an array-vs-scalar comparison; this mirrors the checks in
    # test_apply_exposure_map.
    normalized_time = "Settings.Normalized.Exposure.Time"
    assert set(result["Cy3"][normalized_time].unique()) == {100}
    assert set(result["Cy5"][normalized_time].unique()) == {150}
normalization is working prior to refactoring 4 years ago			`from collections import namedtuple`

			`import pandas`
			`import pytest`

			`from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS`

			`ExposureSetting = namedtuple("ExposureSetting", ["channel", "time"])`


			`@pytest.fixture(scope="session")`
			`def data_frame_with_params(example_dir):`
			`from sensospot_data.parser import parse_folder`

			`return parse_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)`


			`@pytest.fixture(scope="session")`
			`def data_frame_without_params(example_dir):`
			`from sensospot_data.parser import parse_folder`

			`return parse_folder(example_dir / EXAMPLE_DIR_WO_PARAMS)`


			`@pytest.fixture`
			`def df_wp(data_frame_with_params):`
			`return data_frame_with_params.copy()`


			`@pytest.fixture`
			`def df_wop(data_frame_without_params):`
			`return data_frame_without_params.copy()`



			`def test_infer_exposure_from_parameters(df_wp):`
			`from sensospot_data.normalisation import _infer_exposure_from_parameters`

			`result = _infer_exposure_from_parameters(df_wp)`

			`assert all(result["Exposure.Channel"] == result["Parameters.Channel"])`
			`assert all(result["Exposure.Time"] == result["Parameters.Time"])`


			`def test_infer_exposure_from_parameters_raises_error(df_wop):`
			`from sensospot_data.normalisation import _infer_exposure_from_parameters`

			`with pytest.raises(ValueError) as excinfo:`
			`_infer_exposure_from_parameters(df_wop)`

			`assert str(excinfo.value).startswith("Exposure Map: measurement")`


			`def test_apply_exposure_map(df_wp):`
			`from sensospot_data.normalisation import apply_exposure_map`

			`exposure_map = {`
			`1: ExposureSetting("Cy3", 100),`
			`2: ExposureSetting("Cy5", 15),`
			`3: ExposureSetting("Cy5", 150),`
			`}`

			`result = apply_exposure_map(df_wp, exposure_map)`

			`for key, value in exposure_map.items():`
			`mask = result["Exposure.Id"] == key`
			`partial = result.loc[mask]`
			`assert set(partial["Exposure.Channel"].unique()) == {value.channel}`
			`assert set(partial["Exposure.Time"].unique()) == {value.time}`


			`def test_apply_exposure_map_raises_error(df_wp):`
			`from sensospot_data.normalisation import apply_exposure_map`

			`exposure_map = {`
			`1: ExposureSetting("Cy3", 100),`
			`2: ExposureSetting("Cy5", 15),`
			`"X": ExposureSetting("Cy5", 150),`
			`}`

			`with pytest.raises(ValueError) as excinfo:`
			`apply_exposure_map(df_wp, exposure_map)`

			`assert str(excinfo.value).startswith("Exposure Map differs")`


			`def test_apply_exposure_map_from_parameters(df_wp):`
			`from sensospot_data.normalisation import apply_exposure_map`

			`result = apply_exposure_map(df_wp, None)`

			`assert all(result["Exposure.Channel"] == result["Parameters.Channel"])`
			`assert all(result["Exposure.Time"] == result["Parameters.Time"])`


			`def test_apply_exposure_map_from_parameters_raises_error(df_wop):`
			`from sensospot_data.normalisation import apply_exposure_map`

			`with pytest.raises(ValueError) as excinfo:`
			`apply_exposure_map(df_wop, None)`

			`assert str(excinfo.value).startswith("Exposure Map: measurement")`


			`def test_check_overflow_limit_defaults():`
			`from sensospot_data.normalisation import _check_overflow_limit`

			`data_frame = pandas.DataFrame(data={"Spot.Mean": [0.1, 0.5, 0.6]})`

			`result = _check_overflow_limit(data_frame)`

			`assert list(result["Calc.Spot.Overflow"]) == [False, False, True]`


			`def test_check_overflow_limit_custom_limit():`
			`from sensospot_data.normalisation import _check_overflow_limit`

			`data_frame = pandas.DataFrame(data={"Spot.Saturation": [4, 2, 3, 4]})`

			`result = _check_overflow_limit(data_frame, "Spot.Saturation", 2)`

			`assert list(result["Calc.Spot.Overflow"]) == [True, False, True, True]`


			`def test_reduce_overflow_in_channel(normalization_data_frame):`
			`from sensospot_data.normalisation import (`
			`_check_overflow_limit,`
			`_reduce_overflow_in_channel,`
			`)`

			`data_frame = _check_overflow_limit(`
			`normalization_data_frame, "Saturation", 1`
			`)`
			`result = _reduce_overflow_in_channel(data_frame)`

			`sorted_results = result.sort_values(`
			`by=["Well.Row", "Well.Column", "Pos.Id"]`
			`)`

			`assert list(sorted_results["Value"]) == [`
			`1,`
			`2,`
			`3,`
			`1,`
			`10,`
			`10,`
			`10,`
			`10,`
			`100,`
			`100,`
			`100,`
			`100,`
			`]`


			`def test_reduce_overflow_in_channel_shortcut(normalization_data_frame):`
			`from sensospot_data.normalisation import (`
			`_check_overflow_limit,`
			`_reduce_overflow_in_channel,`
			`)`

			`normalization_data_frame["Exposure.Time"] = 1`

			`data_frame = _check_overflow_limit(`
			`normalization_data_frame, "Saturation", 1`
			`)`
			`result = _reduce_overflow_in_channel(data_frame)`

			`assert result is data_frame`


			`def test_reduce_overflow(normalization_data_frame):`
			`from sensospot_data.normalisation import reduce_overflow`

			`result = reduce_overflow(normalization_data_frame, "Saturation", 1)`

			`assert "Cy5" in result`

			`sorted_results = result["Cy5"].sort_values(`
			`by=["Well.Row", "Well.Column", "Pos.Id"]`
			`)`

			`assert list(sorted_results["Value"]) == [`
			`1,`
			`2,`
			`3,`
			`1,`
			`10,`
			`10,`
			`10,`
			`10,`
			`100,`
			`100,`
			`100,`
			`100,`
			`]`


			`def test_infer_normalization_map(normalization_data_frame):`
added new submodule 'utils' 4 years ago			`from sensospot_data.utils import split_data_frame`
normalization is working prior to refactoring 4 years ago			`from sensospot_data.normalisation import (`
			`_infer_normalization_map,`
			`)`

			`normalization_data_frame.loc[5, "Exposure.Channel"] = "Cy3"`
added new submodule 'utils' 4 years ago			`split_frames = split_data_frame(`
normalization is working prior to refactoring 4 years ago			`normalization_data_frame, "Exposure.Channel"`
			`)`

			`result = _infer_normalization_map(split_frames)`

			`assert result == {"Cy3": 25, "Cy5": 50}`


			`def test_normalize_channel(normalization_data_frame):`
			`from sensospot_data.columns import RAW_DATA_NORMALIZATION_MAP`
			`from sensospot_data.normalisation import reduce_overflow, normalize_channel`

			`reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)`
			`result = normalize_channel(reduced["Cy5"], 50)`

			`sorted_results = result.sort_values(`
			`by=["Well.Row", "Well.Column", "Pos.Id"]`
			`)`
			`expected_values = [2, 8, 30, 2, 20, 20, 20, 20, 200, 200, 200, 200]`

			`for normalized_col in RAW_DATA_NORMALIZATION_MAP.values():`
			`list(sorted_results[normalized_col]) == expected_values`


			`def test_normalize_exposure_time(normalization_data_frame):`
			`from sensospot_data.normalisation import (`
			`reduce_overflow,`
			`normalize_exposure_time,`
			`)`

			`reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)`
			`result = normalize_exposure_time(reduced)`

			`assert "Cy5" in result`

			`sorted_results = result["Cy5"].sort_values(`
			`by=["Well.Row", "Well.Column", "Pos.Id"]`
			`)`
			`expected_values = [1, 4, 15, 1, 10, 10, 10, 10, 100, 100, 100, 100]`

			`assert list(sorted_results["Calc.Normalized.Spot.Mean"]) == expected_values`


			`def test_normalize_exposure_time_infered_map(normalization_data_frame):`
			`from sensospot_data.normalisation import (`
			`reduce_overflow,`
			`normalize_exposure_time,`
			`)`

			`reduced = reduce_overflow(normalization_data_frame, "Saturation", 1)`
			`result = normalize_exposure_time(reduced)`

			`assert "Cy5" in result`

			`sorted_results = result["Cy5"].sort_values(`
			`by=["Well.Row", "Well.Column", "Pos.Id"]`
			`)`
			`expected_values = [1, 4, 15, 1, 10, 10, 10, 10, 100, 100, 100, 100]`

			`assert list(sorted_results["Calc.Normalized.Spot.Mean"]) == expected_values`


			`def test_normalize_measurement(df_wp):`
			`from sensospot_data.normalisation import split_channels`

			`exposure_map = {`
			`1: ExposureSetting("Cy3", 100),`
			`2: ExposureSetting("Cy5", 15),`
			`3: ExposureSetting("Cy5", 150),`
			`}`

			`result = split_channels(df_wp, exposure_map)`
			`cy3_df, cy5_df = result["Cy3"], result["Cy5"]`

			`assert set(result.keys()) == {"Cy3", "Cy5"}`
			`assert cy3_df["Settings.Normalized.Exposure.Time"].unique() == 100`
			`assert cy5_df["Settings.Normalized.Exposure.Time"].unique() == 150`