"""Helpers that reshape spot measurement data for calibration analysis.

The public entry point is :func:`prepare_calibration_data`, which chains the
three single-purpose steps defined in this module.
"""

import pandas as pd

EXPOSURE_ID = "Exposure.Id"
EXPOSURE_CHANNEL = "Exposure.Channel"
EXPOSURE_TIME = "Exposure.Time"

# Number of distinct exposure ids that selects the "classic" exposure table.
CLASSIC_CALIBRATION_EXPOSURES = 4

EXPOSURE_COLUMNS = [
    EXPOSURE_ID,
    EXPOSURE_CHANNEL,
    EXPOSURE_TIME,
]

# Rows are (exposure id, channel, exposure time), matching EXPOSURE_COLUMNS.
_CLASSIC_EXPOSURES = [
    (1, "Cy3", 100),
    (2, "Cy3", 10),
    (3, "Cy5", 60),
    (4, "Cy5", 6),
]
_REGRESSION_EXPOSURES = [
    (1, "Cy3", 100),
    (2, "Cy3", 30),
    (3, "Cy3", 10),
    (4, "Cy5", 60),
    (5, "Cy5", 18),
    (6, "Cy5", 6),
]

# "Pos.Id" values that remove_non_signal_positions() filters out.
POS_TO_IGNORE = [
    1,
    2,
    9,
    10,
    11,
    12,
    19,
    20,
    71,
    72,
    81,
    82,
    89,
    90,
    91,
    92,
    99,
    100,
]

SIGNAL_SOURCE = "Signal.Source"
SIGNAL_INTENSITY = "Signal.Intensity"
SPOT_MEDIAN = "Spot.Median"
BKG_MEDIAN = "Bkg.Median"
SIGNAL_COLUMNS = [SPOT_MEDIAN, BKG_MEDIAN]


def add_exposure_info(data: pd.DataFrame) -> pd.DataFrame:
    """Attach the exposure channel and exposure time to every row.

    The lookup table is chosen from the number of distinct ``Exposure.Id``
    values in *data*: exactly ``CLASSIC_CALIBRATION_EXPOSURES`` selects the
    four-entry table, anything else selects the six-entry table.

    Args:
        data: Frame with an ``Exposure.Id`` column. It is not modified.

    Returns:
        A new frame with ``Exposure.Channel`` and ``Exposure.Time`` columns.
        The merge is an inner join, so rows whose exposure id is missing
        from the selected table are not part of the result.

    Raises:
        KeyError: If *data* has no ``Exposure.Id`` column.
    """
    # dropna=False keeps the count identical to len(Series.unique()).
    exposure_count = data[EXPOSURE_ID].nunique(dropna=False)
    if exposure_count == CLASSIC_CALIBRATION_EXPOSURES:
        exposure_values = _CLASSIC_EXPOSURES
    else:
        exposure_values = _REGRESSION_EXPOSURES
    exposure_df = pd.DataFrame(exposure_values, columns=EXPOSURE_COLUMNS)

    # Pre-existing exposure columns would make merge() rename both sides to
    # "<name>_x" / "<name>_y", so the canonical column names would vanish.
    # Discard them first and let the lookup table be the single source.
    replaced = [EXPOSURE_CHANNEL, EXPOSURE_TIME]
    cleaned = data.drop(columns=replaced, errors="ignore")
    return cleaned.merge(exposure_df, on=EXPOSURE_ID)


def remove_non_signal_positions(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *data* without the rows listed in ``POS_TO_IGNORE``.

    Args:
        data: Frame with a ``Pos.Id`` column. It is not modified.

    Returns:
        An independent copy holding only rows whose ``Pos.Id`` is not in
        ``POS_TO_IGNORE``; the original index labels are kept.
    """
    keep = ~data["Pos.Id"].isin(POS_TO_IGNORE)
    return data.loc[keep].copy()


def add_signal_columns(data: pd.DataFrame) -> pd.DataFrame:
    """Stack *data* once per signal column into a long-format frame.

    For every name in ``SIGNAL_COLUMNS`` a full copy of *data* is produced
    where ``Signal.Source`` holds that column name and ``Signal.Intensity``
    holds that column's values. The copies are concatenated in the order of
    ``SIGNAL_COLUMNS``.

    Args:
        data: Frame containing every column named in ``SIGNAL_COLUMNS``.
            It is not modified.

    Returns:
        A frame with ``len(SIGNAL_COLUMNS) * len(data)`` rows and a fresh
        ``0..n-1`` index.
    """
    stacked = [
        data.assign(
            **{SIGNAL_SOURCE: column, SIGNAL_INTENSITY: data[column]}
        )
        for column in SIGNAL_COLUMNS
    ]
    return pd.concat(stacked, ignore_index=True)


def prepare_calibration_data(data: pd.DataFrame) -> pd.DataFrame:
    """Run the complete calibration preparation on *data*.

    Applies, in order, :func:`add_exposure_info`,
    :func:`remove_non_signal_positions` and :func:`add_signal_columns`.

    Args:
        data: Frame with ``Exposure.Id``, ``Pos.Id`` and all
            ``SIGNAL_COLUMNS`` columns. It is not modified.

    Returns:
        The long-format frame produced by :func:`add_signal_columns`.
    """
    with_exposures = add_exposure_info(data)
    only_signals = remove_non_signal_positions(with_exposures)
    return add_signal_columns(only_signals)