diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py index a8f393a..33f35b1 100644 --- a/sensospot_data/__init__.py +++ b/sensospot_data/__init__.py @@ -12,6 +12,7 @@ import click from .parser import parse_file, parse_folder # noqa: F401 from .parameters import ExposureInfo, get_measurement_params # noqa: F401 +from .utils import split_data_frame # noqa: F401 @click.command() diff --git a/sensospot_data/normalisation.py b/sensospot_data/normalisation.py index 8e37bcc..074d5ea 100644 --- a/sensospot_data/normalisation.py +++ b/sensospot_data/normalisation.py @@ -21,13 +21,7 @@ PROBE_MULTI_INDEX = [ RAW_DATA_POS_ID, ] - -def _split_data_frame(data_frame, column): - """ splits a data frame on unique column values """ - values = data_frame[column].unique() - masks = {value: (data_frame[column] == value) for value in values} - return {value: data_frame[mask] for value, mask in masks.items()} - +from .utils import split_data_frame def _infer_exposure_from_parameters(data_frame): """infer the exposures from measurement parameters @@ -90,7 +84,7 @@ def reduce_overflow(data_frame, column=RAW_DATA_SPOT_MEAN, limit=0.5): """ reduces the data set per channel, eliminating overflowing spots """ data_frame = _check_overflow_limit(data_frame, column, limit) - split_frames = _split_data_frame(data_frame, SETTINGS_EXPOSURE_CHANNEL) + split_frames = split_data_frame(data_frame, SETTINGS_EXPOSURE_CHANNEL) return { channel_id: _reduce_overflow_in_channel(channel_frame) @@ -101,7 +95,7 @@ def reduce_overflow(data_frame, column=RAW_DATA_SPOT_MEAN, limit=0.5): def _reduce_overflow_in_channel(channel_frame): """ does the heavy lifting for reduce_overflow """ - split_frames = _split_data_frame(channel_frame, SETTINGS_EXPOSURE_TIME) + split_frames = split_data_frame(channel_frame, SETTINGS_EXPOSURE_TIME) if len(split_frames) == 1: # shortcut, if there is only one exposure in the channel diff --git a/sensospot_data/utils.py b/sensospot_data/utils.py new file mode 100644 index 
0000000..e2060da --- /dev/null +++ b/sensospot_data/utils.py @@ -0,0 +1,6 @@ + +def split_data_frame(data_frame, column): + """Split a data frame into a dict of frames keyed by unique column value""" + values = data_frame[column].unique() + masks = {value: (data_frame[column] == value) for value in values} + return {value: data_frame[mask] for value, mask in masks.items()} diff --git a/tests/test_normailsation.py b/tests/test_normailsation.py index b9160d3..0825bfd 100644 --- a/tests/test_normailsation.py +++ b/tests/test_normailsation.py @@ -32,15 +32,6 @@ def df_wop(data_frame_without_params): return data_frame_without_params.copy() -def test_split_data_frame(df_wp): - from sensospot_data.normalisation import _split_data_frame - - result = _split_data_frame(df_wp, "Well.Row") - - assert set(result.keys()) == set("ABC") - for key, value_df in result.items(): - assert set(value_df["Well.Row"].unique()) == {key} - def test_infer_exposure_from_parameters(df_wp): from sensospot_data.normalisation import _infer_exposure_from_parameters @@ -206,13 +197,13 @@ def test_reduce_overflow(normalization_data_frame): def test_infer_normalization_map(normalization_data_frame): + from sensospot_data.utils import split_data_frame from sensospot_data.normalisation import ( - _split_data_frame, _infer_normalization_map, ) normalization_data_frame.loc[5, "Exposure.Channel"] = "Cy3" - split_frames = _split_data_frame( + split_frames = split_data_frame( normalization_data_frame, "Exposure.Channel" ) diff --git a/tests/test_sensovation_data.py b/tests/test_sensovation_data.py index f8fa40c..20f5289 100644 --- a/tests/test_sensovation_data.py +++ b/tests/test_sensovation_data.py @@ -7,3 +7,4 @@ def test_import_api(): from sensospot_data import parse_file # noqa: F401 from sensospot_data import parse_folder # noqa: F401 from sensospot_data import get_measurement_params # noqa: F401 + from sensospot_data import split_data_frame # noqa: F401 diff --git a/tests/test_utils.py 
b/tests/test_utils.py new file mode 100644 index 0000000..db8d7a0 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,14 @@ + +from .conftest import EXAMPLE_DIR_WITH_PARAMS + +def test_split_data_frame(example_dir): + from sensospot_data.parser import parse_folder + from sensospot_data.utils import split_data_frame + + data_frame = parse_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS) + + result = split_data_frame(data_frame, "Well.Row") + + assert set(result.keys()) == set("ABC") + for key, value_df in result.items(): + assert set(value_df["Well.Row"].unique()) == {key}