Browse Source

added new submodule 'utils'

xmlparsing
Holger Frey 4 years ago
parent
commit
67393144c6
  1. 1
      sensospot_data/__init__.py
  2. 12
      sensospot_data/normalisation.py
  3. 6
      sensospot_data/utils.py
  4. 13
      tests/test_normailsation.py
  5. 1
      tests/test_sensovation_data.py
  6. 14
      tests/test_utils.py

1
sensospot_data/__init__.py

@ -12,6 +12,7 @@ import click @@ -12,6 +12,7 @@ import click
from .parser import parse_file, parse_folder # noqa: F401
from .parameters import ExposureInfo, get_measurement_params # noqa: F401
from .utils import split_data_frame # noqa: F401
@click.command()

12
sensospot_data/normalisation.py

@ -21,13 +21,7 @@ PROBE_MULTI_INDEX = [ @@ -21,13 +21,7 @@ PROBE_MULTI_INDEX = [
RAW_DATA_POS_ID,
]
def _split_data_frame(data_frame, column):
""" splits a data frame on unique column values """
values = data_frame[column].unique()
masks = {value: (data_frame[column] == value) for value in values}
return {value: data_frame[mask] for value, mask in masks.items()}
from .utils import split_data_frame
def _infer_exposure_from_parameters(data_frame):
"""infer the exposures from measurement parameters
@ -90,7 +84,7 @@ def reduce_overflow(data_frame, column=RAW_DATA_SPOT_MEAN, limit=0.5): @@ -90,7 +84,7 @@ def reduce_overflow(data_frame, column=RAW_DATA_SPOT_MEAN, limit=0.5):
""" reduces the data set per channel, eliminating overflowing spots """
data_frame = _check_overflow_limit(data_frame, column, limit)
split_frames = _split_data_frame(data_frame, SETTINGS_EXPOSURE_CHANNEL)
split_frames = split_data_frame(data_frame, SETTINGS_EXPOSURE_CHANNEL)
return {
channel_id: _reduce_overflow_in_channel(channel_frame)
@ -101,7 +95,7 @@ def reduce_overflow(data_frame, column=RAW_DATA_SPOT_MEAN, limit=0.5): @@ -101,7 +95,7 @@ def reduce_overflow(data_frame, column=RAW_DATA_SPOT_MEAN, limit=0.5):
def _reduce_overflow_in_channel(channel_frame):
""" does the heavy lifting for reduce_overflow """
split_frames = _split_data_frame(channel_frame, SETTINGS_EXPOSURE_TIME)
split_frames = split_data_frame(channel_frame, SETTINGS_EXPOSURE_TIME)
if len(split_frames) == 1:
# shortcut, if there is only one exposure in the channel

6
sensospot_data/utils.py

@ -0,0 +1,6 @@ @@ -0,0 +1,6 @@
def split_data_frame(data_frame, column):
    """splits a data frame on unique column values

    data_frame: the data frame to split
    column:     label of the column whose unique values define the parts

    returns a dict that maps every unique value of the column to the rows
    of the data frame where the column equals that value

    NOTE: rows are selected with an equality comparison, so a missing value
    (NaN / None) never matches itself; if the column contains one, its key
    maps to an empty frame.
    """
    # look up the column once instead of once per unique value
    column_values = data_frame[column]
    return {
        value: data_frame[column_values == value]
        for value in column_values.unique()
    }

13
tests/test_normailsation.py

@ -32,15 +32,6 @@ def df_wop(data_frame_without_params): @@ -32,15 +32,6 @@ def df_wop(data_frame_without_params):
return data_frame_without_params.copy()
def test_split_data_frame(df_wp):
    """ splitting on "Well.Row" gives one frame per row label A, B and C """
    from sensospot_data.normalisation import _split_data_frame

    frames_by_row = _split_data_frame(df_wp, "Well.Row")

    assert set(frames_by_row.keys()) == set("ABC")
    # every partial frame must only contain the row label it is keyed by
    for row_label, row_frame in frames_by_row.items():
        assert set(row_frame["Well.Row"].unique()) == {row_label}
def test_infer_exposure_from_parameters(df_wp):
from sensospot_data.normalisation import _infer_exposure_from_parameters
@ -206,13 +197,13 @@ def test_reduce_overflow(normalization_data_frame): @@ -206,13 +197,13 @@ def test_reduce_overflow(normalization_data_frame):
def test_infer_normalization_map(normalization_data_frame):
from sensospot_data.utils import split_data_frame
from sensospot_data.normalisation import (
_split_data_frame,
_infer_normalization_map,
)
normalization_data_frame.loc[5, "Exposure.Channel"] = "Cy3"
split_frames = _split_data_frame(
split_frames = split_data_frame(
normalization_data_frame, "Exposure.Channel"
)

1
tests/test_sensovation_data.py

@ -7,3 +7,4 @@ def test_import_api(): @@ -7,3 +7,4 @@ def test_import_api():
from sensospot_data import parse_file # noqa: F401
from sensospot_data import parse_folder # noqa: F401
from sensospot_data import get_measurement_params # noqa: F401
from sensospot_data import split_data_frame # noqa: F401

14
tests/test_utils.py

@ -0,0 +1,14 @@ @@ -0,0 +1,14 @@
from .conftest import EXAMPLE_DIR_WITH_PARAMS
def test_split_data_frame(example_dir):
    """ splitting on "Well.Row" gives one frame per row label A, B and C """
    from sensospot_data.utils import split_data_frame
    from sensospot_data.parser import parse_folder

    parsed_frame = parse_folder(example_dir / EXAMPLE_DIR_WITH_PARAMS)
    frames_by_row = split_data_frame(parsed_frame, "Well.Row")

    assert set(frames_by_row.keys()) == set("ABC")
    # every partial frame must only contain the row label it is keyed by
    for row_label, row_frame in frames_by_row.items():
        assert set(row_frame["Well.Row"].unique()) == {row_label}
Loading…
Cancel
Save