Browse Source

measurement parameters are parsed again

xmlparsing
Holger Frey 4 years ago
parent
commit
e7c56a1f3a
  1. 2
      sensospot_data/__init__.py
  2. 74
      sensospot_data/parameters.py
  3. 9
      sensospot_data/parser.py
  4. 0
      tests/__init__.py
  5. 37
      tests/conftest.py
  6. 125
      tests/test_parameters.py
  7. 40
      tests/test_parser.py

2
sensospot_data/__init__.py

@ -5,6 +5,8 @@ Parsing the numerical output from Sensovations Sensospot image analysis.
__version__ = "0.1.0" __version__ = "0.1.0"
VERSION_TABLE_NAME = f"v{__version__}".replace(".", "_")
from .parser import ( # noqa: F401 from .parser import ( # noqa: F401
CACHE_FILE_NAME, CACHE_FILE_NAME,

74
sensospot_data/parameters.py

@ -0,0 +1,74 @@
""" Sensospot Data Parser
Parsing the numerical output from Sensovations Sensospot image analysis.
"""
from pathlib import Path
from collections import namedtuple
import numpy
from defusedxml import ElementTree
# measurement settings of a single exposure: channel name and exposure time
MeasurementParams = namedtuple("MeasurementParams", ("channel", "time"))
def _search_measurement_params_file(folder):
    """ searches for an exposure settings file in a folder

    The search is done recursively for "*.svexp" files below the
    "Parameters" sub folder of the given folder.

    Returns the path of the settings file if exactly one was found.
    Returns None if the "Parameters" folder is missing or if the search
    yields no or more than one file.
    """
    parameters_dir = Path(folder) / "Parameters"
    if not parameters_dir.is_dir():
        return None
    candidates = list(parameters_dir.glob("**/*.svexp"))
    # zero or multiple candidates are both treated as "not found"
    return candidates[0] if len(candidates) == 1 else None
def _parse_measurement_params(params_file):
    """ parses the channel information from a settings file

    params_file: path to the xml based settings file

    Returns a dict that maps the exposure id (the number at the end of a
    "ChannelConfig<N>" tag) to a MeasurementParams tuple holding the lower
    cased channel name and the exposure time as an integer.
    An empty dict is returned if the file has no "Channels" element.

    Raises ValueError if a tag does not end with a number or the exposure
    time is not an integer, KeyError if a required attribute is missing.
    """
    file_path = Path(params_file)
    # binary mode: the xml parser decodes the content according to the
    # encoding declared in the file instead of the locale default
    with file_path.open("rb") as file_handle:
        tree = ElementTree.parse(file_handle)
    channels = tree.find("Channels")
    if channels is None:
        # nothing to parse, callers treat an empty result as "no parameters"
        return {}
    result = {}
    for child in channels:
        # child.tag == "ChannelConfig1"
        tag = child.tag
        # use all trailing digits, so "ChannelConfig10" yields 10 and not 0
        exposure = int(tag[len(tag.rstrip("0123456789")) :])
        channel_description = child.attrib["Description"]
        # channel_description == "[Cy3|Cy5] Green"
        channel = channel_description.rsplit(" ", 1)[-1]
        time = int(child.attrib["ExposureTimeMs"])
        result[exposure] = MeasurementParams(channel.lower(), time)
    return result
def _get_measurement_params(folder):
    """ returns the measurement parameters found for a folder

    Returns the parsed content of the settings file or None, if no
    settings file could be located for the folder.
    """
    settings_file = _search_measurement_params_file(folder)
    if settings_file is None:
        return None
    return _parse_measurement_params(settings_file)
def _add_measurement_params(data_frame, params):
    """ adds measurement parameters to a data frame

    data_frame: data frame with an "Exposure.Id" column, modified in place
    params:     dict that maps exposure ids to objects with a "channel"
                and a "time" attribute

    The values are written to the columns "Parameters.Channel" and
    "Parameters.Time" for all rows with a matching exposure id, the
    channel column is converted to a categorical afterwards.

    Returns the modified data frame.
    """
    channel_column = "Parameters.Channel"
    if channel_column in data_frame.columns:
        # A placeholder column filled with NaN has a float dtype. Writing
        # channel names into it is an incompatible dtype assignment that
        # newer pandas versions warn about, so it is cast explicitly.
        data_frame[channel_column] = data_frame[channel_column].astype(object)
    for exposure_id, info in params.items():
        mask = data_frame["Exposure.Id"] == exposure_id
        data_frame.loc[mask, channel_column] = info.channel
        data_frame.loc[mask, "Parameters.Time"] = info.time
    data_frame[channel_column] = data_frame[channel_column].astype("category")
    return data_frame
def add_optional_measurement_parameters(data_frame, folder):
    """ adds measurement params to the data frame, if they could be parsed

    The columns "Parameters.Channel" and "Parameters.Time" are always
    added to the data frame. They stay filled with NaN if no parameters
    are available for the folder or if the exposure ids of the parameters
    do not exactly match the exposure ids present in the data frame.

    Returns the data frame.
    """
    # placeholder values, the columns must exist in any case
    data_frame["Parameters.Channel"] = numpy.nan
    data_frame["Parameters.Time"] = numpy.nan
    params = _get_measurement_params(folder)
    if not params:
        return data_frame
    exposures_in_data = set(data_frame["Exposure.Id"].unique())
    if exposures_in_data != set(params.keys()):
        return data_frame
    return _add_measurement_params(data_frame, params)

9
sensospot_data/parser.py

@ -9,6 +9,8 @@ from collections import namedtuple
import pandas import pandas
from .parameters import add_optional_measurement_parameters
REGEX_WELL = re.compile( REGEX_WELL = re.compile(
r""" r"""
(?P<row>([A-Z]+)) # row name containing one or more letters (?P<row>([A-Z]+)) # row name containing one or more letters
@ -31,9 +33,9 @@ FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
def _get_cache_table_name(): def _get_cache_table_name():
""" automatic hdf5 table name, avoids a circular import """ """ automatic hdf5 table name, avoids a circular import """
from . import __version__ from . import VERSION_TABLE_NAME
return f"v{__version__}" return VERSION_TABLE_NAME
def _guess_decimal_separator(file_handle): def _guess_decimal_separator(file_handle):
@ -91,6 +93,7 @@ def parse_multiple_files(file_list):
data_frame = next(collection) data_frame = next(collection)
for next_frame in collection: for next_frame in collection:
data_frame = data_frame.append(next_frame, ignore_index=True) data_frame = data_frame.append(next_frame, ignore_index=True)
data_frame["Well.Row"] = data_frame["Well.Row"].astype("category")
return data_frame return data_frame
@ -118,7 +121,7 @@ def parse_folder(folder):
""" parses all csv files in a folder to one large dataframe """ """ parses all csv files in a folder to one large dataframe """
file_list = _list_csv_files(folder) file_list = _list_csv_files(folder)
data_frame = parse_multiple_files(file_list) data_frame = parse_multiple_files(file_list)
data_frame["Well.Row"] = data_frame["Well.Row"].astype("category") data_frame = add_optional_measurement_parameters(data_frame, folder)
return _sanity_check(data_frame) return _sanity_check(data_frame)

0
tests/__init__.py

37
tests/conftest.py

@ -0,0 +1,37 @@
""" test fixtures """
from pathlib import Path
import pytest
# names of the measurement folders below "example_data" used by the tests
EXAMPLE_DIR_WO_PARAMS = "mtp_wo_parameters"
EXAMPLE_DIR_WITH_PARAMS = "mtp_with_parameters"
@pytest.fixture
def example_dir(request):
    """ path to the "example_data" folder below the pytest root dir """
    yield Path(request.config.rootdir) / "example_data"
@pytest.fixture
def example_file(example_dir):
    """ path to one csv file in the example folder without parameters """
    file_name = "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv"
    yield example_dir / EXAMPLE_DIR_WO_PARAMS / file_name
@pytest.fixture
def exposure_df():
    """ minimal data frame with the exposure ids 1, 2 and 3 """
    import pandas

    exposure_ids = {"Exposure.Id": [1, 2, 3]}
    yield pandas.DataFrame(data=exposure_ids)
@pytest.fixture
def dir_for_caching(tmpdir, example_file):
    """ temporary folder that contains a copy of the example file """
    import shutil

    cache_dir = Path(tmpdir)
    shutil.copy(example_file, cache_dir / example_file.name)
    yield cache_dir

125
tests/test_parameters.py

@ -0,0 +1,125 @@
from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
def test_search_measurement_params_file_ok(example_dir):
    """ the example folder with parameters yields a *.svexp file """
    from sensospot_data.parameters import _search_measurement_params_file

    folder = example_dir / EXAMPLE_DIR_WITH_PARAMS
    found = _search_measurement_params_file(folder)

    assert found.suffix == ".svexp"
def test_search_measurement_params_file_no_parameters_folder(example_dir):
    """ the example folder without parameters yields no settings file """
    from sensospot_data.parameters import _search_measurement_params_file

    folder = example_dir / EXAMPLE_DIR_WO_PARAMS
    found = _search_measurement_params_file(folder)

    assert found is None
def test_ssearch_measurement_params_file_parameters_file(tmpdir):
    """ an empty "Parameters" folder yields no settings file """
    from sensospot_data.parameters import _search_measurement_params_file

    empty_params_dir = tmpdir / "Parameters"
    empty_params_dir.mkdir()

    found = _search_measurement_params_file(tmpdir)

    assert found is None
def test_parse_channel_info(example_dir):
    """ the example settings file is parsed to three exposure entries """
    from sensospot_data.parameters import (
        _parse_measurement_params,
        _search_measurement_params_file,
    )

    settings_file = _search_measurement_params_file(
        example_dir / EXAMPLE_DIR_WITH_PARAMS
    )
    parsed = _parse_measurement_params(settings_file)

    assert set(parsed.keys()) == {1, 2, 3}
    expected = {1: ("green", 100), 2: ("red", 150), 3: ("red", 15)}
    for exposure_id, values in expected.items():
        assert parsed[exposure_id] == values
def test_get_measurement_params_file_found(example_dir):
    """ parameters are returned for the example folder with parameters """
    from sensospot_data.parameters import _get_measurement_params

    folder = example_dir / EXAMPLE_DIR_WITH_PARAMS
    parsed = _get_measurement_params(folder)

    assert set(parsed.keys()) == {1, 2, 3}
    expected = {1: ("green", 100), 2: ("red", 150), 3: ("red", 15)}
    for exposure_id, values in expected.items():
        assert parsed[exposure_id] == values
def test_get_measurement_params_file_not_found(example_dir):
    """ None is returned for the example folder without parameters """
    from sensospot_data.parameters import _get_measurement_params

    folder = example_dir / EXAMPLE_DIR_WO_PARAMS

    assert _get_measurement_params(folder) is None
def test_add_measurement_params(exposure_df):
    """ channel and time are written to the rows of each exposure id """
    from sensospot_data.parameters import (
        MeasurementParams,
        _add_measurement_params,
    )

    params = {
        1: MeasurementParams("red", 10),
        2: MeasurementParams("green", 20),
        3: MeasurementParams("blue", 50),
    }

    result = _add_measurement_params(exposure_df, params)

    expected_rows = [(1, "red", 10), (2, "green", 20), (3, "blue", 50)]
    for index, (exposure_id, channel, time) in enumerate(expected_rows):
        assert result["Exposure.Id"][index] == exposure_id
        assert result["Parameters.Channel"][index] == channel
        assert result["Parameters.Time"][index] == time
def test_add_optional_measurement_parameters_with_params_file(
    exposure_df, example_dir
):
    """ the parameters of the example folder are added to the data frame """
    from sensospot_data.parameters import add_optional_measurement_parameters

    add_optional_measurement_parameters(
        exposure_df, example_dir / EXAMPLE_DIR_WITH_PARAMS
    )

    for exposure_id, channel, time in (
        (1, "green", 100),
        (2, "red", 150),
        (3, "red", 15),
    ):
        selection = exposure_df.loc[exposure_df["Exposure.Id"] == exposure_id]
        first_row = selection.iloc[0]
        assert first_row["Parameters.Channel"] == channel
        assert first_row["Parameters.Time"] == time
def test_add_optional_measurement_parameters_without_params_file(
    exposure_df, example_dir
):
    """ the parameter columns stay empty if there is no settings file """
    from pandas import isnull

    from sensospot_data.parameters import add_optional_measurement_parameters

    add_optional_measurement_parameters(
        exposure_df, example_dir / EXAMPLE_DIR_WO_PARAMS
    )

    for exposure_id in (1, 2, 3):
        selection = exposure_df.loc[exposure_df["Exposure.Id"] == exposure_id]
        first_row = selection.iloc[0]
        assert isnull(first_row["Parameters.Channel"])
        assert isnull(first_row["Parameters.Time"])

40
tests/test_parser.py

@ -1,41 +1,10 @@
""" Stub file for testing the project """ """ Stub file for testing the project """
from pathlib import Path
import numpy import numpy
import pytest import pytest
EXAMPLE_DIR_WO_PARAMS = "mtp_wo_parameters" from .conftest import EXAMPLE_DIR_WO_PARAMS, EXAMPLE_DIR_WITH_PARAMS
EXAMPLE_DIR_WITH_PARAMS = "mtp_with_parameters"
@pytest.fixture
def example_dir(request):
root_dir = Path(request.config.rootdir)
yield root_dir / "example_data"
@pytest.fixture
def example_file(example_dir):
data_dir = example_dir / EXAMPLE_DIR_WO_PARAMS
yield data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv"
@pytest.fixture
def exposure_df():
from pandas import DataFrame
yield DataFrame(data={"Exposure.Id": [1, 2, 3]})
@pytest.fixture
def dir_for_caching(tmpdir, example_file):
import shutil
temp_path = Path(tmpdir)
dest = temp_path / example_file.name
shutil.copy(example_file, dest)
yield temp_path
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -268,6 +237,8 @@ def test_parse_folder(example_dir):
assert len(data_frame["Well.Column"].unique()) == 12 assert len(data_frame["Well.Column"].unique()) == 12
assert len(data_frame["Exposure.Id"].unique()) == 3 assert len(data_frame["Exposure.Id"].unique()) == 3
assert len(data_frame["Pos.Id"].unique()) == 100 assert len(data_frame["Pos.Id"].unique()) == 100
assert len(data_frame["Parameters.Channel"].unique()) == 2
assert len(data_frame["Parameters.Time"].unique()) == 3
def test_sanity_check_ok(example_dir): def test_sanity_check_ok(example_dir):
@ -353,12 +324,11 @@ def test_process_folder_read_cache_fails_silently(
def test_get_cache_table_name(): def test_get_cache_table_name():
from sensospot_data.parser import _get_cache_table_name from sensospot_data.parser import _get_cache_table_name
from sensospot_data import __version__ from sensospot_data import VERSION_TABLE_NAME
result = _get_cache_table_name() result = _get_cache_table_name()
assert result.startswith("v") assert result == VERSION_TABLE_NAME
assert result[1:] == __version__
def test_process_folder_read_cache_no_cache_arg(dir_for_caching, exposure_df): def test_process_folder_read_cache_no_cache_arg(dir_for_caching, exposure_df):

Loading…
Cancel
Save