Commit 791bc51deb (branch: xmlparsing)
Author: Holger Frey, 3 years ago

    linted

A lint-only commit: docstring whitespace is normalized, imports are reordered, and spacing around assignments is fixed. No behavior changes.
5 changed files, 26 additions and 26 deletions:

  1. sensospot_data/dynamic_range.py (8 lines changed)
  2. sensospot_data/parameters.py (14 lines changed)
  3. sensospot_data/parser.py (18 lines changed)
  4. sensospot_data/utils.py (10 lines changed)
  5. tests/test_sensovation_data.py (2 lines changed)

sensospot_data/dynamic_range.py (+4 / -4)

@@ -21,7 +21,7 @@ PROBE_MULTI_INDEX = [
 
 
 def _check_if_xdr_ready(data_frame):
-    """ check if a data frame meets the constraints for xdr """
+    """check if a data frame meets the constraints for xdr"""
     required_columns = {SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME}
     if not required_columns.issubset(data_frame.columns):
         raise ValueError("XDR: Apply an exposure map first")
@@ -34,13 +34,13 @@ def _check_if_xdr_ready(data_frame):
 
 
 def _calc_overflow_info(data_frame, column=RAW_DATA_SPOT_SAT, limit=2):
-    """ add overflow info, based on column and limit """
+    """add overflow info, based on column and limit"""
     data_frame.loc[:, CALC_SPOT_OVERFLOW] = data_frame[column] > limit
     return data_frame
 
 
 def _reduce_overflow(data_frame):
-    """ the heavy lifting for creating an extended dynamic range """
+    """the heavy lifting for creating an extended dynamic range"""
     split_frames = split(data_frame, SETTINGS_EXPOSURE_TIME)
@@ -67,7 +67,7 @@ def _reduce_overflow(data_frame):
 
 
 def blend(data_frame, column=RAW_DATA_SPOT_SAT, limit=2):
-    """ creates an extended dynamic range, eliminating overflowing spots """
+    """creates an extended dynamic range, eliminating overflowing spots"""
     _check_if_xdr_ready(data_frame)
     if CALC_SPOT_OVERFLOW not in data_frame.columns:
         data_frame = _calc_overflow_info(data_frame, column, limit)
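
Context for these hunks: blend() flags saturated spots and, in one plausible reading of _reduce_overflow(), substitutes readings from another exposure for them. A minimal standalone sketch of that idea with invented column names and values (the real code uses the constants from sensospot_data.columns, and intensity rescaling is handled separately, e.g. by normalize_values):

```python
import pandas as pd

# Toy data: the same two spots imaged at a long and a short exposure time.
# "saturation" stands in for RAW_DATA_SPOT_SAT; all values are invented.
frame = pd.DataFrame({
    "spot": [1, 2, 1, 2],
    "exposure_time": [250, 250, 100, 100],
    "saturation": [5, 0, 1, 0],
    "intensity": [60000, 1200, 24000, 480],
})

# Analogous to _calc_overflow_info(): flag spots above the saturation limit.
frame["overflow"] = frame["saturation"] > 2

# Analogous to _reduce_overflow(): keep the long exposure where it is valid,
# fall back to the short exposure where the long one overflowed.
long = frame[frame["exposure_time"] == 250].set_index("spot")
short = frame[frame["exposure_time"] == 100].set_index("spot")
blended = long.copy()
blended.loc[long["overflow"]] = short.loc[long["overflow"]]
print(blended[["exposure_time", "intensity"]])
```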

sensospot_data/parameters.py (+7 / -7)

@@ -9,18 +9,18 @@ from collections import namedtuple
 import numpy
 from defusedxml import ElementTree
 
-from .utils import apply_map
 from .columns import (
     META_DATA_EXPOSURE_ID,
     META_DATA_PARAMETERS_TIME,
     META_DATA_PARAMETERS_CHANNEL,
 )
+from .utils import apply_map
 
 ExposureInfo = namedtuple("ExposureInfo", ["channel", "time"])
 
 
 def _search_measurement_params_file(folder):
-    """ searches for a exposure settings file in a folder """
+    """searches for a exposure settings file in a folder"""
     folder_path = Path(folder)
     params_folder = folder_path / "Parameters"
     if not params_folder.is_dir():
@@ -33,7 +33,7 @@ def _search_measurement_params_file(folder):
 
 
 def _parse_measurement_params(params_file):
-    """ parses the cannel informations from a settings file """
+    """parses the cannel informations from a settings file"""
     file_path = Path(params_file)
     with file_path.open("r") as file_handle:
         tree = ElementTree.parse(file_handle)
@@ -50,7 +50,7 @@ def _parse_measurement_params(params_file):
 
 
 def get_measurement_params(folder):
-    """ returns measurement parameters """
+    """returns measurement parameters"""
     params_file = _search_measurement_params_file(folder)
     if params_file is not None:
         return _parse_measurement_params(params_file)
@@ -58,14 +58,14 @@ def get_measurement_params(folder):
 
 
 def _add_measurement_params(data_frame, params):
-    """ adds measurement parameters to a data frame """
-    columns=[META_DATA_PARAMETERS_CHANNEL, META_DATA_PARAMETERS_TIME]
+    """adds measurement parameters to a data frame"""
+    columns = [META_DATA_PARAMETERS_CHANNEL, META_DATA_PARAMETERS_TIME]
     map = {k: dict(zip(columns, v)) for k, v in params.items()}
     return apply_map(data_frame, map, META_DATA_EXPOSURE_ID)
 
 
 def add_optional_measurement_parameters(data_frame, folder):
-    """ adds measurement params to the data frame, if they could be parsed """
+    """adds measurement params to the data frame, if they could be parsed"""
     params = get_measurement_params(folder)
     if params:
         available_exposures = set(data_frame[META_DATA_EXPOSURE_ID].unique())
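
The reordered import feeds the pattern in _add_measurement_params(): each ExposureInfo namedtuple is turned into a per-exposure column mapping via dict(zip(...)). A small sketch of just that step, with invented channel/time values and plain strings standing in for the META_DATA_* column constants:

```python
from collections import namedtuple

ExposureInfo = namedtuple("ExposureInfo", ["channel", "time"])

# What get_measurement_params() plausibly returns: one entry per exposure id.
params = {1: ExposureInfo("green", 100), 2: ExposureInfo("red", 150)}

# The dict(zip(...)) step: a namedtuple unpacks in field order, so each
# entry becomes a {column_label: value} dict keyed by exposure id.
columns = ["Parameters.Channel", "Parameters.Time"]  # stand-in labels
mapping = {k: dict(zip(columns, v)) for k, v in params.items()}
print(mapping[1])  # {'Parameters.Channel': 'green', 'Parameters.Time': 100}
```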

sensospot_data/parser.py (+9 / -9)

@@ -13,9 +13,9 @@ from .columns import (
     RAW_DATA_POS_ID,
     META_DATA_WELL_ROW,
     META_DATA_WELL_NAME,
-    PARSED_DATA_COLUMN_SET,
     META_DATA_EXPOSURE_ID,
     META_DATA_WELL_COLUMN,
+    PARSED_DATA_COLUMN_SET,
     RAW_DATA_NORMALIZATION_MAP,
     RAW_DATA_COLUMNS_RENAME_MAP,
 )
@@ -33,7 +33,7 @@ FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
 
 
 def _guess_decimal_separator(file_handle):
-    """ guesses the decimal spearator of a opened data file """
+    """guesses the decimal spearator of a opened data file"""
     file_handle.seek(0)
     headers = next(file_handle)  # noqa: F841
     data = next(file_handle)
@@ -43,7 +43,7 @@ def _guess_decimal_separator(file_handle):
 
 
 def _parse_csv(data_file):
-    """ parse a csv sensovation data file """
+    """parse a csv sensovation data file"""
     data_path = Path(data_file)
     with data_path.open("r") as handle:
         decimal_sep = _guess_decimal_separator(handle)
@@ -51,7 +51,7 @@ def _parse_csv(data_file):
 
 
 def _extract_measurement_info(data_file):
-    """ extract measurement meta data from a file name """
+    """extract measurement meta data from a file name"""
     data_path = Path(data_file)
     *rest, well, exposure = data_path.stem.rsplit("_", 2)  # noqa: F841
     matched = REGEX_WELL.match(well)
@@ -64,7 +64,7 @@ def _extract_measurement_info(data_file):
 
 
 def _cleanup_data_columns(data_frame):
-    """ renames some data columns for consistency and drops unused columns """
+    """renames some data columns for consistency and drops unused columns"""
     renamed = data_frame.rename(columns=RAW_DATA_COLUMNS_RENAME_MAP)
     surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET
     return renamed.drop(columns=surplus_columns)
@@ -99,7 +99,7 @@ def _silenced_parse_file(data_file):
 
 
 def parse_multiple_files(file_list):
-    """ parses a list of file paths to one combined dataframe """
+    """parses a list of file paths to one combined dataframe"""
     if not file_list:
         raise ValueError("Empty file list provided")
     collection = (_silenced_parse_file(path) for path in file_list)
@@ -114,7 +114,7 @@ def parse_multiple_files(file_list):
 
 
 def list_csv_files(folder):
-    """ returns all csv files in a folder """
+    """returns all csv files in a folder"""
     folder_path = Path(folder)
     files = (item for item in folder_path.iterdir() if item.is_file())
     visible = (item for item in files if not item.stem.startswith("."))
@@ -122,7 +122,7 @@ def list_csv_files(folder):
 
 
 def _sanity_check(data_frame):
-    """ checks some basic constrains of a combined data frame """
+    """checks some basic constrains of a combined data frame"""
     field_rows = len(data_frame[META_DATA_WELL_ROW].unique())
     field_cols = len(data_frame[META_DATA_WELL_COLUMN].unique())
     exposures = len(data_frame[META_DATA_EXPOSURE_ID].unique())
@@ -139,7 +139,7 @@ def _sanity_check(data_frame):
 
 
 def parse_folder(folder, quiet=False):
-    """ parses all csv files in a folder to one large dataframe """
+    """parses all csv files in a folder to one large dataframe"""
     file_list = list_csv_files(Path(folder))
     data_frame = parse_multiple_files(file_list)
     data_frame = add_optional_measurement_parameters(data_frame, folder)
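
Of the parser helpers touched above, _guess_decimal_separator() is the least obvious: it peeks at the first data row of an open file to decide how the numbers should be read. A standalone approximation of that heuristic; the real implementation may differ in detail, and the tab-separated assumption is mine:

```python
import io

def guess_decimal_separator(file_handle):
    """peek at the first data row to guess "," vs "." as decimal mark"""
    file_handle.seek(0)
    next(file_handle)         # skip the header row
    data = next(file_handle)  # first data row
    # assumption: fields are tab-separated, so a comma in the data row
    # can only be a decimal mark
    return "," if "," in data else "."

handle = io.StringIO("Pos.Id\tBkg.Mean\n1\t12,5\n")
print(guess_decimal_separator(handle))  # prints: ,
```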

sensospot_data/utils.py (+5 / -5)

@@ -20,26 +20,26 @@ DEFAULT_AGGREGATION_INDEX = [
 
 
 def split(data_frame, column):
-    """ splits a data frame on unique column values """
+    """splits a data frame on unique column values"""
     values = data_frame[column].unique()
     masks = {value: (data_frame[column] == value) for value in values}
     return {value: data_frame[mask] for value, mask in masks.items()}
 
 
 def _is_list_or_tuple(something):
-    """ returns true if something is a list or tuple """
+    """returns true if something is a list or tuple"""
     if isinstance(something, Sequence):
         return not isinstance(something, str)
     return False
 
 
 def _is_numerical(something):
-    """ returns true if something is an int or float """
+    """returns true if something is an int or float"""
     return isinstance(something, int) or isinstance(something, float)
 
 
 def _check_valid_exposure_map_entry(entry):
-    """ raises a ValueError, if an exposure map entry is not suitable """
+    """raises a ValueError, if an exposure map entry is not suitable"""
     if not _is_list_or_tuple(entry):
         raise ValueError("Eposure Map: entries must be tuples or lists")
     if not len(entry) == 2:
@@ -104,7 +104,7 @@ def apply_exposure_map(data_frame, exposure_map=None):
     _check_exposure_map(data_frame, exposure_map)
-    columns=[SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME]
+    columns = [SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME]
     map = {k: dict(zip(columns, v)) for k, v in exposure_map.items()}
     return apply_map(data_frame, map, META_DATA_EXPOSURE_ID)
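
split() appears in full in the first hunk and is easy to try in isolation: it partitions a frame into one sub-frame per unique value of a column. The toy frame below is invented:

```python
import pandas as pd

def split(data_frame, column):
    """splits a data frame on unique column values"""
    values = data_frame[column].unique()
    masks = {value: (data_frame[column] == value) for value in values}
    return {value: data_frame[mask] for value, mask in masks.items()}

frame = pd.DataFrame({"exposure": [1, 1, 2], "value": [10, 20, 30]})
parts = split(frame, "exposure")
print(parts[1]["value"].tolist())  # [10, 20]
print(parts[2]["value"].tolist())  # [30]
```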

tests/test_sensovation_data.py (+1 / -1)

@@ -6,9 +6,9 @@ def test_import_api():
     from sensospot_data import run  # noqa: F401
     from sensospot_data import blend  # noqa: F401
     from sensospot_data import split  # noqa: F401
-    from sensospot_data import apply_map  # noqa: F401
     from sensospot_data import create_xdr  # noqa: F401
     from sensospot_data import parse_file  # noqa: F401
     from sensospot_data import parse_folder  # noqa: F401
     from sensospot_data import normalize_values  # noqa: F401
     from sensospot_data import apply_exposure_map  # noqa: F401
+    from sensospot_data import apply_map  # noqa: F401
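
This import list doubles as a map of the public API. A hypothetical end-to-end session built only from names in this test; the folder path, exposure ids, and exposure map values are invented, but the (channel, time) entry shape matches _check_valid_exposure_map_entry() above:

```python
from sensospot_data import blend, parse_folder, apply_exposure_map

# Parse all CSV files of a measurement into one combined data frame.
data = parse_folder("path/to/measurement")  # invented path

# Attach channel/time settings per exposure id; each entry must be a
# two-element tuple or list.
data = apply_exposure_map(data, {1: ("green", 100), 2: ("red", 150)})

# Blend exposures into one extended-dynamic-range result.
xdr = blend(data)
```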
