
linted

xmlparsing
Holger Frey, 3 years ago
commit 791bc51deb
  1. sensospot_data/dynamic_range.py (8 changed lines)
  2. sensospot_data/parameters.py (14 changed lines)
  3. sensospot_data/parser.py (18 changed lines)
  4. sensospot_data/utils.py (10 changed lines)
  5. tests/test_sensovation_data.py (2 changed lines)

sensospot_data/dynamic_range.py (8 changed lines)

@@ -21,7 +21,7 @@ PROBE_MULTI_INDEX = [
 def _check_if_xdr_ready(data_frame):
-    """ check if a data frame meets the constraints for xdr """
+    """check if a data frame meets the constraints for xdr"""
     required_columns = {SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME}
     if not required_columns.issubset(data_frame.columns):
         raise ValueError("XDR: Apply an exposure map first")
@@ -34,13 +34,13 @@ def _check_if_xdr_ready(data_frame):
 def _calc_overflow_info(data_frame, column=RAW_DATA_SPOT_SAT, limit=2):
-    """ add overflow info, based on column and limit """
+    """add overflow info, based on column and limit"""
     data_frame.loc[:, CALC_SPOT_OVERFLOW] = data_frame[column] > limit
     return data_frame


 def _reduce_overflow(data_frame):
-    """ the heavy lifting for creating an extended dynamic range """
+    """the heavy lifting for creating an extended dynamic range"""
     split_frames = split(data_frame, SETTINGS_EXPOSURE_TIME)
@@ -67,7 +67,7 @@ def _reduce_overflow(data_frame):
 def blend(data_frame, column=RAW_DATA_SPOT_SAT, limit=2):
-    """ creates an extended dynamic range, eliminating overflowing spots """
+    """creates an extended dynamic range, eliminating overflowing spots"""
     _check_if_xdr_ready(data_frame)
     if CALC_SPOT_OVERFLOW not in data_frame.columns:
         data_frame = _calc_overflow_info(data_frame, column, limit)
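Note: blend() is the public entry point touched above. The following is a minimal usage sketch, not taken from the commit; the folder path and the exposure map values are invented, and the pipeline order is inferred from the "XDR: Apply an exposure map first" error:

    from sensospot_data import blend, parse_folder, apply_exposure_map

    frame = parse_folder("path/to/measurement")  # hypothetical folder
    # hypothetical exposure map: exposure id -> (channel, exposure time)
    frame = apply_exposure_map(frame, {1: ("Cy3", 100), 2: ("Cy5", 150)})
    xdr_frame = blend(frame, limit=2)  # flags and removes overflowing spots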

sensospot_data/parameters.py (14 changed lines)

@@ -9,18 +9,18 @@ from collections import namedtuple
 import numpy
 from defusedxml import ElementTree

-from .utils import apply_map
 from .columns import (
     META_DATA_EXPOSURE_ID,
     META_DATA_PARAMETERS_TIME,
     META_DATA_PARAMETERS_CHANNEL,
 )
+from .utils import apply_map

 ExposureInfo = namedtuple("ExposureInfo", ["channel", "time"])


 def _search_measurement_params_file(folder):
-    """ searches for a exposure settings file in a folder """
+    """searches for a exposure settings file in a folder"""
     folder_path = Path(folder)
     params_folder = folder_path / "Parameters"
     if not params_folder.is_dir():
@@ -33,7 +33,7 @@ def _search_measurement_params_file(folder):
 def _parse_measurement_params(params_file):
-    """ parses the cannel informations from a settings file """
+    """parses the cannel informations from a settings file"""
     file_path = Path(params_file)
     with file_path.open("r") as file_handle:
         tree = ElementTree.parse(file_handle)
@@ -50,7 +50,7 @@ def _parse_measurement_params(params_file):
 def get_measurement_params(folder):
-    """ returns measurement parameters """
+    """returns measurement parameters"""
     params_file = _search_measurement_params_file(folder)
     if params_file is not None:
         return _parse_measurement_params(params_file)
@@ -58,14 +58,14 @@ def get_measurement_params(folder):
 def _add_measurement_params(data_frame, params):
-    """ adds measurement parameters to a data frame """
-    columns=[META_DATA_PARAMETERS_CHANNEL, META_DATA_PARAMETERS_TIME]
+    """adds measurement parameters to a data frame"""
+    columns = [META_DATA_PARAMETERS_CHANNEL, META_DATA_PARAMETERS_TIME]
     map = {k: dict(zip(columns, v)) for k, v in params.items()}
     return apply_map(data_frame, map, META_DATA_EXPOSURE_ID)


 def add_optional_measurement_parameters(data_frame, folder):
-    """ adds measurement params to the data frame, if they could be parsed """
+    """adds measurement params to the data frame, if they could be parsed"""
     params = get_measurement_params(folder)
     if params:
         available_exposures = set(data_frame[META_DATA_EXPOSURE_ID].unique())
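Note: the map comprehension in _add_measurement_params builds a nested mapping for apply_map. A small worked example with invented values; the real column names come from the META_DATA_PARAMETERS_* constants:

    # shape of get_measurement_params() results: exposure id -> (channel, time)
    params = {1: ("green", 100), 2: ("red", 150)}

    columns = ["Parameters.Channel", "Parameters.Time"]  # placeholder names
    map = {k: dict(zip(columns, v)) for k, v in params.items()}
    # map == {1: {"Parameters.Channel": "green", "Parameters.Time": 100},
    #         2: {"Parameters.Channel": "red", "Parameters.Time": 150}}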

sensospot_data/parser.py (18 changed lines)

@@ -13,9 +13,9 @@ from .columns import (
     RAW_DATA_POS_ID,
     META_DATA_WELL_ROW,
     META_DATA_WELL_NAME,
-    PARSED_DATA_COLUMN_SET,
     META_DATA_EXPOSURE_ID,
     META_DATA_WELL_COLUMN,
+    PARSED_DATA_COLUMN_SET,
     RAW_DATA_NORMALIZATION_MAP,
     RAW_DATA_COLUMNS_RENAME_MAP,
 )
@@ -33,7 +33,7 @@ FileInfo = namedtuple("FileInfo", ["row", "column", "exposure"])
 def _guess_decimal_separator(file_handle):
-    """ guesses the decimal spearator of a opened data file """
+    """guesses the decimal spearator of a opened data file"""
     file_handle.seek(0)
     headers = next(file_handle)  # noqa: F841
     data = next(file_handle)
@@ -43,7 +43,7 @@ def _guess_decimal_separator(file_handle):
 def _parse_csv(data_file):
-    """ parse a csv sensovation data file """
+    """parse a csv sensovation data file"""
     data_path = Path(data_file)
     with data_path.open("r") as handle:
         decimal_sep = _guess_decimal_separator(handle)
@@ -51,7 +51,7 @@ def _parse_csv(data_file):
 def _extract_measurement_info(data_file):
-    """ extract measurement meta data from a file name """
+    """extract measurement meta data from a file name"""
     data_path = Path(data_file)
     *rest, well, exposure = data_path.stem.rsplit("_", 2)  # noqa: F841
     matched = REGEX_WELL.match(well)
@@ -64,7 +64,7 @@ def _extract_measurement_info(data_file):
 def _cleanup_data_columns(data_frame):
-    """ renames some data columns for consistency and drops unused columns """
+    """renames some data columns for consistency and drops unused columns"""
     renamed = data_frame.rename(columns=RAW_DATA_COLUMNS_RENAME_MAP)
     surplus_columns = set(renamed.columns) - PARSED_DATA_COLUMN_SET
     return renamed.drop(columns=surplus_columns)
@@ -99,7 +99,7 @@ def _silenced_parse_file(data_file):
 def parse_multiple_files(file_list):
-    """ parses a list of file paths to one combined dataframe """
+    """parses a list of file paths to one combined dataframe"""
     if not file_list:
         raise ValueError("Empty file list provided")
     collection = (_silenced_parse_file(path) for path in file_list)
@@ -114,7 +114,7 @@ def parse_multiple_files(file_list):
 def list_csv_files(folder):
-    """ returns all csv files in a folder """
+    """returns all csv files in a folder"""
     folder_path = Path(folder)
     files = (item for item in folder_path.iterdir() if item.is_file())
     visible = (item for item in files if not item.stem.startswith("."))
@@ -122,7 +122,7 @@ def list_csv_files(folder):
 def _sanity_check(data_frame):
-    """ checks some basic constrains of a combined data frame """
+    """checks some basic constrains of a combined data frame"""
     field_rows = len(data_frame[META_DATA_WELL_ROW].unique())
     field_cols = len(data_frame[META_DATA_WELL_COLUMN].unique())
     exposures = len(data_frame[META_DATA_EXPOSURE_ID].unique())
@@ -139,7 +139,7 @@ def _sanity_check(data_frame):
 def parse_folder(folder, quiet=False):
-    """ parses all csv files in a folder to one large dataframe """
+    """parses all csv files in a folder to one large dataframe"""
     file_list = list_csv_files(Path(folder))
     data_frame = parse_multiple_files(file_list)
     data_frame = add_optional_measurement_parameters(data_frame, folder)
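Note: _extract_measurement_info relies on a file name ending in "_<well>_<exposure>". A quick sketch of the rsplit logic above, with an invented file name:

    from pathlib import Path

    *rest, well, exposure = Path("slide3_A01_2.csv").stem.rsplit("_", 2)
    # rest == ["slide3"], well == "A01", exposure == "2"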

sensospot_data/utils.py (10 changed lines)

@@ -20,26 +20,26 @@ DEFAULT_AGGREGATION_INDEX = [
 def split(data_frame, column):
-    """ splits a data frame on unique column values """
+    """splits a data frame on unique column values"""
     values = data_frame[column].unique()
     masks = {value: (data_frame[column] == value) for value in values}
     return {value: data_frame[mask] for value, mask in masks.items()}


 def _is_list_or_tuple(something):
-    """ returns true if something is a list or tuple """
+    """returns true if something is a list or tuple"""
     if isinstance(something, Sequence):
         return not isinstance(something, str)
     return False


 def _is_numerical(something):
-    """ returns true if something is an int or float """
+    """returns true if something is an int or float"""
     return isinstance(something, int) or isinstance(something, float)


 def _check_valid_exposure_map_entry(entry):
-    """ raises a ValueError, if an exposure map entry is not suitable """
+    """raises a ValueError, if an exposure map entry is not suitable"""
     if not _is_list_or_tuple(entry):
         raise ValueError("Eposure Map: entries must be tuples or lists")
     if not len(entry) == 2:
@@ -104,7 +104,7 @@ def apply_exposure_map(data_frame, exposure_map=None):
     _check_exposure_map(data_frame, exposure_map)

-    columns=[SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME]
+    columns = [SETTINGS_EXPOSURE_CHANNEL, SETTINGS_EXPOSURE_TIME]
     map = {k: dict(zip(columns, v)) for k, v in exposure_map.items()}
     return apply_map(data_frame, map, META_DATA_EXPOSURE_ID)
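Note: split() partitions a data frame by the unique values of one column and is what _reduce_overflow builds on. A tiny example with an invented column name and values:

    import pandas

    from sensospot_data import split

    frame = pandas.DataFrame({"Exposure.Id": [1, 1, 2], "Value": [10, 11, 20]})
    parts = split(frame, "Exposure.Id")  # {1: two-row frame, 2: one-row frame}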

tests/test_sensovation_data.py (2 changed lines)

@@ -6,9 +6,9 @@ def test_import_api():
     from sensospot_data import run  # noqa: F401
     from sensospot_data import blend  # noqa: F401
     from sensospot_data import split  # noqa: F401
+    from sensospot_data import apply_map  # noqa: F401
     from sensospot_data import create_xdr  # noqa: F401
     from sensospot_data import parse_file  # noqa: F401
     from sensospot_data import parse_folder  # noqa: F401
     from sensospot_data import normalize_values  # noqa: F401
     from sensospot_data import apply_exposure_map  # noqa: F401
-    from sensospot_data import apply_map  # noqa: F401
