From 562daa738fe43d2b79a71477def2147e2ba0ebb9 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Wed, 15 Feb 2023 12:28:39 +0100 Subject: [PATCH] added logging statements --- pyproject.toml | 7 ++++--- src/sensospot_parser/__init__.py | 26 +++++++++++++++++++++++--- src/sensospot_parser/csv_parser.py | 6 ++++++ src/sensospot_parser/parameters.py | 5 +++++ src/sensospot_parser/xml_parser.py | 8 ++++++++ 5 files changed, 46 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4749d72..6d71956 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,10 +27,10 @@ classifiers = [ ] dependencies = [ - "pandas >=1.0.0", + "click", "defusedxml >=0.6.0", + "pandas >=1.0.0", "tables >=3.6.1", - "click", ] [project.urls] Source = "https://git.cpi.imtek.uni-freiburg.de/holgi/sensospot_parser.git" @@ -112,7 +112,8 @@ target-version = "py38" [tool.ruff.per-file-ignores] # see https://github.com/charliermarsh/ruff -"tests/*" = ["FBT003", "INP001", "PLR2004", "S101"] +"src/*" = ["SLF001", "G004"] +"tests/*" = ["FBT003", "INP001", "PLR2004", "S101", "SLF001"] [tool.ruff.pydocstyle] convention = "pep257" # Accepts: "google", "numpy", or "pep257". diff --git a/src/sensospot_parser/__init__.py b/src/sensospot_parser/__init__.py index f42d5c2..2810c65 100644 --- a/src/sensospot_parser/__init__.py +++ b/src/sensospot_parser/__init__.py @@ -5,7 +5,7 @@ Parsing the numerical output from Sensovations Sensospot image analysis. __version__ = "2.0.0" - +import logging import pathlib from typing import Union @@ -16,6 +16,9 @@ from . import columns # noqa: F401 from .csv_parser import parse_csv_folder from .xml_parser import parse_xml_folder +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger("sensospot_parser") + DEFAULT_OUTPUT_FILENAME = "collected_data.csv" PathLike = Union[str, pathlib.Path] @@ -38,6 +41,9 @@ def parse_folder(source: PathLike, *, quiet: bool = False) -> pandas.DataFrame: return parse_xml_folder(source) except ValueError: pass + logger.info( + "Could not parse xml results file, using fall-back csv parsing" + ) return parse_csv_folder(source, quiet=quiet) @@ -68,14 +74,28 @@ def parse_folder(source: PathLike, *, quiet: bool = False) -> pandas.DataFrame: default=False, help="Ignore sanity check for csv file parsing", ) -def main(sources, output, quiet=False): # noqa: FBT002 +@click.option( + "-v", + "--verbose", + help="Set verbosity of log, add multiple -vv for more verbose logging", + count=True, +) +def main(sources, output, verbose, quiet=False): # noqa: FBT002 """Parses the measurement results of the Sensospot reader The resulting output is either echoed to stdout or saved to a file. At first parsing the assay result xml file is tried. - I this doesn't work, the fallback is to parse the csv files. + If this doesn't work, the fallback is to parse the csv files. """ + + if verbose == 0: + logging.disable() + elif verbose == 1: + logging.disable(level=logging.DEBUG) + else: + logging.disable(level=logging.NOTSET) + paths = (pathlib.Path(source) for source in sources) collection = (parse_folder(source, quiet=quiet) for source in paths) result = ( diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py index 7736e75..aeb3cb0 100644 --- a/src/sensospot_parser/csv_parser.py +++ b/src/sensospot_parser/csv_parser.py @@ -3,6 +3,7 @@ Parsing the csv result files from Sensovations Sensospot image analysis. """ +import logging import pathlib import re from collections import namedtuple @@ -13,6 +14,8 @@ import pandas from . import columns from .parameters import add_measurement_parameters +logger = logging.getLogger("sensospot_parser") + PathLike = Union[str, pathlib.Path] REGEX_WELL = re.compile( @@ -100,6 +103,7 @@ def parse_csv_file(data_file: PathLike) -> pandas.DataFrame: ValueError: if metadata could not be extracted """ data_path = pathlib.Path(data_file).resolve() + logger.debug(f"Parsing csv file {data_path}") measurement_info = _extract_measurement_info(data_path) data_frame = _parse_csv(data_path) # normalized well name @@ -211,12 +215,14 @@ def parse_csv_folder( Returns: a pandas data frame with parsed data """ + logger.info(f"Parsing csv files in folder {folder}") folder_path = pathlib.Path(folder) file_list = find_csv_files(folder_path) try: data_frame = parse_multiple_csv_files(file_list) except ValueError as e: msg = f"No sensospot data found in folder '{folder}'" + logger.warning(msg) raise ValueError(msg) from e data_frame = add_measurement_parameters(data_frame, folder_path) diff --git a/src/sensospot_parser/parameters.py b/src/sensospot_parser/parameters.py index e87cf45..a8a4a9d 100644 --- a/src/sensospot_parser/parameters.py +++ b/src/sensospot_parser/parameters.py @@ -3,6 +3,7 @@ Parsing the numerical output from Sensovations Sensospot image analysis. """ +import logging import pathlib from typing import Any, Dict, Optional, Union from xml.etree.ElementTree import Element as ElementType @@ -15,6 +16,8 @@ from . import columns PathLike = Union[str, pathlib.Path] +logger = logging.getLogger("sensospot_parser") + def _search_params_file(folder: PathLike) -> Optional[pathlib.Path]: """searches for a exposure settings file in a folder @@ -65,6 +68,7 @@ def _parse_measurement_params(params_file: PathLike) -> pandas.DataFrame: Returns: pandas data frame with the parsed information """ + logger.debug(f"Parsing parameters file {params_file}") file_path = pathlib.Path(params_file) with file_path.open("r") as file_handle: tree = ElementTree.parse(file_handle) @@ -84,6 +88,7 @@ def get_measurement_params(folder: PathLike) -> Optional[pandas.DataFrame]: params_file = _search_params_file(folder) if params_file is not None: return _parse_measurement_params(params_file) + logger.debug(f"Could not locate parameters file in folder {folder}") return None diff --git a/src/sensospot_parser/xml_parser.py b/src/sensospot_parser/xml_parser.py index f76eb52..dfdd7a9 100644 --- a/src/sensospot_parser/xml_parser.py +++ b/src/sensospot_parser/xml_parser.py @@ -3,6 +3,7 @@ Parsing the csv result files from Sensovations Sensospot image analysis. """ +import logging import pathlib from datetime import datetime from typing import Optional, Union @@ -12,6 +13,8 @@ from defusedxml import ElementTree from . import columns, parameters +logger = logging.getLogger("sensospot_parser") + PathLike = Union[str, pathlib.Path] RESULT_TAG_TYPES = { @@ -153,9 +156,11 @@ def parse_xml_file(xml_file: PathLike) -> pandas.DataFrame: Raises: ValueError if the xml file could not be parsed """ + logger.info(f"Parsing xml results file {xml_file}") xml_file = pathlib.Path(xml_file) if not xml_file.is_file(): msg = "Xml file does not exist" + logger.debug(f"{msg}: {xml_file}") raise ValueError(msg) target = ParserTarget() @@ -165,11 +170,13 @@ def parse_xml_file(xml_file: PathLike) -> pandas.DataFrame: parser.feed(xml_file.read_text()) except (IndexError, KeyError, ValueError, TypeError) as e: msg = "Malformed data in xml file" + logger.warning(f"{msg} {xml_file}") raise ValueError(msg) from e data_frame = pandas.DataFrame(data=target.collected).reset_index() if data_frame.empty: msg = "Could not parse assay results xml file" + logger.warning(f"{msg} {xml_file}") raise ValueError(msg) return columns._cleanup_data_columns(data_frame) @@ -191,6 +198,7 @@ def parse_xml_folder(folder: PathLike) -> pandas.DataFrame: xml_file = _find_result_xml_file(folder) if xml_file is None: msg = "Could not find assay results xml file" + logger.debug(f"{msg} in folder {folder}") raise ValueError(msg) data_frame = parse_xml_file(xml_file) data_frame = parameters.add_measurement_parameters(data_frame, folder)