diff --git a/src/sensospot_parser/__init__.py b/src/sensospot_parser/__init__.py index 9a8cf36..f42d5c2 100644 --- a/src/sensospot_parser/__init__.py +++ b/src/sensospot_parser/__init__.py @@ -21,7 +21,7 @@ DEFAULT_OUTPUT_FILENAME = "collected_data.csv" PathLike = Union[str, pathlib.Path] -def parse_folder(source: PathLike, quiet: bool = False) -> pandas.DataFrame: +def parse_folder(source: PathLike, *, quiet: bool = False) -> pandas.DataFrame: """parses an assay result folder The function will first try to use an assay results xml file, and will @@ -38,7 +38,7 @@ def parse_folder(source: PathLike, quiet: bool = False) -> pandas.DataFrame: return parse_xml_folder(source) except ValueError: pass - return parse_csv_folder(source, quiet) + return parse_csv_folder(source, quiet=quiet) @click.command() @@ -68,7 +68,7 @@ def parse_folder(source: PathLike, quiet: bool = False) -> pandas.DataFrame: default=False, help="Ignore sanity check for csv file parsing", ) -def main(sources, output, quiet=False): +def main(sources, output, quiet=False): # noqa: FBT002 """Parses the measurement results of the Sensospot reader The resulting output is either echoed to stdout or saved to a file. @@ -77,7 +77,7 @@ def main(sources, output, quiet=False): I this doesn't work, the fallback is to parse the csv files. """ paths = (pathlib.Path(source) for source in sources) - collection = (parse_folder(source, quiet) for source in paths) + collection = (parse_folder(source, quiet=quiet) for source in paths) result = ( pandas.concat(collection, ignore_index=True) .reset_index() diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py index 267bfea..7736e75 100644 --- a/src/sensospot_parser/csv_parser.py +++ b/src/sensospot_parser/csv_parser.py @@ -3,10 +3,10 @@ Parsing the csv result files from Sensovations Sensospot image analysis. """ -import re import pathlib -from typing import Union, TextIO, Optional, Sequence +import re from collections import namedtuple +from typing import Optional, Sequence, TextIO, Union import pandas @@ -74,10 +74,11 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo: named tuple FileInfo with parsed metadata """ data_path = pathlib.Path(data_file) - *rest, well, exposure = data_path.stem.rsplit("_", 2) # noqa: F841 + *rest, well, exposure = data_path.stem.rsplit("_", 2) matched = REGEX_WELL.match(well) if matched is None: - raise ValueError(f"not a valid well: '{well}'") + msg = f"not a valid well: '{well}'" + raise ValueError(msg) row = matched["row"].upper() column = int(matched["column"]) exposure = int(exposure) @@ -143,7 +144,8 @@ def parse_multiple_csv_files( pandas data frame with all parsed data combined """ if not file_list: - raise ValueError("Empty file list provided") + msg = "Empty file list provided" + raise ValueError(msg) collection = (_parse_csv_file_silenced(path) for path in file_list) filtered = (frame for frame in collection if frame is not None) data_frame = pandas.concat(filtered, ignore_index=True).reset_index() @@ -186,9 +188,8 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame: spot_positions = len(data_frame[columns.POS_ID].unique()) expected_rows = field_rows * field_cols * exposures * spot_positions if expected_rows != len(data_frame): - raise ValueError( - f"Measurements are missing: {expected_rows} != {len(data_frame)}" - ) + msg = f"Measurements are missing: {expected_rows} != {len(data_frame)}" + raise ValueError(msg) # set the right data type for measurement columns for raw_column in columns.NUMERIC_COLUMNS: data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column]) @@ -196,7 +197,7 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame: def parse_csv_folder( - folder: PathLike, quiet: bool = False + folder: PathLike, *, quiet: bool = False ) -> pandas.DataFrame: """parses all csv files in a folder to one large dataframe @@ -214,8 +215,9 @@ def parse_csv_folder( file_list = find_csv_files(folder_path) try: data_frame = parse_multiple_csv_files(file_list) - except ValueError: - raise ValueError(f"No sensospot data found in folder '{folder}'") + except ValueError as e: + msg = f"No sensospot data found in folder '{folder}'" + raise ValueError(msg) from e data_frame = add_measurement_parameters(data_frame, folder_path) diff --git a/src/sensospot_parser/parameters.py b/src/sensospot_parser/parameters.py index 2beb04c..e87cf45 100644 --- a/src/sensospot_parser/parameters.py +++ b/src/sensospot_parser/parameters.py @@ -4,8 +4,8 @@ Parsing the numerical output from Sensovations Sensospot image analysis. """ import pathlib -from typing import Any, Dict, Union, Optional -from xml.etree.ElementTree import Element as ElementType # noqa: S405 +from typing import Any, Dict, Optional, Union +from xml.etree.ElementTree import Element as ElementType import numpy import pandas @@ -30,10 +30,7 @@ def _search_params_file(folder: PathLike) -> Optional[pathlib.Path]: if not params_folder.is_dir(): return None param_files = list(params_folder.glob("**/*.svexp")) - if len(param_files) == 1: - return param_files[0] - else: - return None + return param_files[0] if len(param_files) == 1 else None def _get_channel_data(channel_node: ElementType) -> Dict[str, Any]: @@ -45,9 +42,9 @@ def _get_channel_data(channel_node: ElementType) -> Dict[str, Any]: Returns: dict with the information """ - # child.tag == "ChannelConfig1" + # Example "ChannelConfig1" exposure_id = int(channel_node.tag[-1]) - # channel_description == "[Cy3|Cy5] Green" + # Example "Cy3 Green" description = channel_node.attrib["Description"] exposure_channel = description.rsplit(" ", 1)[-1] # floats can be used for exposure times, not only ints diff --git a/src/sensospot_parser/xml_parser.py b/src/sensospot_parser/xml_parser.py index d097944..f76eb52 100644 --- a/src/sensospot_parser/xml_parser.py +++ b/src/sensospot_parser/xml_parser.py @@ -4,8 +4,8 @@ Parsing the csv result files from Sensovations Sensospot image analysis. """ import pathlib -from typing import Union, Optional from datetime import datetime +from typing import Optional, Union import pandas from defusedxml import ElementTree @@ -76,7 +76,9 @@ class ParserTarget: def _data_timestamp_parser(self, data: str) -> None: """parses the data section of a "Timestamp" tag""" - timestamp = datetime.strptime(data.strip(), DATETIME_XML_FORMAT) + timestamp = datetime.strptime( # noqa: DTZ007 + data.strip(), DATETIME_XML_FORMAT + ) self._current[columns.ANALYSIS_DATETIME] = timestamp def _data_image_name_parser(self, data: str) -> None: @@ -108,7 +110,6 @@ class ParserTarget: def closed(self) -> None: """the end of the xml file is reached""" - pass def _find_result_xml_file(folder: PathLike) -> Optional[pathlib.Path]: @@ -154,7 +155,8 @@ def parse_xml_file(xml_file: PathLike) -> pandas.DataFrame: """ xml_file = pathlib.Path(xml_file) if not xml_file.is_file(): - raise ValueError("Xml file does not exist") + msg = "Xml file does not exist" + raise ValueError(msg) target = ParserTarget() parser = ElementTree.DefusedXMLParser(target=target) @@ -162,11 +164,13 @@ def parse_xml_file(xml_file: PathLike) -> pandas.DataFrame: try: parser.feed(xml_file.read_text()) except (IndexError, KeyError, ValueError, TypeError) as e: - raise ValueError("Malformed data in xml file") from e + msg = "Malformed data in xml file" + raise ValueError(msg) from e data_frame = pandas.DataFrame(data=target.collected).reset_index() if data_frame.empty: - raise ValueError("Could not parse assay results xml file") + msg = "Could not parse assay results xml file" + raise ValueError(msg) return columns._cleanup_data_columns(data_frame) @@ -186,7 +190,8 @@ def parse_xml_folder(folder: PathLike) -> pandas.DataFrame: folder = pathlib.Path(folder) xml_file = _find_result_xml_file(folder) if xml_file is None: - raise ValueError("Could not find assay results xml file") + msg = "Could not find assay results xml file" + raise ValueError(msg) data_frame = parse_xml_file(xml_file) data_frame = parameters.add_measurement_parameters(data_frame, folder) return columns._cleanup_data_columns(data_frame) diff --git a/tests/conftest.py b/tests/conftest.py index 3c575a1..b0d5601 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,23 +14,23 @@ EXAMPLE_DIR_XML_WITH_PARAMS = "xml_with_parameters" @pytest.fixture(scope="session") def example_dir(request): root_dir = Path(request.config.rootdir) - yield root_dir / "example_data" + return root_dir / "example_data" -@pytest.fixture +@pytest.fixture() def example_file(example_dir): data_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS - yield data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv" + return data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv" -@pytest.fixture +@pytest.fixture() def exposure_df(): from pandas import DataFrame - yield DataFrame(data={"Exposure.Id": [1, 2, 3]}) + return DataFrame(data={"Exposure.Id": [1, 2, 3]}) -@pytest.fixture +@pytest.fixture() def normalization_data_frame(): from sensospot_parser.columns import RAW_DATA_NORMALIZATION_MAP @@ -86,10 +86,10 @@ def normalization_data_frame(): data_frame = pandas.DataFrame(overflow_test_data) data_frame["Exposure.Channel"] = "Cy5" - for value_column in RAW_DATA_NORMALIZATION_MAP.keys(): + for value_column in RAW_DATA_NORMALIZATION_MAP: data_frame[value_column] = data_frame["Value"] - yield data_frame + return data_frame @pytest.fixture(scope="session") @@ -106,11 +106,11 @@ def parsed_data_frame_without_params(example_dir): return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS) -@pytest.fixture +@pytest.fixture() def data_frame_with_params(parsed_data_frame_with_params): return parsed_data_frame_with_params.copy() -@pytest.fixture +@pytest.fixture() def data_frame_without_params(parsed_data_frame_without_params): return parsed_data_frame_without_params.copy() diff --git a/tests/test_columns.py b/tests/test_columns.py index 499c221..f295df2 100644 --- a/tests/test_columns.py +++ b/tests/test_columns.py @@ -1,6 +1,5 @@ def test_cleanup_data_columns(): from pandas import DataFrame - from sensospot_parser.columns import _cleanup_data_columns columns = ["Rect.", "Contour", " ID ", "Found", "Dia."] diff --git a/tests/test_csv_parser.py b/tests/test_csv_parser.py index 371bf7f..f53ffe5 100644 --- a/tests/test_csv_parser.py +++ b/tests/test_csv_parser.py @@ -4,11 +4,11 @@ import numpy import pytest -from .conftest import EXAMPLE_DIR_CSV_WO_PARAMS, EXAMPLE_DIR_CSV_WITH_PARAMS +from .conftest import EXAMPLE_DIR_CSV_WITH_PARAMS, EXAMPLE_DIR_CSV_WO_PARAMS @pytest.mark.parametrize( - "sub_dir, file_name", + ("sub_dir", "file_name"), [ ( EXAMPLE_DIR_CSV_WO_PARAMS, @@ -65,14 +65,15 @@ def test_parse_csv_no_array(example_dir): @pytest.mark.parametrize( - "input, expected", [("", "."), ("..,", "."), (".,,", ","), ("..,,", ".")] + ("provided", "expected"), + [("", "."), ("..,", "."), (".,,", ","), ("..,,", ".")], ) -def test_guess_decimal_separator_returns_correct_separator(input, expected): +def test_guess_decimal_separator_returns_correct_separator(provided, expected): from io import StringIO from sensospot_parser.csv_parser import _guess_decimal_separator - handle = StringIO(f"header\n{input}\n") + handle = StringIO(f"header\n{provided}\n") result = _guess_decimal_separator(handle) assert result == expected @@ -98,17 +99,17 @@ def test_well_regex_ok(): assert result["column"] == "123" -@pytest.mark.parametrize("input", ["", "A", "1", "1A", "-1", "A-"]) -def test_well_regex_no_match(input): +@pytest.mark.parametrize("provided", ["", "A", "1", "1A", "-1", "A-"]) +def test_well_regex_no_match(provided): from sensospot_parser.csv_parser import REGEX_WELL - result = REGEX_WELL.match(input) + result = REGEX_WELL.match(provided) assert result is None @pytest.mark.parametrize( - "filename, expected", + ("filename", "expected"), [("A1_1.csv", ("A", 1, 1)), ("test/measurement_1_H12_2", ("H", 12, 2))], ) def test_extract_measurement_info_ok(filename, expected): @@ -123,7 +124,7 @@ def test_extract_measurement_info_ok(filename, expected): def test_extract_measurement_info_raises_error(filename): from sensospot_parser.csv_parser import _extract_measurement_info - with pytest.raises(ValueError): + with pytest.raises(ValueError): # noqa: PT011 _extract_measurement_info(filename) @@ -178,7 +179,7 @@ def test_parse_file_raises_error(example_dir): / "should_raise_value_error.csv" ) - with pytest.raises(ValueError): + with pytest.raises(ValueError): # noqa: PT011 parse_csv_file(csv_file) @@ -223,7 +224,6 @@ def testparse_multiple_files_ok(example_dir, file_list): files = [sub_dir / file for file in file_list] data_frame = parse_multiple_csv_files(files) - print(data_frame["Exposure.Id"].unique()) assert len(data_frame) == 100 * len(files) assert len(data_frame["Exposure.Id"].unique()) == len(files) @@ -232,7 +232,7 @@ def testparse_multiple_files_ok(example_dir, file_list): def testparse_multiple_files_empty_file_list(): from sensospot_parser.csv_parser import parse_multiple_csv_files - with pytest.raises(ValueError): + with pytest.raises(ValueError): # noqa: PT011 parse_multiple_csv_files([]) @@ -242,7 +242,6 @@ def testparse_multiple_files_empty_array(example_dir): files = [example_dir / "no_array_A1_1.csv"] data_frame = parse_multiple_csv_files(files) - print(data_frame["Exposure.Id"].unique()) assert len(data_frame) == 1 @@ -306,5 +305,5 @@ def test_sanity_check_raises_value_error(example_dir): data_frame = parse_multiple_csv_files(files) data_frame = data_frame.drop(data_frame.index[1]) - with pytest.raises(ValueError): + with pytest.raises(ValueError): # noqa: PT011 _sanity_check(data_frame) diff --git a/tests/test_parameters.py b/tests/test_parameters.py index ea291e4..db60a57 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -1,6 +1,6 @@ import pandas -from .conftest import EXAMPLE_DIR_CSV_WO_PARAMS, EXAMPLE_DIR_CSV_WITH_PARAMS +from .conftest import EXAMPLE_DIR_CSV_WITH_PARAMS, EXAMPLE_DIR_CSV_WO_PARAMS def test_search_params_file_ok(example_dir): @@ -32,8 +32,8 @@ def test_ssearch_measurement_params_file_parameters_file(tmpdir): def test_parse_channel_info(example_dir): from sensospot_parser.parameters import ( - _search_params_file, _parse_measurement_params, + _search_params_file, ) params = _search_params_file(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS) diff --git a/tests/test_sensospot_data.py b/tests/test_sensospot_data.py index c95177d..55df728 100644 --- a/tests/test_sensospot_data.py +++ b/tests/test_sensospot_data.py @@ -5,16 +5,17 @@ from .conftest import EXAMPLE_DIR_CSV_WO_PARAMS, EXAMPLE_DIR_XML_WO_PARAMS def test_import_api(): - from sensospot_parser import main # noqa: F401 - from sensospot_parser import columns # noqa: F401 - from sensospot_parser import parse_folder # noqa: F401 - from sensospot_parser import parse_csv_folder # noqa: F401 - from sensospot_parser import parse_xml_folder # noqa: F401 + from sensospot_parser import ( + columns, # noqa: F401 + main, # noqa: F401 + parse_csv_folder, # noqa: F401 + parse_folder, # noqa: F401 + parse_xml_folder, # noqa: F401 + ) def test_compare_xml_to_csv(example_dir): import pandas - from sensospot_parser import parse_csv_folder, parse_xml_folder folder = example_dir / EXAMPLE_DIR_XML_WO_PARAMS @@ -33,7 +34,7 @@ def test_compare_xml_to_csv(example_dir): @pytest.mark.parametrize( - "folder, length, hasnans", + ("folder", "length", "hasnans"), [ (EXAMPLE_DIR_XML_WO_PARAMS, 6400, False), (EXAMPLE_DIR_CSV_WO_PARAMS, 28800, True), @@ -41,7 +42,6 @@ def test_compare_xml_to_csv(example_dir): ) def test_parse_folder_switches_parser(example_dir, folder, length, hasnans): import pandas - from sensospot_parser import parse_folder result = parse_folder(example_dir / folder) diff --git a/tests/test_xml_parser.py b/tests/test_xml_parser.py index a485ddf..d0ed449 100644 --- a/tests/test_xml_parser.py +++ b/tests/test_xml_parser.py @@ -2,7 +2,7 @@ from datetime import datetime import pytest -from .conftest import EXAMPLE_DIR_XML_WO_PARAMS, EXAMPLE_DIR_XML_WITH_PARAMS +from .conftest import EXAMPLE_DIR_XML_WITH_PARAMS, EXAMPLE_DIR_XML_WO_PARAMS class DummyDataFunc: @@ -28,7 +28,7 @@ def test_parser_target_init(): @pytest.mark.parametrize( - "tag, attributes, expected", + ("tag", "attributes", "expected"), [ ("UnknownTag", {"ID": "something"}, {}), ( @@ -84,7 +84,7 @@ def test_parser_target_start_image_file_name(): @pytest.mark.parametrize( - "data_type, value, expected", + ("data_type", "value", "expected"), [ ("unknown type", 1, "1"), ("System.Int32", "12", 12), @@ -108,16 +108,40 @@ def test_parser_target_result_attributes_parser(data_type, value, expected): @pytest.mark.parametrize( - "value, expected", + ("value", "expected"), [ - ("3/7/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)), - ("03/7/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)), - ("3/07/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)), - ("03/07/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)), - ("3/7/2022 5:3:47 PM", datetime(2022, 3, 7, 17, 3, 47)), - ("3/7/2022 5:31:4 PM", datetime(2022, 3, 7, 17, 31, 4)), - ("3/7/2022 5:31:47 pm", datetime(2022, 3, 7, 17, 31, 47)), - ("3/7/2022 5:31:47 AM", datetime(2022, 3, 7, 5, 31, 47)), + ( + "3/7/2022 5:31:47 PM", + datetime(2022, 3, 7, 17, 31, 47), # noqa: DTZ001 + ), + ( + "03/7/2022 5:31:47 PM", + datetime(2022, 3, 7, 17, 31, 47), # noqa: DTZ001 + ), + ( + "3/07/2022 5:31:47 PM", + datetime(2022, 3, 7, 17, 31, 47), # noqa: DTZ001 + ), + ( + "03/07/2022 5:31:47 PM", + datetime(2022, 3, 7, 17, 31, 47), # noqa: DTZ001 + ), + ( + "3/7/2022 5:3:47 PM", + datetime(2022, 3, 7, 17, 3, 47), # noqa: DTZ001 + ), + ( + "3/7/2022 5:31:4 PM", + datetime(2022, 3, 7, 17, 31, 4), # noqa: DTZ001 + ), + ( + "3/7/2022 5:31:47 pm", + datetime(2022, 3, 7, 17, 31, 47), # noqa: DTZ001 + ), + ( + "3/7/2022 5:31:47 AM", + datetime(2022, 3, 7, 5, 31, 47), # noqa: DTZ001 + ), ], ) def test_parser_target_data_timestamp_parser(value, expected): @@ -203,8 +227,6 @@ def test_find_result_xml_file_ok(tmp_path): xml_file = tmp_path / "result.xml" xml_file.touch() - print(list(tmp_path.iterdir())) - result = _find_result_xml_file(tmp_path) assert result == xml_file @@ -257,8 +279,6 @@ def test_find_result_hidden_xsl_file(tmp_path): xml_file = tmp_path / ".result.xml" xml_file.touch() - print(list(tmp_path.iterdir())) - result = _find_result_xml_file(tmp_path) assert result is None @@ -266,10 +286,9 @@ def test_find_result_hidden_xsl_file(tmp_path): def test_parse_xml_file_ok(example_dir): import pandas - from sensospot_parser.xml_parser import ( - parse_xml_file, _find_result_xml_file, + parse_xml_file, ) folder = example_dir / EXAMPLE_DIR_XML_WO_PARAMS @@ -288,10 +307,10 @@ def test_parse_xml_file_ok(example_dir): @pytest.mark.parametrize( - "file_name, message", + ("file_name", "message"), [ ("not_existing.xml", "Xml file does not exist"), - ("incomplete.xml", "Could not parse assay results xml file"), + ("defect.xml", "Could not parse assay results xml file"), ("malformed_data.xml", "Malformed data in xml file"), ], ) @@ -300,14 +319,14 @@ def test_parse_xml_file_raies_error(file_name, message, example_dir): xml_file = example_dir / file_name - with pytest.raises(ValueError) as e: + with pytest.raises(ValueError) as e: # noqa: PT011 parse_xml_file(xml_file) - assert message in str(e) + + assert message in str(e) def test_parse_xml_folder_with_params(example_dir): import pandas - from sensospot_parser.xml_parser import parse_xml_folder folder = example_dir / EXAMPLE_DIR_XML_WITH_PARAMS @@ -321,7 +340,6 @@ def test_parse_xml_folder_with_params(example_dir): def test_parse_xml_folder_without_params(example_dir): import pandas - from sensospot_parser.xml_parser import parse_xml_folder folder = example_dir / EXAMPLE_DIR_XML_WO_PARAMS @@ -336,6 +354,7 @@ def test_parse_xml_folder_without_params(example_dir): def test_parse_xml_folder_non_existing_xml_file(tmp_path): from sensospot_parser.xml_parser import parse_xml_folder - with pytest.raises(ValueError) as e: + with pytest.raises(ValueError) as e: # noqa: PT011 parse_xml_folder(tmp_path) - assert "Could not find assay results xml file" in str(e) + + assert "Could not find assay results xml file" in str(e)