
Changes after linting with "ruff"

master
Holger Frey, 2 years ago
commit 872381382a
  1. src/sensospot_parser/__init__.py (8 changed lines)
  2. src/sensospot_parser/csv_parser.py (24 changed lines)
  3. src/sensospot_parser/parameters.py (13 changed lines)
  4. src/sensospot_parser/xml_parser.py (19 changed lines)
  5. tests/conftest.py (20 changed lines)
  6. tests/test_columns.py (1 changed line)
  7. tests/test_csv_parser.py (29 changed lines)
  8. tests/test_parameters.py (4 changed lines)
  9. tests/test_sensospot_data.py (16 changed lines)
  10. tests/test_xml_parser.py (71 changed lines)
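
Most of the edits below repeat a handful of ruff fixes: sorted imports, keyword-only boolean arguments (the FBT rules), pytest style in the test suite (the PT rules), naive datetimes silenced with "# noqa: DTZ...", removed print() calls, and, most frequently, the flake8-errmsg pattern that wants the exception text bound to a variable before raising. The edits match what "ruff check --fix" produces plus hand-added "# noqa" markers; the exact command and rule configuration are not recorded in this commit. A minimal before/after sketch of the most common pattern (generic example, not code from this repository; EM101/EM102 are the usual rule codes for string and f-string literals in a raise statement):

    # Before: ruff (flake8-errmsg) flags a message literal passed straight to the exception
    def check_well(well: str) -> str:
        if not well:
            raise ValueError(f"not a valid well: '{well}'")
        return well

    # After: bind the message to a variable first, then raise it
    def check_well_fixed(well: str) -> str:
        if not well:
            msg = f"not a valid well: '{well}'"
            raise ValueError(msg)
        return well

The usual rationale is that the traceback then shows only "raise ValueError(msg)" instead of repeating the full message text on the raise line.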

src/sensospot_parser/__init__.py (8 changed lines)

@@ -21,7 +21,7 @@ DEFAULT_OUTPUT_FILENAME = "collected_data.csv"
 PathLike = Union[str, pathlib.Path]
 
 
-def parse_folder(source: PathLike, quiet: bool = False) -> pandas.DataFrame:
+def parse_folder(source: PathLike, *, quiet: bool = False) -> pandas.DataFrame:
     """parses an assay result folder
 
     The function will first try to use an assay results xml file, and will
@@ -38,7 +38,7 @@ def parse_folder(source: PathLike, quiet: bool = False) -> pandas.DataFrame:
         return parse_xml_folder(source)
     except ValueError:
         pass
-    return parse_csv_folder(source, quiet)
+    return parse_csv_folder(source, quiet=quiet)
 
 
 @click.command()
@@ -68,7 +68,7 @@ def parse_folder(source: PathLike, quiet: bool = False) -> pandas.DataFrame:
     default=False,
     help="Ignore sanity check for csv file parsing",
 )
-def main(sources, output, quiet=False):
+def main(sources, output, quiet=False):  # noqa: FBT002
     """Parses the measurement results of the Sensospot reader
 
     The resulting output is either echoed to stdout or saved to a file.
@@ -77,7 +77,7 @@ def main(sources, output, quiet=False):
     I this doesn't work, the fallback is to parse the csv files.
     """
     paths = (pathlib.Path(source) for source in sources)
-    collection = (parse_folder(source, quiet) for source in paths)
+    collection = (parse_folder(source, quiet=quiet) for source in paths)
     result = (
         pandas.concat(collection, ignore_index=True)
         .reset_index()
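
The bare "*" added to the signature makes quiet a keyword-only argument (ruff's boolean-trap rules, FBT), and the call sites in this hunk pass quiet=quiet accordingly. A quick illustration of the effect on callers (hypothetical folder path, not from the example data):

    from sensospot_parser import parse_folder

    # keyword argument: works before and after this change
    data = parse_folder("some/results/folder", quiet=True)

    # positional boolean: raises TypeError after this change, since quiet is keyword-only
    # data = parse_folder("some/results/folder", True)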

src/sensospot_parser/csv_parser.py (24 changed lines)

@@ -3,10 +3,10 @@
 Parsing the csv result files from Sensovations Sensospot image analysis.
 """
 
-import re
 import pathlib
-from typing import Union, TextIO, Optional, Sequence
+import re
 from collections import namedtuple
+from typing import Optional, Sequence, TextIO, Union
 
 import pandas
@@ -74,10 +74,11 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo:
         named tuple FileInfo with parsed metadata
     """
     data_path = pathlib.Path(data_file)
-    *rest, well, exposure = data_path.stem.rsplit("_", 2)  # noqa: F841
+    *rest, well, exposure = data_path.stem.rsplit("_", 2)
     matched = REGEX_WELL.match(well)
     if matched is None:
-        raise ValueError(f"not a valid well: '{well}'")
+        msg = f"not a valid well: '{well}'"
+        raise ValueError(msg)
     row = matched["row"].upper()
     column = int(matched["column"])
     exposure = int(exposure)
@@ -143,7 +144,8 @@ def parse_multiple_csv_files(
         pandas data frame with all parsed data combined
     """
     if not file_list:
-        raise ValueError("Empty file list provided")
+        msg = "Empty file list provided"
+        raise ValueError(msg)
     collection = (_parse_csv_file_silenced(path) for path in file_list)
     filtered = (frame for frame in collection if frame is not None)
     data_frame = pandas.concat(filtered, ignore_index=True).reset_index()
@@ -186,9 +188,8 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame:
     spot_positions = len(data_frame[columns.POS_ID].unique())
     expected_rows = field_rows * field_cols * exposures * spot_positions
     if expected_rows != len(data_frame):
-        raise ValueError(
-            f"Measurements are missing: {expected_rows} != {len(data_frame)}"
-        )
+        msg = f"Measurements are missing: {expected_rows} != {len(data_frame)}"
+        raise ValueError(msg)
     # set the right data type for measurement columns
     for raw_column in columns.NUMERIC_COLUMNS:
         data_frame[raw_column] = pandas.to_numeric(data_frame[raw_column])
@@ -196,7 +197,7 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame:
 def parse_csv_folder(
-    folder: PathLike, quiet: bool = False
+    folder: PathLike, *, quiet: bool = False
 ) -> pandas.DataFrame:
     """parses all csv files in a folder to one large dataframe
@@ -214,8 +215,9 @@ def parse_csv_folder(
     file_list = find_csv_files(folder_path)
 
     try:
         data_frame = parse_multiple_csv_files(file_list)
-    except ValueError:
-        raise ValueError(f"No sensospot data found in folder '{folder}'")
+    except ValueError as e:
+        msg = f"No sensospot data found in folder '{folder}'"
+        raise ValueError(msg) from e
 
     data_frame = add_measurement_parameters(data_frame, folder_path)

src/sensospot_parser/parameters.py (13 changed lines)

@@ -4,8 +4,8 @@ Parsing the numerical output from Sensovations Sensospot image analysis.
 """
 
 import pathlib
-from typing import Any, Dict, Union, Optional
-from xml.etree.ElementTree import Element as ElementType  # noqa: S405
+from typing import Any, Dict, Optional, Union
+from xml.etree.ElementTree import Element as ElementType
 
 import numpy
 import pandas
@@ -30,10 +30,7 @@ def _search_params_file(folder: PathLike) -> Optional[pathlib.Path]:
     if not params_folder.is_dir():
         return None
     param_files = list(params_folder.glob("**/*.svexp"))
-    if len(param_files) == 1:
-        return param_files[0]
-    else:
-        return None
+    return param_files[0] if len(param_files) == 1 else None
 
 
 def _get_channel_data(channel_node: ElementType) -> Dict[str, Any]:
@@ -45,9 +42,9 @@ def _get_channel_data(channel_node: ElementType) -> Dict[str, Any]:
     Returns:
         dict with the information
     """
-    # child.tag == "ChannelConfig1"
+    # Example "ChannelConfig1"
     exposure_id = int(channel_node.tag[-1])
-    # channel_description == "[Cy3|Cy5] Green"
+    # Example "Cy3 Green"
    description = channel_node.attrib["Description"]
     exposure_channel = description.rsplit(" ", 1)[-1]
     # floats can be used for exposure times, not only ints

src/sensospot_parser/xml_parser.py (19 changed lines)

@@ -4,8 +4,8 @@ Parsing the csv result files from Sensovations Sensospot image analysis.
 """
 
 import pathlib
-from typing import Union, Optional
 from datetime import datetime
+from typing import Optional, Union
 
 import pandas
 from defusedxml import ElementTree
@@ -76,7 +76,9 @@ class ParserTarget:
     def _data_timestamp_parser(self, data: str) -> None:
         """parses the data section of a "Timestamp" tag"""
-        timestamp = datetime.strptime(data.strip(), DATETIME_XML_FORMAT)
+        timestamp = datetime.strptime(  # noqa: DTZ007
+            data.strip(), DATETIME_XML_FORMAT
+        )
         self._current[columns.ANALYSIS_DATETIME] = timestamp
 
     def _data_image_name_parser(self, data: str) -> None:
@@ -108,7 +110,6 @@ class ParserTarget:
     def closed(self) -> None:
         """the end of the xml file is reached"""
-        pass
 
 
 def _find_result_xml_file(folder: PathLike) -> Optional[pathlib.Path]:
@@ -154,7 +155,8 @@ def parse_xml_file(xml_file: PathLike) -> pandas.DataFrame:
     """
     xml_file = pathlib.Path(xml_file)
     if not xml_file.is_file():
-        raise ValueError("Xml file does not exist")
+        msg = "Xml file does not exist"
+        raise ValueError(msg)
 
     target = ParserTarget()
     parser = ElementTree.DefusedXMLParser(target=target)
@@ -162,11 +164,13 @@ def parse_xml_file(xml_file: PathLike) -> pandas.DataFrame:
     try:
         parser.feed(xml_file.read_text())
     except (IndexError, KeyError, ValueError, TypeError) as e:
-        raise ValueError("Malformed data in xml file") from e
+        msg = "Malformed data in xml file"
+        raise ValueError(msg) from e
 
     data_frame = pandas.DataFrame(data=target.collected).reset_index()
     if data_frame.empty:
-        raise ValueError("Could not parse assay results xml file")
+        msg = "Could not parse assay results xml file"
+        raise ValueError(msg)
 
     return columns._cleanup_data_columns(data_frame)
@@ -186,7 +190,8 @@ def parse_xml_folder(folder: PathLike) -> pandas.DataFrame:
     folder = pathlib.Path(folder)
     xml_file = _find_result_xml_file(folder)
     if xml_file is None:
-        raise ValueError("Could not find assay results xml file")
+        msg = "Could not find assay results xml file"
+        raise ValueError(msg)
     data_frame = parse_xml_file(xml_file)
     data_frame = parameters.add_measurement_parameters(data_frame, folder)
     return columns._cleanup_data_columns(data_frame)

tests/conftest.py (20 changed lines)

@@ -14,23 +14,23 @@ EXAMPLE_DIR_XML_WITH_PARAMS = "xml_with_parameters"
 @pytest.fixture(scope="session")
 def example_dir(request):
     root_dir = Path(request.config.rootdir)
-    yield root_dir / "example_data"
+    return root_dir / "example_data"
 
 
-@pytest.fixture
+@pytest.fixture()
 def example_file(example_dir):
     data_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
-    yield data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv"
+    return data_dir / "160218_SG2-013-001_Regen1_Cy3-100_1_A1_1.csv"
 
 
-@pytest.fixture
+@pytest.fixture()
 def exposure_df():
     from pandas import DataFrame
 
-    yield DataFrame(data={"Exposure.Id": [1, 2, 3]})
+    return DataFrame(data={"Exposure.Id": [1, 2, 3]})
 
 
-@pytest.fixture
+@pytest.fixture()
 def normalization_data_frame():
     from sensospot_parser.columns import RAW_DATA_NORMALIZATION_MAP
@@ -86,10 +86,10 @@ def normalization_data_frame():
     data_frame = pandas.DataFrame(overflow_test_data)
     data_frame["Exposure.Channel"] = "Cy5"
-    for value_column in RAW_DATA_NORMALIZATION_MAP.keys():
+    for value_column in RAW_DATA_NORMALIZATION_MAP:
         data_frame[value_column] = data_frame["Value"]
 
-    yield data_frame
+    return data_frame
 
 
 @pytest.fixture(scope="session")
@@ -106,11 +106,11 @@ def parsed_data_frame_without_params(example_dir):
     return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS)
 
 
-@pytest.fixture
+@pytest.fixture()
 def data_frame_with_params(parsed_data_frame_with_params):
     return parsed_data_frame_with_params.copy()
 
 
-@pytest.fixture
+@pytest.fixture()
 def data_frame_without_params(parsed_data_frame_without_params):
     return parsed_data_frame_without_params.copy()
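
Two pytest-style fixes drive the conftest changes: fixtures without teardown code should return their value instead of yielding it, and the decorator is written with parentheses. A minimal sketch of the resulting fixture shape (hypothetical fixture name; PT022 for the useless yield and PT001 for the decorator parentheses are the usual rule codes):

    import pytest

    @pytest.fixture()              # decorator written with parentheses
    def exposure_ids():
        # no teardown needed, so "return" is preferred over "yield"
        return [1, 2, 3]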

tests/test_columns.py (1 changed line)

@@ -1,6 +1,5 @@
 def test_cleanup_data_columns():
     from pandas import DataFrame
-
     from sensospot_parser.columns import _cleanup_data_columns
 
     columns = ["Rect.", "Contour", " ID ", "Found", "Dia."]

tests/test_csv_parser.py (29 changed lines)

@@ -4,11 +4,11 @@
 import numpy
 import pytest
 
-from .conftest import EXAMPLE_DIR_CSV_WO_PARAMS, EXAMPLE_DIR_CSV_WITH_PARAMS
+from .conftest import EXAMPLE_DIR_CSV_WITH_PARAMS, EXAMPLE_DIR_CSV_WO_PARAMS
 
 
 @pytest.mark.parametrize(
-    "sub_dir, file_name",
+    ("sub_dir", "file_name"),
     [
         (
             EXAMPLE_DIR_CSV_WO_PARAMS,
@@ -65,14 +65,15 @@ def test_parse_csv_no_array(example_dir):
 @pytest.mark.parametrize(
-    "input, expected", [("", "."), ("..,", "."), (".,,", ","), ("..,,", ".")]
+    ("provided", "expected"),
+    [("", "."), ("..,", "."), (".,,", ","), ("..,,", ".")],
 )
-def test_guess_decimal_separator_returns_correct_separator(input, expected):
+def test_guess_decimal_separator_returns_correct_separator(provided, expected):
     from io import StringIO
     from sensospot_parser.csv_parser import _guess_decimal_separator
 
-    handle = StringIO(f"header\n{input}\n")
+    handle = StringIO(f"header\n{provided}\n")
     result = _guess_decimal_separator(handle)
     assert result == expected
@@ -98,17 +99,17 @@ def test_well_regex_ok():
     assert result["column"] == "123"
 
 
-@pytest.mark.parametrize("input", ["", "A", "1", "1A", "-1", "A-"])
-def test_well_regex_no_match(input):
+@pytest.mark.parametrize("provided", ["", "A", "1", "1A", "-1", "A-"])
+def test_well_regex_no_match(provided):
     from sensospot_parser.csv_parser import REGEX_WELL
 
-    result = REGEX_WELL.match(input)
+    result = REGEX_WELL.match(provided)
     assert result is None
 
 
 @pytest.mark.parametrize(
-    "filename, expected",
+    ("filename", "expected"),
     [("A1_1.csv", ("A", 1, 1)), ("test/measurement_1_H12_2", ("H", 12, 2))],
 )
 def test_extract_measurement_info_ok(filename, expected):
@@ -123,7 +124,7 @@ def test_extract_measurement_info_ok(filename, expected):
 def test_extract_measurement_info_raises_error(filename):
     from sensospot_parser.csv_parser import _extract_measurement_info
 
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError):  # noqa: PT011
         _extract_measurement_info(filename)
@@ -178,7 +179,7 @@ def test_parse_file_raises_error(example_dir):
         / "should_raise_value_error.csv"
     )
 
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError):  # noqa: PT011
         parse_csv_file(csv_file)
@@ -223,7 +224,6 @@ def testparse_multiple_files_ok(example_dir, file_list):
     files = [sub_dir / file for file in file_list]
 
     data_frame = parse_multiple_csv_files(files)
-    print(data_frame["Exposure.Id"].unique())
 
     assert len(data_frame) == 100 * len(files)
     assert len(data_frame["Exposure.Id"].unique()) == len(files)
@@ -232,7 +232,7 @@ def testparse_multiple_files_ok(example_dir, file_list):
 def testparse_multiple_files_empty_file_list():
     from sensospot_parser.csv_parser import parse_multiple_csv_files
 
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError):  # noqa: PT011
         parse_multiple_csv_files([])
@@ -242,7 +242,6 @@ def testparse_multiple_files_empty_array(example_dir):
     files = [example_dir / "no_array_A1_1.csv"]
 
     data_frame = parse_multiple_csv_files(files)
-    print(data_frame["Exposure.Id"].unique())
 
     assert len(data_frame) == 1
@@ -306,5 +305,5 @@ def test_sanity_check_raises_value_error(example_dir):
     data_frame = parse_multiple_csv_files(files)
     data_frame = data_frame.drop(data_frame.index[1])
 
-    with pytest.raises(ValueError):
+    with pytest.raises(ValueError):  # noqa: PT011
         _sanity_check(data_frame)

tests/test_parameters.py (4 changed lines)

@@ -1,6 +1,6 @@
 import pandas
 
-from .conftest import EXAMPLE_DIR_CSV_WO_PARAMS, EXAMPLE_DIR_CSV_WITH_PARAMS
+from .conftest import EXAMPLE_DIR_CSV_WITH_PARAMS, EXAMPLE_DIR_CSV_WO_PARAMS
 
 
 def test_search_params_file_ok(example_dir):
@@ -32,8 +32,8 @@ def test_ssearch_measurement_params_file_parameters_file(tmpdir):
 def test_parse_channel_info(example_dir):
     from sensospot_parser.parameters import (
-        _search_params_file,
         _parse_measurement_params,
+        _search_params_file,
     )
 
     params = _search_params_file(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)

tests/test_sensospot_data.py (16 changed lines)

@@ -5,16 +5,17 @@ from .conftest import EXAMPLE_DIR_CSV_WO_PARAMS, EXAMPLE_DIR_XML_WO_PARAMS
 def test_import_api():
-    from sensospot_parser import main  # noqa: F401
-    from sensospot_parser import columns  # noqa: F401
-    from sensospot_parser import parse_folder  # noqa: F401
-    from sensospot_parser import parse_csv_folder  # noqa: F401
-    from sensospot_parser import parse_xml_folder  # noqa: F401
+    from sensospot_parser import (
+        columns,  # noqa: F401
+        main,  # noqa: F401
+        parse_csv_folder,  # noqa: F401
+        parse_folder,  # noqa: F401
+        parse_xml_folder,  # noqa: F401
+    )
 
 
 def test_compare_xml_to_csv(example_dir):
     import pandas
-
     from sensospot_parser import parse_csv_folder, parse_xml_folder
 
     folder = example_dir / EXAMPLE_DIR_XML_WO_PARAMS
@@ -33,7 +34,7 @@ def test_compare_xml_to_csv(example_dir):
 @pytest.mark.parametrize(
-    "folder, length, hasnans",
+    ("folder", "length", "hasnans"),
     [
         (EXAMPLE_DIR_XML_WO_PARAMS, 6400, False),
         (EXAMPLE_DIR_CSV_WO_PARAMS, 28800, True),
@@ -41,7 +42,6 @@ def test_compare_xml_to_csv(example_dir):
 )
 def test_parse_folder_switches_parser(example_dir, folder, length, hasnans):
     import pandas
-
     from sensospot_parser import parse_folder
 
     result = parse_folder(example_dir / folder)

tests/test_xml_parser.py (71 changed lines)

@@ -2,7 +2,7 @@ from datetime import datetime
 
 import pytest
 
-from .conftest import EXAMPLE_DIR_XML_WO_PARAMS, EXAMPLE_DIR_XML_WITH_PARAMS
+from .conftest import EXAMPLE_DIR_XML_WITH_PARAMS, EXAMPLE_DIR_XML_WO_PARAMS
 
 
 class DummyDataFunc:
@@ -28,7 +28,7 @@ def test_parser_target_init():
 @pytest.mark.parametrize(
-    "tag, attributes, expected",
+    ("tag", "attributes", "expected"),
     [
         ("UnknownTag", {"ID": "something"}, {}),
         (
@@ -84,7 +84,7 @@ def test_parser_target_start_image_file_name():
 @pytest.mark.parametrize(
-    "data_type, value, expected",
+    ("data_type", "value", "expected"),
     [
         ("unknown type", 1, "1"),
         ("System.Int32", "12", 12),
@@ -108,16 +108,40 @@ def test_parser_target_result_attributes_parser(data_type, value, expected):
 @pytest.mark.parametrize(
-    "value, expected",
+    ("value", "expected"),
     [
-        ("3/7/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)),
-        ("03/7/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)),
-        ("3/07/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)),
-        ("03/07/2022 5:31:47 PM", datetime(2022, 3, 7, 17, 31, 47)),
-        ("3/7/2022 5:3:47 PM", datetime(2022, 3, 7, 17, 3, 47)),
-        ("3/7/2022 5:31:4 PM", datetime(2022, 3, 7, 17, 31, 4)),
-        ("3/7/2022 5:31:47 pm", datetime(2022, 3, 7, 17, 31, 47)),
-        ("3/7/2022 5:31:47 AM", datetime(2022, 3, 7, 5, 31, 47)),
+        (
+            "3/7/2022 5:31:47 PM",
+            datetime(2022, 3, 7, 17, 31, 47),  # noqa: DTZ001
+        ),
+        (
+            "03/7/2022 5:31:47 PM",
+            datetime(2022, 3, 7, 17, 31, 47),  # noqa: DTZ001
+        ),
+        (
+            "3/07/2022 5:31:47 PM",
+            datetime(2022, 3, 7, 17, 31, 47),  # noqa: DTZ001
+        ),
+        (
+            "03/07/2022 5:31:47 PM",
+            datetime(2022, 3, 7, 17, 31, 47),  # noqa: DTZ001
+        ),
+        (
+            "3/7/2022 5:3:47 PM",
+            datetime(2022, 3, 7, 17, 3, 47),  # noqa: DTZ001
+        ),
+        (
+            "3/7/2022 5:31:4 PM",
+            datetime(2022, 3, 7, 17, 31, 4),  # noqa: DTZ001
+        ),
+        (
+            "3/7/2022 5:31:47 pm",
+            datetime(2022, 3, 7, 17, 31, 47),  # noqa: DTZ001
+        ),
+        (
+            "3/7/2022 5:31:47 AM",
+            datetime(2022, 3, 7, 5, 31, 47),  # noqa: DTZ001
+        ),
     ],
 )
 def test_parser_target_data_timestamp_parser(value, expected):
@@ -203,8 +227,6 @@ def test_find_result_xml_file_ok(tmp_path):
     xml_file = tmp_path / "result.xml"
     xml_file.touch()
 
-    print(list(tmp_path.iterdir()))
-
     result = _find_result_xml_file(tmp_path)
 
     assert result == xml_file
@@ -257,8 +279,6 @@ def test_find_result_hidden_xsl_file(tmp_path):
     xml_file = tmp_path / ".result.xml"
     xml_file.touch()
 
-    print(list(tmp_path.iterdir()))
-
     result = _find_result_xml_file(tmp_path)
 
     assert result is None
@@ -266,10 +286,9 @@ def test_find_result_hidden_xsl_file(tmp_path):
 def test_parse_xml_file_ok(example_dir):
     import pandas
-
     from sensospot_parser.xml_parser import (
-        parse_xml_file,
         _find_result_xml_file,
+        parse_xml_file,
     )
 
     folder = example_dir / EXAMPLE_DIR_XML_WO_PARAMS
@@ -288,10 +307,10 @@ def test_parse_xml_file_ok(example_dir):
 @pytest.mark.parametrize(
-    "file_name, message",
+    ("file_name", "message"),
     [
         ("not_existing.xml", "Xml file does not exist"),
-        ("incomplete.xml", "Could not parse assay results xml file"),
+        ("defect.xml", "Could not parse assay results xml file"),
         ("malformed_data.xml", "Malformed data in xml file"),
     ],
 )
@@ -300,14 +319,14 @@ def test_parse_xml_file_raies_error(file_name, message, example_dir):
     xml_file = example_dir / file_name
 
-    with pytest.raises(ValueError) as e:
+    with pytest.raises(ValueError) as e:  # noqa: PT011
         parse_xml_file(xml_file)
 
-        assert message in str(e)
+    assert message in str(e)
 
 
 def test_parse_xml_folder_with_params(example_dir):
     import pandas
     from sensospot_parser.xml_parser import parse_xml_folder
 
     folder = example_dir / EXAMPLE_DIR_XML_WITH_PARAMS
@@ -321,7 +340,6 @@ def test_parse_xml_folder_with_params(example_dir):
 def test_parse_xml_folder_without_params(example_dir):
     import pandas
-
     from sensospot_parser.xml_parser import parse_xml_folder
 
     folder = example_dir / EXAMPLE_DIR_XML_WO_PARAMS
@@ -336,6 +354,7 @@ def test_parse_xml_folder_without_params(example_dir):
 def test_parse_xml_folder_non_existing_xml_file(tmp_path):
     from sensospot_parser.xml_parser import parse_xml_folder
 
-    with pytest.raises(ValueError) as e:
+    with pytest.raises(ValueError) as e:  # noqa: PT011
         parse_xml_folder(tmp_path)
-        assert "Could not find assay results xml file" in str(e)
+
+    assert "Could not find assay results xml file" in str(e)
