Browse Source

renamed some functions in "csv_parser" module to have more explicit names

xmlparsing
Holger Frey 2 years ago
parent
commit
988c7562d9
  1. 25
      example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg
  2. 27
      example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary
  3. 19
      example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp
  4. 4
      src/sensospot_parser/__init__.py
  5. 20
      src/sensospot_parser/csv_parser.py
  6. 8
      tests/conftest.py
  7. 46
      tests/test_csv_parser.py
  8. 4
      tests/test_sensospot_data.py

25
example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg

@ -1,25 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!--Algorithm configuration-->
<Algorithm>
<BrightnessCheck>
<Settings Active="False" MinimumBrightnessPercent="70" MaximumBrightnessPercent="100" HistogrammPercent="95" OverrideMaximumPixelValue="0" SubSampleX="2" SubSampleY="2" />
</BrightnessCheck>
<Shading>
<Settings Active="true" />
</Shading>
<OrientationDetection>
<Settings Active="True" HoughCircleSensitivitySliderUsed="True" BlobMethod="Classic,HoughCircle" HoughCircleGamma="1.1" HoughCircleScale="1" HoughCircleMinDist="5" HoughCircleCannyThresh="100" HoughCircleAccumThresh="15" HoughCircleMinRadius="5" HoughCircleMaxRadius="30" HoughCircleSmoothRadius="0" HoughCircleDilationRadius="3" HoughCircleSensitivitySlider="2" RefPatternFittingMethod="ICP" BinThresholdPercent="2" JoinRadius="2" OpeningNeighborhoodSizeXY="7" BlobAreaMin="50">
</Settings>
<DebugSettings ImageDebugAODResult="0" ImageDebugAODBinary="0" ImageDebugAODMorphed="1" ShowResults="1" ShowSearchOrder="1" ShowDistances="1" />
</OrientationDetection>
<Flip>
<Settings Direction="FLIP_NONE" />
</Flip>
<MicroArraySpotFinding>
<Settings Active="True" BkgAvgAreaPix="10" HistogramPerCent="10" BinarizationThresholdOffsetPerc="7" SpotImageSubSampling="1" ApertureWidth="3" MinArea="50" MinSpotDiameterMm="0.2" SpotFindingSensitivity="10" SpotShape="CIRCLE" SelectionPreference="COMPACTEST" InsideGridRectCriteria="RECTANGLE" BlobbingActive="True" CircleDetectionActive="True" MeanBeforeEdgeDetectionActive="False" CannyThreshold="50" MinMatchQuality="0.4" CircleMatchExponent="1" />
</MicroArraySpotFinding>
<MicroArrayAnalysis>
<Settings Active="True" MinBkgThresholdPerc="1" MaxSpotThresholdPerc="100" AutoAdjustAnalysisDiameter="False" MinSpotSizeMm="0.2" MaxSpotSizeMm="0.28" AutoAdjustTolerancePercentage="10">
</Settings>
</MicroArrayAnalysis>
</Algorithm>

27
example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary

@ -1,27 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!--Definition of the microarray and the-->
<!--embedded reference pattern-->
<MicroArray>
<Layout NofSpotsX="10" NofSpotsY="10" SpotDistMmX="0.303" SpotDistMmY="0.297" />
<ReferencePattern ReferencePointOffsetMm.X="1.6392982179205673" ReferencePointOffsetMm.Y="2.0253288169139632">
<Features MarkerType="BRIGHT">
</Features>
<Tolerances ScalePercent="5" RotationPercent="5">
<CIRCLE PositionMm="0.06" SizePercent="50" SimpleCompactnessMax="17" EccentricityMax="0.2" />
<RECT PositionMm="0.1" SizePercent="30" SimpleCompactnessMin="13" SimpleCompactnessMax="22" EccentricityMin="0.25" EccentricityMax="0.4">
</RECT>
</Tolerances>
<Shapes>
<Shape0 ShapeType="CIRCLE" PosMm.X="0" PosMm.Y="0" DiameterMm="0.187">
</Shape0>
<Shape1 ShapeType="CIRCLE" PosMm.X="2.728" PosMm.Y="2.682" DiameterMm="0.184">
</Shape1>
<Shape2 ShapeType="CIRCLE" PosMm.X="2.701" PosMm.Y="-0.021" DiameterMm="0.187">
</Shape2>
<Shape3 ShapeType="CIRCLE" PosMm.X="0.024" PosMm.Y="2.7" DiameterMm="0.191">
</Shape3>
</Shapes>
</ReferencePattern>
<RelationArrayToRefPattern OffsetMm.X="0" OffsetMm.Y="0" ScalingFactor="0.99430589150056758" RotationAngleDeg="-0.13126175132547502">
</RelationArrayToRefPattern>
</MicroArray>

19
example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp

@ -1,19 +0,0 @@
<?xml version="1.0"?>
<Assay>
<Channels>
<ChannelConfig1 IlluminationID="4" Description="Cy3/Cy5 Green" ExposureTimeMs="100" Intensity="100">
</ChannelConfig1>
<ChannelConfig2 IlluminationID="3" Description="Cy3/Cy5 Red" ExposureTimeMs="150" Intensity="100">
</ChannelConfig2>
<ChannelConfig3 IlluminationID="3" Description="Cy3/Cy5 Red" ExposureTimeMs="15" Intensity="100">
</ChannelConfig3>
</Channels>
<Components MicroArray="S QC 10x10 Cy3 100ms Cy5 150-15ms" AlgoConfig="S QC 10x10 Cy3 100ms Cy5 150-15ms">
</Components>
<DebugSwitches DoImageProcessing="True" DoDarkImageCorrection="True" SaveResultAsCSVFile="True" SaveResultAsXmlFile="True">
</DebugSwitches>
<RefPattern ChannelConfig="Channel1">
</RefPattern>
<WorkbookAnalysis Active="True" UseSingleWorkbook="YES" TemplateWorkbookName="160212_wb10x10_Spectra_V3_Ver03.2.3.xlsx" PasteWorksheetName="Input_Data" PasteStartingCell="W300" ParameterWorksheetName="parameter" ResultWorksheetName="Net Intensity" KeepDataWorkbookOpen="NO">
</WorkbookAnalysis>
</Assay>

4
src/sensospot_parser/__init__.py

@ -12,7 +12,7 @@ import click
import pandas import pandas
from . import columns # noqa: F401 from . import columns # noqa: F401
from .csv_parser import parse_file, parse_folder # noqa: F401 from .csv_parser import parse_csv_file, parse_csv_folder # noqa: F401
DEFAULT_OUTPUT_FILENAME = "collected_data.csv" DEFAULT_OUTPUT_FILENAME = "collected_data.csv"
@ -51,7 +51,7 @@ def main(sources, output, quiet=False):
""" """
paths = (pathlib.Path(source) for source in sources) paths = (pathlib.Path(source) for source in sources)
collection = (parse_folder(source, quiet) for source in paths) collection = (parse_csv_folder(source, quiet) for source in paths)
result = pandas.concat(collection, ignore_index=True).to_csv( result = pandas.concat(collection, ignore_index=True).to_csv(
output, sep="\t", index=False output, sep="\t", index=False
) )

20
src/sensospot_parser/csv_parser.py

@ -84,7 +84,7 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo:
return FileInfo(row, column, exposure) return FileInfo(row, column, exposure)
def parse_file(data_file: PathLike) -> pandas.DataFrame: def parse_csv_file(data_file: PathLike) -> pandas.DataFrame:
"""parses one data file and adds metadata to result """parses one data file and adds metadata to result
will raise a ValueError, if metadata could not be extracted will raise a ValueError, if metadata could not be extracted
@ -113,7 +113,9 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame:
return columns._cleanup_data_columns(data_frame) return columns._cleanup_data_columns(data_frame)
def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]: def _parse_csv_file_silenced(
data_file: PathLike,
) -> Optional[pandas.DataFrame]:
"""parses one data file and adds metadata """parses one data file and adds metadata
Safety checks are suppressed Safety checks are suppressed
@ -125,12 +127,14 @@ def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:
pandas data frame with the parsed data or None on error pandas data frame with the parsed data or None on error
""" """
try: try:
return parse_file(data_file) return parse_csv_file(data_file)
except ValueError: except ValueError:
return None return None
def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame: def parse_multiple_csv_files(
file_list: Sequence[PathLike],
) -> pandas.DataFrame:
"""parses a list of file paths to one combined data frame """parses a list of file paths to one combined data frame
Args: Args:
@ -140,7 +144,7 @@ def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame:
""" """
if not file_list: if not file_list:
raise ValueError("Empty file list provided") raise ValueError("Empty file list provided")
collection = (_parse_file_silenced(path) for path in file_list) collection = (_parse_csv_file_silenced(path) for path in file_list)
filtered = (frame for frame in collection if frame is not None) filtered = (frame for frame in collection if frame is not None)
data_frame = pandas.concat(filtered, ignore_index=True).reset_index() data_frame = pandas.concat(filtered, ignore_index=True).reset_index()
data_frame[columns.WELL_ROW] = data_frame[columns.WELL_ROW].astype( data_frame[columns.WELL_ROW] = data_frame[columns.WELL_ROW].astype(
@ -191,7 +195,9 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame:
return data_frame return data_frame
def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame: def parse_csv_folder(
folder: PathLike, quiet: bool = False
) -> pandas.DataFrame:
"""parses all csv files in a folder to one large dataframe """parses all csv files in a folder to one large dataframe
Will raise a ValueError, if no sensospot data could be found in Will raise a ValueError, if no sensospot data could be found in
@ -207,7 +213,7 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame:
folder_path = pathlib.Path(folder) folder_path = pathlib.Path(folder)
file_list = find_csv_files(folder_path) file_list = find_csv_files(folder_path)
try: try:
data_frame = parse_multiple_files(file_list) data_frame = parse_multiple_csv_files(file_list)
except ValueError: except ValueError:
raise ValueError(f"No sensospot data found in folder '{folder}'") raise ValueError(f"No sensospot data found in folder '{folder}'")

8
tests/conftest.py

@ -94,16 +94,16 @@ def normalization_data_frame():
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def parsed_data_frame_with_params(example_dir): def parsed_data_frame_with_params(example_dir):
from sensospot_parser.csv_parser import parse_folder from sensospot_parser.csv_parser import parse_csv_folder
return parse_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS) return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def parsed_data_frame_without_params(example_dir): def parsed_data_frame_without_params(example_dir):
from sensospot_parser.csv_parser import parse_folder from sensospot_parser.csv_parser import parse_csv_folder
return parse_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS) return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS)
@pytest.fixture @pytest.fixture

46
tests/test_csv_parser.py

@ -128,9 +128,9 @@ def test_extract_measurement_info_raises_error(filename):
def test_parse_file(example_file): def test_parse_file(example_file):
from sensospot_parser.csv_parser import parse_file from sensospot_parser.csv_parser import parse_csv_file
result = parse_file(example_file) result = parse_csv_file(example_file)
columns = { columns = {
"Pos.Id", "Pos.Id",
@ -170,7 +170,7 @@ def test_parse_file(example_file):
def test_parse_file_raises_error(example_dir): def test_parse_file_raises_error(example_dir):
from sensospot_parser.csv_parser import parse_file from sensospot_parser.csv_parser import parse_csv_file
csv_file = ( csv_file = (
example_dir example_dir
@ -179,13 +179,13 @@ def test_parse_file_raises_error(example_dir):
) )
with pytest.raises(ValueError): with pytest.raises(ValueError):
parse_file(csv_file) parse_csv_file(csv_file)
def test_parse_file_silenced_returns_data_frame(example_file): def test_parse_file_silenced_returns_data_frame(example_file):
from sensospot_parser.csv_parser import _parse_file_silenced from sensospot_parser.csv_parser import _parse_csv_file_silenced
result = _parse_file_silenced(example_file) result = _parse_csv_file_silenced(example_file)
assert result["Well.Row"][0] == "A" assert result["Well.Row"][0] == "A"
assert result["Well.Column"][0] == 1 assert result["Well.Column"][0] == 1
@ -193,7 +193,7 @@ def test_parse_file_silenced_returns_data_frame(example_file):
def test_parse_file_silenced_returns_none_on_error(example_dir): def test_parse_file_silenced_returns_none_on_error(example_dir):
from sensospot_parser.csv_parser import _parse_file_silenced from sensospot_parser.csv_parser import _parse_csv_file_silenced
csv_file = ( csv_file = (
example_dir example_dir
@ -201,7 +201,7 @@ def test_parse_file_silenced_returns_none_on_error(example_dir):
/ "should_raise_value_error.csv" / "should_raise_value_error.csv"
) )
result = _parse_file_silenced(csv_file) result = _parse_csv_file_silenced(csv_file)
assert result is None assert result is None
@ -217,12 +217,12 @@ def test_parse_file_silenced_returns_none_on_error(example_dir):
], ],
) )
def testparse_multiple_files_ok(example_dir, file_list): def testparse_multiple_files_ok(example_dir, file_list):
from sensospot_parser.csv_parser import parse_multiple_files from sensospot_parser.csv_parser import parse_multiple_csv_files
sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
files = [sub_dir / file for file in file_list] files = [sub_dir / file for file in file_list]
data_frame = parse_multiple_files(files) data_frame = parse_multiple_csv_files(files)
print(data_frame["Exposure.Id"].unique()) print(data_frame["Exposure.Id"].unique())
assert len(data_frame) == 100 * len(files) assert len(data_frame) == 100 * len(files)
@ -230,18 +230,18 @@ def testparse_multiple_files_ok(example_dir, file_list):
def testparse_multiple_files_empty_file_list(): def testparse_multiple_files_empty_file_list():
from sensospot_parser.csv_parser import parse_multiple_files from sensospot_parser.csv_parser import parse_multiple_csv_files
with pytest.raises(ValueError): with pytest.raises(ValueError):
parse_multiple_files([]) parse_multiple_csv_files([])
def testparse_multiple_files_empty_array(example_dir): def testparse_multiple_files_empty_array(example_dir):
from sensospot_parser.csv_parser import parse_multiple_files from sensospot_parser.csv_parser import parse_multiple_csv_files
files = [example_dir / "no_array_A1_1.csv"] files = [example_dir / "no_array_A1_1.csv"]
data_frame = parse_multiple_files(files) data_frame = parse_multiple_csv_files(files)
print(data_frame["Exposure.Id"].unique()) print(data_frame["Exposure.Id"].unique())
assert len(data_frame) == 1 assert len(data_frame) == 1
@ -258,9 +258,9 @@ def test_find_csv_files(example_dir):
def test_parse_folder_no_datetime_records(example_dir): def test_parse_folder_no_datetime_records(example_dir):
from sensospot_parser.csv_parser import parse_folder from sensospot_parser.csv_parser import parse_csv_folder
data_frame = parse_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS) data_frame = parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
assert len(data_frame) == 36 * 3 * 100 assert len(data_frame) == 36 * 3 * 100
assert len(data_frame["Well.Row"].unique()) == 3 assert len(data_frame["Well.Row"].unique()) == 3
@ -273,7 +273,10 @@ def test_parse_folder_no_datetime_records(example_dir):
def test_sanity_check_ok(example_dir): def test_sanity_check_ok(example_dir):
from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files from sensospot_parser.csv_parser import (
_sanity_check,
parse_multiple_csv_files,
)
sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
file_list = [ file_list = [
@ -281,7 +284,7 @@ def test_sanity_check_ok(example_dir):
"160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv", "160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv",
] ]
files = [sub_dir / file for file in file_list] files = [sub_dir / file for file in file_list]
data_frame = parse_multiple_files(files) data_frame = parse_multiple_csv_files(files)
result = _sanity_check(data_frame) result = _sanity_check(data_frame)
@ -289,7 +292,10 @@ def test_sanity_check_ok(example_dir):
def test_sanity_check_raises_value_error(example_dir): def test_sanity_check_raises_value_error(example_dir):
from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files from sensospot_parser.csv_parser import (
_sanity_check,
parse_multiple_csv_files,
)
sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
file_list = [ file_list = [
@ -297,7 +303,7 @@ def test_sanity_check_raises_value_error(example_dir):
"160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv", "160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv",
] ]
files = [sub_dir / file for file in file_list] files = [sub_dir / file for file in file_list]
data_frame = parse_multiple_files(files) data_frame = parse_multiple_csv_files(files)
data_frame = data_frame.drop(data_frame.index[1]) data_frame = data_frame.drop(data_frame.index[1])
with pytest.raises(ValueError): with pytest.raises(ValueError):

4
tests/test_sensospot_data.py

@ -4,5 +4,5 @@
def test_import_api(): def test_import_api():
from sensospot_parser import main # noqa: F401 from sensospot_parser import main # noqa: F401
from sensospot_parser import columns # noqa: F401 from sensospot_parser import columns # noqa: F401
from sensospot_parser import parse_file # noqa: F401 from sensospot_parser import parse_csv_file # noqa: F401
from sensospot_parser import parse_folder # noqa: F401 from sensospot_parser import parse_csv_folder # noqa: F401

Loading…
Cancel
Save