From 988c7562d9ad28594a9ecf5e0164f9466fba5aeb Mon Sep 17 00:00:00 2001
From: Holger Frey
Date: Tue, 3 Jan 2023 17:00:58 +0100
Subject: [PATCH] renamed some functions in "csv_parser" module to have more
 explicit names

---
 .../S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg   | 25 ----------
 .../S QC 10x10 Cy3 100ms Cy5 150-15ms.svary   | 27 -----------
 .../S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp   | 19 --------
 src/sensospot_parser/__init__.py              |  4 +-
 src/sensospot_parser/csv_parser.py            | 20 +++++---
 tests/conftest.py                             |  8 ++--
 tests/test_csv_parser.py                      | 46 +++++++++++--------
 tests/test_sensospot_data.py                  |  4 +-
 8 files changed, 47 insertions(+), 106 deletions(-)
 delete mode 100644 example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg
 delete mode 100644 example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary
 delete mode 100644 example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp

diff --git a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg b/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg
deleted file mode 100644
index ffa263a..0000000
--- a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg
+++ /dev/null
@@ -1,25 +0,0 @@
[25 deleted XML lines; tag content stripped in extraction and not recoverable]
\ No newline at end of file
diff --git a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary b/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary
deleted file mode 100644
index fc336c3..0000000
--- a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary
+++ /dev/null
@@ -1,27 +0,0 @@
[27 deleted XML lines; tag content stripped in extraction and not recoverable]
\ No newline at end of file
diff --git a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp b/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp
deleted file mode 100644
index 33be21d..0000000
--- a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp
+++ /dev/null
@@ -1,19 +0,0 @@
[19 deleted XML lines; tag content stripped in extraction and not recoverable]
\ No newline at end of file
diff --git a/src/sensospot_parser/__init__.py b/src/sensospot_parser/__init__.py
index 8450620..74c69ab 100644
--- a/src/sensospot_parser/__init__.py
+++ b/src/sensospot_parser/__init__.py
@@ -12,7 +12,7 @@ import click
 import pandas
 
 from . import columns  # noqa: F401
-from .csv_parser import parse_file, parse_folder  # noqa: F401
+from .csv_parser import parse_csv_file, parse_csv_folder  # noqa: F401
 
 DEFAULT_OUTPUT_FILENAME = "collected_data.csv"
 
@@ -51,7 +51,7 @@ def main(sources, output, quiet=False):
 
     """
     paths = (pathlib.Path(source) for source in sources)
-    collection = (parse_folder(source, quiet) for source in paths)
+    collection = (parse_csv_folder(source, quiet) for source in paths)
     result = pandas.concat(collection, ignore_index=True).to_csv(
         output, sep="\t", index=False
     )
diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py
index 24b29c2..13ef0ce 100644
--- a/src/sensospot_parser/csv_parser.py
+++ b/src/sensospot_parser/csv_parser.py
@@ -84,7 +84,7 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo:
     return FileInfo(row, column, exposure)
 
 
-def parse_file(data_file: PathLike) -> pandas.DataFrame:
+def parse_csv_file(data_file: PathLike) -> pandas.DataFrame:
     """parses one data file and adds metadata to result
 
     will race a ValueError, if metadata could not be extracted
@@ -113,7 +113,9 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame:
     return columns._cleanup_data_columns(data_frame)
 
 
-def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:
+def _parse_csv_file_silenced(
+    data_file: PathLike,
+) -> Optional[pandas.DataFrame]:
     """parses one data file and adds metadata
 
     Safety checks are supressed
@@ -125,12 +127,14 @@ def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:
         pandas data frame with the parsed data or None on error
     """
     try:
-        return parse_file(data_file)
+        return parse_csv_file(data_file)
     except ValueError:
         return None
 
 
-def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame:
+def parse_multiple_csv_files(
+    file_list: Sequence[PathLike],
+) -> pandas.DataFrame:
     """parses a list of file paths to one combined data frame
 
     Args:
@@ -140,7 +144,7 @@ def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame:
     """
     if not file_list:
         raise ValueError("Empty file list provided")
-    collection = (_parse_file_silenced(path) for path in file_list)
+    collection = (_parse_csv_file_silenced(path) for path in file_list)
     filtered = (frame for frame in collection if frame is not None)
     data_frame = pandas.concat(filtered, ignore_index=True).reset_index()
     data_frame[columns.WELL_ROW] = data_frame[columns.WELL_ROW].astype(
@@ -191,7 +195,9 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame:
     return data_frame
 
 
-def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame:
+def parse_csv_folder(
+    folder: PathLike, quiet: bool = False
+) -> pandas.DataFrame:
     """parses all csv files in a folder to one large dataframe
 
     Will raise an ValueError, if no sensospot data could be found in
@@ -207,7 +213,7 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame:
     folder_path = pathlib.Path(folder)
     file_list = find_csv_files(folder_path)
     try:
-        data_frame = parse_multiple_files(file_list)
+        data_frame = parse_multiple_csv_files(file_list)
     except ValueError:
         raise ValueError(f"No sensospot data found in folder '{folder}'")
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 8e4d43c..d176031 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -94,16 +94,16 @@ def normalization_data_frame():
 
 @pytest.fixture(scope="session")
 def parsed_data_frame_with_params(example_dir):
-    from sensospot_parser.csv_parser import parse_folder
+    from sensospot_parser.csv_parser import parse_csv_folder
 
-    return parse_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
+    return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
 
 
 @pytest.fixture(scope="session")
 def parsed_data_frame_without_params(example_dir):
-    from sensospot_parser.csv_parser import parse_folder
+    from sensospot_parser.csv_parser import parse_csv_folder
 
-    return parse_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS)
+    return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS)
 
 
 @pytest.fixture
diff --git a/tests/test_csv_parser.py b/tests/test_csv_parser.py
index c8678d3..371bf7f 100644
--- a/tests/test_csv_parser.py
+++ b/tests/test_csv_parser.py
@@ -128,9 +128,9 @@ def test_extract_measurement_info_raises_error(filename):
 
 
 def test_parse_file(example_file):
-    from sensospot_parser.csv_parser import parse_file
+    from sensospot_parser.csv_parser import parse_csv_file
 
-    result = parse_file(example_file)
+    result = parse_csv_file(example_file)
 
     columns = {
         "Pos.Id",
@@ -170,7 +170,7 @@ def test_parse_file(example_file):
 
 
 def test_parse_file_raises_error(example_dir):
-    from sensospot_parser.csv_parser import parse_file
+    from sensospot_parser.csv_parser import parse_csv_file
 
     csv_file = (
         example_dir
@@ -179,13 +179,13 @@ def test_parse_file_raises_error(example_dir):
     )
 
     with pytest.raises(ValueError):
-        parse_file(csv_file)
+        parse_csv_file(csv_file)
 
 
 def test_parse_file_silenced_returns_data_frame(example_file):
-    from sensospot_parser.csv_parser import _parse_file_silenced
+    from sensospot_parser.csv_parser import _parse_csv_file_silenced
 
-    result = _parse_file_silenced(example_file)
+    result = _parse_csv_file_silenced(example_file)
 
     assert result["Well.Row"][0] == "A"
     assert result["Well.Column"][0] == 1
@@ -193,7 +193,7 @@ def test_parse_file_silenced_returns_data_frame(example_file):
 
 
 def test_parse_file_silenced_returns_none_on_error(example_dir):
-    from sensospot_parser.csv_parser import _parse_file_silenced
+    from sensospot_parser.csv_parser import _parse_csv_file_silenced
 
     csv_file = (
         example_dir
@@ -201,7 +201,7 @@ def test_parse_file_silenced_returns_none_on_error(example_dir):
         / "should_raise_value_error.csv"
     )
 
-    result = _parse_file_silenced(csv_file)
+    result = _parse_csv_file_silenced(csv_file)
 
     assert result is None
 
@@ -217,12 +217,12 @@ def test_parse_file_silenced_returns_none_on_error(example_dir):
     ],
 )
 def testparse_multiple_files_ok(example_dir, file_list):
-    from sensospot_parser.csv_parser import parse_multiple_files
+    from sensospot_parser.csv_parser import parse_multiple_csv_files
 
     sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
     files = [sub_dir / file for file in file_list]
 
-    data_frame = parse_multiple_files(files)
+    data_frame = parse_multiple_csv_files(files)
     print(data_frame["Exposure.Id"].unique())
 
     assert len(data_frame) == 100 * len(files)
@@ -230,18 +230,18 @@ def testparse_multiple_files_ok(example_dir, file_list):
 
 
 def testparse_multiple_files_empty_file_list():
-    from sensospot_parser.csv_parser import parse_multiple_files
+    from sensospot_parser.csv_parser import parse_multiple_csv_files
 
     with pytest.raises(ValueError):
-        parse_multiple_files([])
+        parse_multiple_csv_files([])
 
 
 def testparse_multiple_files_empty_array(example_dir):
-    from sensospot_parser.csv_parser import parse_multiple_files
+    from sensospot_parser.csv_parser import parse_multiple_csv_files
 
     files = [example_dir / "no_array_A1_1.csv"]
 
-    data_frame = parse_multiple_files(files)
+    data_frame = parse_multiple_csv_files(files)
     print(data_frame["Exposure.Id"].unique())
 
     assert len(data_frame) == 1
@@ -258,9 +258,9 @@ def test_find_csv_files(example_dir):
 
 
 def test_parse_folder_no_datetime_records(example_dir):
-    from sensospot_parser.csv_parser import parse_folder
+    from sensospot_parser.csv_parser import parse_csv_folder
 
-    data_frame = parse_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
+    data_frame = parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
 
     assert len(data_frame) == 36 * 3 * 100
     assert len(data_frame["Well.Row"].unique()) == 3
@@ -273,7 +273,10 @@ def test_parse_folder_no_datetime_records(example_dir):
 
 
 def test_sanity_check_ok(example_dir):
-    from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files
+    from sensospot_parser.csv_parser import (
+        _sanity_check,
+        parse_multiple_csv_files,
+    )
 
     sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
     file_list = [
@@ -281,7 +284,7 @@ def test_sanity_check_ok(example_dir):
         "160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv",
     ]
     files = [sub_dir / file for file in file_list]
-    data_frame = parse_multiple_files(files)
+    data_frame = parse_multiple_csv_files(files)
 
     result = _sanity_check(data_frame)
 
@@ -289,7 +292,10 @@
 
 
 def test_sanity_check_raises_value_error(example_dir):
-    from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files
+    from sensospot_parser.csv_parser import (
+        _sanity_check,
+        parse_multiple_csv_files,
+    )
 
     sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
     file_list = [
@@ -297,7 +303,7 @@ def test_sanity_check_raises_value_error(example_dir):
         "160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv",
     ]
     files = [sub_dir / file for file in file_list]
-    data_frame = parse_multiple_files(files)
+    data_frame = parse_multiple_csv_files(files)
     data_frame = data_frame.drop(data_frame.index[1])
 
     with pytest.raises(ValueError):
diff --git a/tests/test_sensospot_data.py b/tests/test_sensospot_data.py
index 549de0d..a8a2e16 100644
--- a/tests/test_sensospot_data.py
+++ b/tests/test_sensospot_data.py
@@ -4,5 +4,5 @@ def test_import_api():
     from sensospot_parser import main  # noqa: F401
     from sensospot_parser import columns  # noqa: F401
-    from sensospot_parser import parse_file  # noqa: F401
-    from sensospot_parser import parse_folder  # noqa: F401
+    from sensospot_parser import parse_csv_file  # noqa: F401
+    from sensospot_parser import parse_csv_folder  # noqa: F401
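
For reference, a minimal usage sketch of the renamed public API, based only on the names and behaviour visible in the diff above (parse_csv_file, parse_csv_folder, the "Well.Row" column, and the ValueError raised when a folder holds no Sensospot data). The example folder path is illustrative and would need to point at a real folder of Sensospot csv exports:

    import pathlib

    # both functions are re-exported at package level,
    # see src/sensospot_parser/__init__.py in the diff above
    from sensospot_parser import parse_csv_file, parse_csv_folder

    # illustrative path: any folder containing Sensospot csv exports
    example_folder = pathlib.Path("example_data/csv_wo_parameters")

    # one combined data frame for all csv files in the folder;
    # raises ValueError if no Sensospot data is found there
    folder_frame = parse_csv_folder(example_folder)
    print(folder_frame["Well.Row"].unique())

    # a single exported csv file can also be parsed on its own;
    # raises ValueError if the metadata cannot be extracted from the name
    first_csv = sorted(example_folder.glob("*.csv"))[0]
    file_frame = parse_csv_file(first_csv)
    print(len(file_frame))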