From fa94fbc6a551fdb52a750626752e372a8d58d1ce Mon Sep 17 00:00:00 2001
From: Holger Frey
Date: Wed, 17 Feb 2021 10:08:15 +0100
Subject: [PATCH] added silenced parsing of data files

---
Review notes (free text between the three-dash marker and the diffstat; not
part of the commit message):

* sensospot_data/parser.py: adds the private helper `_silenced_parse_file`,
  which calls `parse_file` and returns None when `parse_file` raises a
  ValueError. `parse_multiple_files` now builds its collection with this
  helper, so such files are dropped by the `frame is not None` filter that is
  already present in the context lines.
* The `parse_file` docstring is only reformatted here. Its unchanged context
  line still reads "will race a ValueError"; presumably "raise" is meant.
  Because it is a context line, it has to be corrected in a separate change.
* NOTE(review): only part of `parse_multiple_files` is visible in this hunk.
  If every file in the list is skipped, `next(filtered)` looks like it would
  raise StopIteration instead of a ValueError - confirm against the full
  function.
* tests/test_parser.py: adds one test that `parse_file` raises ValueError for
  "should_raise_value_error.csv" and two tests for `_silenced_parse_file`
  (data frame on success, None on error). The expected file count in
  `test_list_csv_files` grows by one for that extra example file.

 sensospot_data/parser.py | 17 ++++++++++++++---
 tests/test_parser.py     | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/sensospot_data/parser.py b/sensospot_data/parser.py
index 75e36d2..fc1de19 100755
--- a/sensospot_data/parser.py
+++ b/sensospot_data/parser.py
@@ -69,8 +69,8 @@ def _cleanup_data_columns(data_frame):
 
 
 def parse_file(data_file):
-    """ parses one data file and adds metadata to result
-    
+    """parses one data file and adds metadata to result
+
     will race a ValueError, if metadata could not be extracted
     """
     measurement_info = _extract_measurement_info(Path(data_file))
@@ -81,11 +81,22 @@ def parse_file(data_file):
     return _cleanup_data_columns(data_frame)
 
 
+def _silenced_parse_file(data_file):
+    """parses one data file and adds metadata
+
+    returns data frame or None on ValueError
+    """
+    try:
+        return parse_file(data_file)
+    except ValueError:
+        return None
+
+
 def parse_multiple_files(file_list):
     """ parses a list of file paths to one combined dataframe """
     if not file_list:
         raise ValueError("Empty file list provided")
-    collection = (parse_file(path) for path in file_list)
+    collection = (_silenced_parse_file(path) for path in file_list)
     filtered = (frame for frame in collection if frame is not None)
     data_frame = next(filtered)
     for next_frame in filtered:
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a568ec6..3c97fe9 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -179,6 +179,39 @@ def test_parse_file(example_file):
     assert result["Exposure.Id"][0] == 1
 
 
+def test_parse_file_raises_error(example_dir):
+    from sensospot_data.parser import parse_file
+
+    csv_file = (
+        example_dir / EXAMPLE_DIR_WITH_PARAMS / "should_raise_value_error.csv"
+    )
+
+    with pytest.raises(ValueError):
+        parse_file(csv_file)
+
+
+def test_silenced_parse_file_returns_data_frame(example_file):
+    from sensospot_data.parser import _silenced_parse_file
+
+    result = _silenced_parse_file(example_file)
+
+    assert result["Well.Row"][0] == "A"
+    assert result["Well.Column"][0] == 1
+    assert result["Exposure.Id"][0] == 1
+
+
+def test_silenced_parse_file_returns_none_on_error(example_dir):
+    from sensospot_data.parser import _silenced_parse_file
+
+    csv_file = (
+        example_dir / EXAMPLE_DIR_WITH_PARAMS / "should_raise_value_error.csv"
+    )
+
+    result = _silenced_parse_file(csv_file)
+
+    assert result is None
+
+
 @pytest.mark.parametrize(
     "file_list",
     [
@@ -225,7 +258,7 @@ def test_list_csv_files(example_dir):
 
     result = list(list_csv_files(example_dir / EXAMPLE_DIR_WITH_PARAMS))
 
-    assert len(result) == 36 * 3
+    assert len(result) == (36 * 3) + 1  # 36 wells, 3 exposure + one error file
     assert all(str(item).endswith(".csv") for item in result)
     assert all(not item.stem.startswith(".") for item in result)
 