diff --git a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg b/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg
deleted file mode 100644
index ffa263a..0000000
--- a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svalg
+++ /dev/null
@@ -1,25 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary b/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary
deleted file mode 100644
index fc336c3..0000000
--- a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svary
+++ /dev/null
@@ -1,27 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp b/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp
deleted file mode 100644
index 33be21d..0000000
--- a/example_data/xml_with_parameters/Parameters/Assay/S QC 10x10 Cy3 100ms Cy5 150-15ms/S QC 10x10 Cy3 100ms Cy5 150-15ms.svexp
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/src/sensospot_parser/__init__.py b/src/sensospot_parser/__init__.py
index 8450620..74c69ab 100644
--- a/src/sensospot_parser/__init__.py
+++ b/src/sensospot_parser/__init__.py
@@ -12,7 +12,7 @@ import click
import pandas
from . import columns # noqa: F401
-from .csv_parser import parse_file, parse_folder # noqa: F401
+from .csv_parser import parse_csv_file, parse_csv_folder # noqa: F401
DEFAULT_OUTPUT_FILENAME = "collected_data.csv"
@@ -51,7 +51,7 @@ def main(sources, output, quiet=False):
"""
paths = (pathlib.Path(source) for source in sources)
- collection = (parse_folder(source, quiet) for source in paths)
+ collection = (parse_csv_folder(source, quiet) for source in paths)
result = pandas.concat(collection, ignore_index=True).to_csv(
output, sep="\t", index=False
)
diff --git a/src/sensospot_parser/csv_parser.py b/src/sensospot_parser/csv_parser.py
index 24b29c2..13ef0ce 100644
--- a/src/sensospot_parser/csv_parser.py
+++ b/src/sensospot_parser/csv_parser.py
@@ -84,7 +84,7 @@ def _extract_measurement_info(data_file: PathLike) -> FileInfo:
return FileInfo(row, column, exposure)
-def parse_file(data_file: PathLike) -> pandas.DataFrame:
+def parse_csv_file(data_file: PathLike) -> pandas.DataFrame:
"""parses one data file and adds metadata to result
will raise a ValueError, if metadata could not be extracted
@@ -113,7 +113,9 @@ def parse_file(data_file: PathLike) -> pandas.DataFrame:
return columns._cleanup_data_columns(data_frame)
-def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:
+def _parse_csv_file_silenced(
+ data_file: PathLike,
+) -> Optional[pandas.DataFrame]:
"""parses one data file and adds metadata
Safety checks are suppressed
@@ -125,12 +127,14 @@ def _parse_file_silenced(data_file: PathLike) -> Optional[pandas.DataFrame]:
pandas data frame with the parsed data or None on error
"""
try:
- return parse_file(data_file)
+ return parse_csv_file(data_file)
except ValueError:
return None
-def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame:
+def parse_multiple_csv_files(
+ file_list: Sequence[PathLike],
+) -> pandas.DataFrame:
"""parses a list of file paths to one combined data frame
Args:
@@ -140,7 +144,7 @@ def parse_multiple_files(file_list: Sequence[PathLike]) -> pandas.DataFrame:
"""
if not file_list:
raise ValueError("Empty file list provided")
- collection = (_parse_file_silenced(path) for path in file_list)
+ collection = (_parse_csv_file_silenced(path) for path in file_list)
filtered = (frame for frame in collection if frame is not None)
data_frame = pandas.concat(filtered, ignore_index=True).reset_index()
data_frame[columns.WELL_ROW] = data_frame[columns.WELL_ROW].astype(
@@ -191,7 +195,9 @@ def _sanity_check(data_frame: pandas.DataFrame) -> pandas.DataFrame:
return data_frame
-def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame:
+def parse_csv_folder(
+ folder: PathLike, quiet: bool = False
+) -> pandas.DataFrame:
"""parses all csv files in a folder to one large dataframe
Will raise a ValueError, if no sensospot data could be found in
@@ -207,7 +213,7 @@ def parse_folder(folder: PathLike, quiet: bool = False) -> pandas.DataFrame:
folder_path = pathlib.Path(folder)
file_list = find_csv_files(folder_path)
try:
- data_frame = parse_multiple_files(file_list)
+ data_frame = parse_multiple_csv_files(file_list)
except ValueError:
raise ValueError(f"No sensospot data found in folder '{folder}'")
diff --git a/tests/conftest.py b/tests/conftest.py
index 8e4d43c..d176031 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -94,16 +94,16 @@ def normalization_data_frame():
@pytest.fixture(scope="session")
def parsed_data_frame_with_params(example_dir):
- from sensospot_parser.csv_parser import parse_folder
+ from sensospot_parser.csv_parser import parse_csv_folder
- return parse_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
+ return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
@pytest.fixture(scope="session")
def parsed_data_frame_without_params(example_dir):
- from sensospot_parser.csv_parser import parse_folder
+ from sensospot_parser.csv_parser import parse_csv_folder
- return parse_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS)
+ return parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WO_PARAMS)
@pytest.fixture
diff --git a/tests/test_csv_parser.py b/tests/test_csv_parser.py
index c8678d3..371bf7f 100644
--- a/tests/test_csv_parser.py
+++ b/tests/test_csv_parser.py
@@ -128,9 +128,9 @@ def test_extract_measurement_info_raises_error(filename):
def test_parse_file(example_file):
- from sensospot_parser.csv_parser import parse_file
+ from sensospot_parser.csv_parser import parse_csv_file
- result = parse_file(example_file)
+ result = parse_csv_file(example_file)
columns = {
"Pos.Id",
@@ -170,7 +170,7 @@ def test_parse_file(example_file):
def test_parse_file_raises_error(example_dir):
- from sensospot_parser.csv_parser import parse_file
+ from sensospot_parser.csv_parser import parse_csv_file
csv_file = (
example_dir
@@ -179,13 +179,13 @@ def test_parse_file_raises_error(example_dir):
)
with pytest.raises(ValueError):
- parse_file(csv_file)
+ parse_csv_file(csv_file)
def test_parse_file_silenced_returns_data_frame(example_file):
- from sensospot_parser.csv_parser import _parse_file_silenced
+ from sensospot_parser.csv_parser import _parse_csv_file_silenced
- result = _parse_file_silenced(example_file)
+ result = _parse_csv_file_silenced(example_file)
assert result["Well.Row"][0] == "A"
assert result["Well.Column"][0] == 1
@@ -193,7 +193,7 @@ def test_parse_file_silenced_returns_data_frame(example_file):
def test_parse_file_silenced_returns_none_on_error(example_dir):
- from sensospot_parser.csv_parser import _parse_file_silenced
+ from sensospot_parser.csv_parser import _parse_csv_file_silenced
csv_file = (
example_dir
@@ -201,7 +201,7 @@ def test_parse_file_silenced_returns_none_on_error(example_dir):
/ "should_raise_value_error.csv"
)
- result = _parse_file_silenced(csv_file)
+ result = _parse_csv_file_silenced(csv_file)
assert result is None
@@ -217,12 +217,12 @@ def test_parse_file_silenced_returns_none_on_error(example_dir):
],
)
def testparse_multiple_files_ok(example_dir, file_list):
- from sensospot_parser.csv_parser import parse_multiple_files
+ from sensospot_parser.csv_parser import parse_multiple_csv_files
sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
files = [sub_dir / file for file in file_list]
- data_frame = parse_multiple_files(files)
+ data_frame = parse_multiple_csv_files(files)
print(data_frame["Exposure.Id"].unique())
assert len(data_frame) == 100 * len(files)
@@ -230,18 +230,18 @@ def testparse_multiple_files_ok(example_dir, file_list):
def testparse_multiple_files_empty_file_list():
- from sensospot_parser.csv_parser import parse_multiple_files
+ from sensospot_parser.csv_parser import parse_multiple_csv_files
with pytest.raises(ValueError):
- parse_multiple_files([])
+ parse_multiple_csv_files([])
def testparse_multiple_files_empty_array(example_dir):
- from sensospot_parser.csv_parser import parse_multiple_files
+ from sensospot_parser.csv_parser import parse_multiple_csv_files
files = [example_dir / "no_array_A1_1.csv"]
- data_frame = parse_multiple_files(files)
+ data_frame = parse_multiple_csv_files(files)
print(data_frame["Exposure.Id"].unique())
assert len(data_frame) == 1
@@ -258,9 +258,9 @@ def test_find_csv_files(example_dir):
def test_parse_folder_no_datetime_records(example_dir):
- from sensospot_parser.csv_parser import parse_folder
+ from sensospot_parser.csv_parser import parse_csv_folder
- data_frame = parse_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
+ data_frame = parse_csv_folder(example_dir / EXAMPLE_DIR_CSV_WITH_PARAMS)
assert len(data_frame) == 36 * 3 * 100
assert len(data_frame["Well.Row"].unique()) == 3
@@ -273,7 +273,10 @@ def test_parse_folder_no_datetime_records(example_dir):
def test_sanity_check_ok(example_dir):
- from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files
+ from sensospot_parser.csv_parser import (
+ _sanity_check,
+ parse_multiple_csv_files,
+ )
sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
file_list = [
@@ -281,7 +284,7 @@ def test_sanity_check_ok(example_dir):
"160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv",
]
files = [sub_dir / file for file in file_list]
- data_frame = parse_multiple_files(files)
+ data_frame = parse_multiple_csv_files(files)
result = _sanity_check(data_frame)
@@ -289,7 +292,10 @@ def test_sanity_check_ok(example_dir):
def test_sanity_check_raises_value_error(example_dir):
- from sensospot_parser.csv_parser import _sanity_check, parse_multiple_files
+ from sensospot_parser.csv_parser import (
+ _sanity_check,
+ parse_multiple_csv_files,
+ )
sub_dir = example_dir / EXAMPLE_DIR_CSV_WO_PARAMS
file_list = [
@@ -297,7 +303,7 @@ def test_sanity_check_raises_value_error(example_dir):
"160218_SG2-013-001_Regen1_Cy3-100_1_A1_2.csv",
]
files = [sub_dir / file for file in file_list]
- data_frame = parse_multiple_files(files)
+ data_frame = parse_multiple_csv_files(files)
data_frame = data_frame.drop(data_frame.index[1])
with pytest.raises(ValueError):
diff --git a/tests/test_sensospot_data.py b/tests/test_sensospot_data.py
index 549de0d..a8a2e16 100644
--- a/tests/test_sensospot_data.py
+++ b/tests/test_sensospot_data.py
@@ -4,5 +4,5 @@
def test_import_api():
from sensospot_parser import main # noqa: F401
from sensospot_parser import columns # noqa: F401
- from sensospot_parser import parse_file # noqa: F401
- from sensospot_parser import parse_folder # noqa: F401
+ from sensospot_parser import parse_csv_file # noqa: F401
+ from sensospot_parser import parse_csv_folder # noqa: F401
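
For reference, a minimal usage sketch of the renamed public API after this change. The example paths below are placeholders, not files added by this diff; only the function names, the `quiet` keyword, and the "Well.Row" column are taken from the patch itself.

import pathlib

from sensospot_parser import parse_csv_file, parse_csv_folder

# hypothetical paths, for illustration only
single_csv = pathlib.Path("some_measurement_A1_1.csv")
data_folder = pathlib.Path("some_measurement_folder")

# parse one CSV export; raises ValueError if metadata cannot be extracted
single_frame = parse_csv_file(single_csv)

# parse all CSV files in a folder into one combined DataFrame;
# raises ValueError if no sensospot data is found in the folder
combined = parse_csv_folder(data_folder, quiet=True)
print(combined["Well.Row"].unique())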