diff --git a/README.md b/README.md index e7a7467..57a450e 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ Options: -o, --outfile TEXT Output file name, use a dash '-' for stdout, default: 'collected_data.csv' -q, --quiet Ignore Sanity Check + -r, --recurse Recurse into folders one level down --help Show this message and exit. ``` diff --git a/pyproject.toml b/pyproject.toml index e4e8ab0..b7dd605 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dev = [ ] [tool.flit.scripts] -parse_sensospot_data = "sensospot_data:run" +sensospot_parse = "sensospot_data:main" [tool.black] line-length = 79 diff --git a/sensospot_data/__init__.py b/sensospot_data/__init__.py index 8990ed8..1591c36 100644 --- a/sensospot_data/__init__.py +++ b/sensospot_data/__init__.py @@ -10,10 +10,13 @@ import sys from pathlib import Path import click +import pandas from .parser import parse_file, parse_folder # noqa: F401 from .parameters import ExposureInfo # noqa: F401 +DEFAULT_OUTPUT_FILENAME = "collected_data.csv" + @click.command() @click.argument( @@ -29,10 +32,10 @@ from .parameters import ExposureInfo # noqa: F401 @click.option( "-o", "--outfile", - default="collected_data.csv", + default=DEFAULT_OUTPUT_FILENAME, help=( "Output file name, use a dash '-' for stdout, " - "default: 'collected_data.csv'" + f"default: '{DEFAULT_OUTPUT_FILENAME}'" ), ) @click.option( @@ -42,12 +45,48 @@ from .parameters import ExposureInfo # noqa: F401 default=False, help="Ignore Sanity Check", ) -def run(source, outfile, quiet=False): +@click.option( + "-r", + "--recurse", + is_flag=True, + default=False, + help="Recurse into folders one level down", +) +def main(source, outfile, quiet=False, recurse=False): + if recurse: + _parse_recursive(source, outfile, quiet) + else: + _parse_one_folder(source, outfile, quiet) + + +def _output(data, folder, outfile): + """output a dataframe to stdout or csv file + + data: the pandas dataframe + folder: the folder to save the file to + outfile: the 
name of the outfile, '-' will output to stdout + """ + if outfile.strip() == "-": + data.to_csv(sys.stdout, sep="\t") + else: + csv_file = Path(folder) / outfile + data.to_csv(csv_file, sep="\t") + + +def _parse_one_folder(source, outfile, quiet): + """parses the data of one folder""" source_path = Path(source) # read the raw data of a folder raw_data = parse_folder(source_path, quiet=quiet) - if outfile.strip() == "-": - raw_data.to_csv(sys.stdout, sep="\t") - else: - csv_file = source_path / outfile - raw_data.to_csv(csv_file, sep="\t") + _output(raw_data, source_path, outfile) + return raw_data + + +def _parse_recursive(source, outfile, quiet): + """parses all folders one level down and collects the data""" + child_outfile = DEFAULT_OUTPUT_FILENAME + source_path = Path(source) + folders = (i for i in source_path.iterdir() if i.is_dir()) + collection = (_parse_one_folder(f, child_outfile, quiet) for f in folders) + collected_data = pandas.concat(collection, ignore_index=True).reset_index() + _output(collected_data, source_path, outfile) diff --git a/tests/test_sensovation_data.py b/tests/test_sensovation_data.py index 6fd50af..dcf3891 100644 --- a/tests/test_sensovation_data.py +++ b/tests/test_sensovation_data.py @@ -3,6 +3,6 @@ def test_import_api(): from sensospot_data import ExposureInfo # noqa: F401 - from sensospot_data import run # noqa: F401 + from sensospot_data import main # noqa: F401 from sensospot_data import parse_file # noqa: F401 from sensospot_data import parse_folder # noqa: F401