Browse Source

added option to recurse one level down

xmlparsing
Holger Frey 3 years ago
parent
commit
c03cff6ad4
  1. 1
      README.md
  2. 2
      pyproject.toml
  3. 55
      sensospot_data/__init__.py
  4. 2
      tests/test_sensovation_data.py

1
README.md

@ -47,6 +47,7 @@ Options:
-o, --outfile TEXT Output file name, use a dash '-' for stdout, default: -o, --outfile TEXT Output file name, use a dash '-' for stdout, default:
'collected_data.csv' 'collected_data.csv'
-q, --quiet Ignore Sanity Check -q, --quiet Ignore Sanity Check
-r, --recurse Recurse into folders one level down
--help Show this message and exit. --help Show this message and exit.
``` ```

2
pyproject.toml

@ -48,7 +48,7 @@ dev = [
] ]
[tool.flit.scripts] [tool.flit.scripts]
parse_sensospot_data = "sensospot_data:run" sensospot_parse = "sensospot_data:main"
[tool.black] [tool.black]
line-length = 79 line-length = 79

55
sensospot_data/__init__.py

@ -10,10 +10,13 @@ import sys
from pathlib import Path from pathlib import Path
import click import click
import pandas
from .parser import parse_file, parse_folder # noqa: F401 from .parser import parse_file, parse_folder # noqa: F401
from .parameters import ExposureInfo # noqa: F401 from .parameters import ExposureInfo # noqa: F401
DEFAULT_OUTPUT_FILENAME = "collected_data.csv"
@click.command() @click.command()
@click.argument( @click.argument(
@ -29,10 +32,10 @@ from .parameters import ExposureInfo # noqa: F401
@click.option( @click.option(
"-o", "-o",
"--outfile", "--outfile",
default="collected_data.csv", default=DEFAULT_OUTPUT_FILENAME,
help=( help=(
"Output file name, use a dash '-' for stdout, " "Output file name, use a dash '-' for stdout, "
"default: 'collected_data.csv'" f"default: '{DEFAULT_OUTPUT_FILENAME}'"
), ),
) )
@click.option( @click.option(
@ -42,12 +45,48 @@ from .parameters import ExposureInfo # noqa: F401
default=False, default=False,
help="Ignore Sanity Check", help="Ignore Sanity Check",
) )
def run(source, outfile, quiet=False): @click.option(
"-r",
"--recurse",
is_flag=True,
default=False,
help="Recurse into folders one level down",
)
def main(source, outfile, quiet=False, recurse=False):
    # NOTE: deliberately a comment and not a docstring; click would show a
    # docstring as the command's help text.
    #
    # source:  the folder to read the data from
    # outfile: name of the output file, handed on unchanged
    # quiet:   handed on unchanged to the parsing function
    # recurse: if set, the folders one level below `source` are parsed and
    #          collected, otherwise only `source` itself is parsed
    handler = _parse_recursive if recurse else _parse_one_folder
    handler(source, outfile, quiet)
def _output(data, folder, outfile):
    """output a dataframe to stdout or to a tab separated file

    data:    the pandas dataframe
    folder:  the folder to save the file to
    outfile: the name of the outfile, '-' will output to stdout

    Whitespace around `outfile` is ignored, both for detecting the stdout
    dash and for building the name of the file to write.
    """
    # normalize once, so the dash check and the file name always agree
    name = outfile.strip()
    if name == "-":
        data.to_csv(sys.stdout, sep="\t")
    else:
        data.to_csv(Path(folder) / name, sep="\t")
def _parse_one_folder(source, outfile, quiet):
"""parses the data of one folder"""
source_path = Path(source) source_path = Path(source)
# read the raw data of a folder # read the raw data of a folder
raw_data = parse_folder(source_path, quiet=quiet) raw_data = parse_folder(source_path, quiet=quiet)
if outfile.strip() == "-": _output(raw_data, source_path, outfile)
raw_data.to_csv(sys.stdout, sep="\t") return raw_data
else:
csv_file = source_path / outfile
raw_data.to_csv(csv_file, sep="\t") def _parse_recursive(source, outfile, quiet):
"""parses all folder one level down and collects the data"""
child_outfile = DEFAULT_OUTPUT_FILENAME
source_path = Path(source)
folders = (i for i in source_path.iterdir() if i.is_dir())
collection = (_parse_one_folder(f, child_outfile, quiet) for f in folders)
collected_data = pandas.concat(collection, ignore_index=True).reset_index()
_output(collected_data, source_path, outfile)

2
tests/test_sensovation_data.py

@ -3,6 +3,6 @@
def test_import_api(): def test_import_api():
from sensospot_data import ExposureInfo # noqa: F401 from sensospot_data import ExposureInfo # noqa: F401
from sensospot_data import run # noqa: F401 from sensospot_data import main # noqa: F401
from sensospot_data import parse_file # noqa: F401 from sensospot_data import parse_file # noqa: F401
from sensospot_data import parse_folder # noqa: F401 from sensospot_data import parse_folder # noqa: F401

Loading…
Cancel
Save