From 6760e373cee3e84d668ea8cfca124181711fe863 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Mon, 23 Mar 2020 18:39:45 +0100 Subject: [PATCH] moved superx parsing in submodule --- superx_budget/__init__.py | 85 ------------------- superx_budget/superx.py | 85 +++++++++++++++++++ tests/conftest.py | 10 +++ ...superx_budget.py => test_superx_parser.py} | 61 ++++++------- 4 files changed, 121 insertions(+), 120 deletions(-) create mode 100644 superx_budget/superx.py create mode 100644 tests/conftest.py rename tests/{test_superx_budget.py => test_superx_parser.py} (70%) diff --git a/superx_budget/__init__.py b/superx_budget/__init__.py index b7def04..76d8d7c 100644 --- a/superx_budget/__init__.py +++ b/superx_budget/__init__.py @@ -4,88 +4,3 @@ Creating a budget overview from a SuperX export """ __version__ = "0.0.1" - -from datetime import datetime -from collections import namedtuple - -EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP" -EXPECTED_METADATA_KEYS = {"Haushaltsjahr", "Stand", "Gruppierung"} -EXPECTED_EXPORT_GROUPING = "automatisch" -EXPECTED_DATA_TABLE_HEADER = "Kostenstelle" - - -SuperXResult = namedtuple( - "SuperXResult", ["account_year", "export_date", "data"] -) -SuperXData = namedtuple( - "SuperXData", - [ - "cost_center", - "fonds", - "project", - "kind", - "budget_year", - "obligo", - "expenses", - "revenue_actual", - "revenue_target", - "acutal_value", - ], -) - - -class SuperXError(ValueError): - pass - - -def _check_export_headline(row): - """ checks the first line of the excel data if it's what we'd expect """ - headline = row[0].value - if headline != EXPECTED_HEADLINE: - raise SuperXError(f"unexpected headline: '{headline}'") - - -def _get_export_metadata(row): - """ extracts the metadata from the second row of the excel sheet """ - data = row[0].value - entries = data.split(";") - parts = [entry.split(":", 1) for entry in entries] - metadata = {key.strip(): value.strip() for key, value in parts} - if EXPECTED_METADATA_KEYS - set(metadata.keys()): - raise SuperXError(f"unexpected metadata: '{data}'") - if metadata["Gruppierung"] != EXPECTED_EXPORT_GROUPING: - raise SuperXError(f"unexpected grouping: {metadata['Gruppierung']}") - return SuperXResult( - metadata["Haushaltsjahr"], - datetime.strptime(metadata["Stand"], "%d.%m.%Y"), - None, - ) - - -def _skip_export_data_until_table_header(rows): - """ skip rows until data table headers """ - for line in rows: - first_cell = line[0] - if first_cell.value == EXPECTED_DATA_TABLE_HEADER: - break - else: - raise SuperXError("could not find table header") - - -def _parse_data_table(rows): - """ parses non-empty lines of the data table """ - for line in rows: - if not line[0].value: - continue - data = [cell.value for cell in line[:10]] - yield SuperXData(*data) - - -def parse_export_data(xls_sheet): - """ parses the exported superx data """ - rows = xls_sheet.rows - _check_export_headline(next(rows)) - metadata = _get_export_metadata(next(rows)) - _skip_export_data_until_table_header(rows) - data = list(_parse_data_table(rows)) - return SuperXResult(metadata.account_year, metadata.export_date, data) diff --git a/superx_budget/superx.py b/superx_budget/superx.py new file mode 100644 index 0000000..951cfc7 --- /dev/null +++ b/superx_budget/superx.py @@ -0,0 +1,85 @@ +""" SuperX Parser """ + +from datetime import datetime +from collections import namedtuple + +EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP" +EXPECTED_METADATA_KEYS = {"Haushaltsjahr", "Stand", "Gruppierung"} +EXPECTED_EXPORT_GROUPING = "automatisch" +EXPECTED_DATA_TABLE_HEADER = "Kostenstelle" + + +SuperXResult = namedtuple( + "SuperXResult", ["account_year", "export_date", "data"] +) +SuperXData = namedtuple( + "SuperXData", + [ + "cost_center", + "fonds", + "project", + "kind", + "budget_year", + "obligo", + "expenses", + "revenue_actual", + "revenue_target", + "acutal_value", + ], +) + + +class SuperXError(ValueError): + pass + + +def _check_export_headline(row): + """ checks the first line of the excel data if it's what we'd expect """ + headline = row[0] + if headline != EXPECTED_HEADLINE: + raise SuperXError(f"unexpected headline: '{headline}'") + + +def _get_export_metadata(row): + """ extracts the metadata from the second row of the excel sheet """ + data = row[0] + entries = data.split(";") + parts = [entry.split(":", 1) for entry in entries] + metadata = {key.strip(): value.strip() for key, value in parts} + if EXPECTED_METADATA_KEYS - set(metadata.keys()): + raise SuperXError(f"unexpected metadata: '{data}'") + if metadata["Gruppierung"] != EXPECTED_EXPORT_GROUPING: + raise SuperXError(f"unexpected grouping: {metadata['Gruppierung']}") + return SuperXResult( + metadata["Haushaltsjahr"], + datetime.strptime(metadata["Stand"], "%d.%m.%Y"), + None, + ) + + +def _skip_export_data_until_table_header(rows): + """ skip rows until data table headers """ + for line in rows: + first_cell = line[0] + if first_cell == EXPECTED_DATA_TABLE_HEADER: + break + else: + raise SuperXError("could not find table header") + + +def _parse_data_table(rows): + """ parses non-empty lines of the data table """ + for line in rows: + if not line[0]: + continue + yield SuperXData(*line[:10]) + + +def parse_export_data(xls_sheet): + """ parses the exported superx data """ + rows = xls_sheet.values + _check_export_headline(next(rows)) + metadata = _get_export_metadata(next(rows)) + _skip_export_data_until_table_header(rows) + data = list(_parse_data_table(rows)) + return SuperXResult(metadata.account_year, metadata.export_date, data) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..3428f80 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,10 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture +def example_file(request): + root_dir = Path(request.config.rootdir) + data_dir = root_dir / "test data" + return data_dir / "Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx" diff --git a/tests/test_superx_budget.py b/tests/test_superx_parser.py similarity index 70% rename from tests/test_superx_budget.py rename to tests/test_superx_parser.py index 02f5146..ff7451a 100644 --- a/tests/test_superx_budget.py +++ b/tests/test_superx_parser.py @@ -1,12 +1,8 @@ """ Stub file for testing the project """ -from pathlib import Path -from collections import namedtuple import pytest -DummyValue = namedtuple("DummyValue", "value") - class DummySheet: def __init__(self, data): @@ -14,15 +10,7 @@ class DummySheet: @property def rows(self): - for line in self._data: - yield [DummyValue(value) for value in line] - - -@pytest.fixture -def example_file(request): - root_dir = Path(request.config.rootdir) - data_dir = root_dir / "test data" - return data_dir / "Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx" + return iter(self._data) @pytest.fixture @@ -33,20 +21,20 @@ def example_workbook(example_file): def test_check_export_headline(): - from superx_budget import _check_export_headline, SuperXError + from superx_budget.superx import _check_export_headline, SuperXError - row = [DummyValue("nomatching header")] + row = ["nomatching header"] with pytest.raises(SuperXError): _check_export_headline(row) def test_get_export_metadata_ok(): - from superx_budget import _get_export_metadata + from superx_budget.superx import _get_export_metadata from datetime import datetime value = "Haushaltsjahr: XXX;Stand:31.12.2020;Gruppierung:automatisch" - row = [DummyValue(value)] + row = [value] metadata = _get_export_metadata(row) assert metadata.account_year == "XXX" @@ -65,38 +53,41 @@ def test_get_export_metadata_ok(): ], ) def test_get_export_metadata_raises_error(faulty_data): - from superx_budget import _get_export_metadata + from superx_budget.superx import _get_export_metadata - row = [DummyValue(faulty_data)] + row = [faulty_data] with pytest.raises(ValueError): # SuperXError is a subclass of ValueError _get_export_metadata(row) def test_skip_export_data_until_table_header_ok(): - from superx_budget import _skip_export_data_until_table_header + from superx_budget.superx import _skip_export_data_until_table_header rows = [ - [DummyValue("")], - [DummyValue("")], - [DummyValue("Kostenstelle")], - [DummyValue("Daten")], + [""], + [""], + ["Kostenstelle"], + ["Daten"], ] iterator = iter(rows) _skip_export_data_until_table_header(iterator) data_line = next(iterator) - assert data_line[0].value == "Daten" + assert data_line[0] == "Daten" def test_skip_export_data_until_table_header_raises_error(): - from superx_budget import _skip_export_data_until_table_header, SuperXError + from superx_budget.superx import ( + _skip_export_data_until_table_header, + SuperXError, + ) rows = [ - [DummyValue("")], - [DummyValue("")], - [DummyValue("Keine Kostenstelle")], - [DummyValue("Daten")], + [""], + [""], + ["Keine Kostenstelle"], + ["Daten"], ] iterator = iter(rows) with pytest.raises(SuperXError): @@ -104,12 +95,12 @@ def test_skip_export_data_until_table_header_raises_error(): def test_parse_data_table(): - from superx_budget import _parse_data_table + from superx_budget.superx import _parse_data_table rows = [ - [DummyValue(v) for v in "ABCDEFGHIJ"], - [DummyValue("") for i in range(10)], - [DummyValue(v) for v in "qrstuvwxyzX"], # one column more + list("ABCDEFGHIJ"), + ["" for i in range(10)], + list("qrstuvwxyzX"), # one column more ] result = _parse_data_table(rows) first_value, second_value = list(result) @@ -122,7 +113,7 @@ def test_parse_data_table(): def test_parse_export_data(example_workbook): - from superx_budget import parse_export_data + from superx_budget.superx import parse_export_data from datetime import datetime result = parse_export_data(example_workbook.active)