Browse Source

first superx parser version

pull/1/head
Holger Frey 5 years ago
parent
commit
db3463bff0
  1. 85
      superx_budget/__init__.py
  2. BIN
      test data/Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx
  3. 153
      tests/test_superx_budget.py

85
superx_budget/__init__.py

@ -4,3 +4,88 @@ Creating a budget overview from a SuperX export @@ -4,3 +4,88 @@ Creating a budget overview from a SuperX export
"""
# Version string of the package.
__version__ = "0.0.1"
from datetime import datetime
from collections import namedtuple
# Value the first cell of the headline row is compared against.
EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP"
# Keys that must all be present in the parsed metadata line.
EXPECTED_METADATA_KEYS = {"Haushaltsjahr", "Stand", "Gruppierung"}
# Only exports whose "Gruppierung" metadata entry equals this are accepted.
EXPECTED_EXPORT_GROUPING = "automatisch"
# First-cell value that identifies the header row of the data table.
EXPECTED_DATA_TABLE_HEADER = "Kostenstelle"
# Parse result: the accounting year and export date taken from the metadata
# line, plus the parsed rows of the data table.
SuperXResult = namedtuple("SuperXResult", "account_year export_date data")
class SuperXData(
    namedtuple(
        "SuperXData",
        [
            "cost_center",
            "fonds",
            "project",
            "kind",
            "budget_year",
            "obligo",
            "expenses",
            "revenue_actual",
            "revenue_target",
            # NOTE: misspelled, but kept as is because existing callers
            # access the field under this name.
            "acutal_value",
        ],
    )
):
    """One row of the export's data table.

    Behaves exactly like the underlying named tuple; the only addition is
    the read-only ``actual_value`` property, a correctly spelled alias for
    the historically misspelled ``acutal_value`` field.
    """

    # keep instances as lightweight as a plain named tuple
    __slots__ = ()

    @property
    def actual_value(self):
        """Correctly spelled alias for the ``acutal_value`` field."""
        return self.acutal_value
class SuperXError(ValueError):
    """Raised when the export does not have the expected structure."""
def _check_export_headline(row):
    """Verify that the first cell of ``row`` holds the expected headline.

    Raises:
        SuperXError: if the cell value differs from ``EXPECTED_HEADLINE``.
    """
    found = row[0].value
    if found == EXPECTED_HEADLINE:
        return
    raise SuperXError(f"unexpected headline: '{found}'")
def _get_export_metadata(row):
    """Extract the export metadata from the first cell of ``row``.

    The cell is expected to hold ``key:value`` entries separated by
    semicolons, for example
    ``"Haushaltsjahr: 2020;Stand:31.12.2020;Gruppierung:automatisch"``.
    Whitespace around keys and values is ignored.

    Returns:
        A ``SuperXResult`` with ``account_year`` (the raw "Haushaltsjahr"
        string), ``export_date`` (a ``datetime`` parsed from "Stand" with
        the format ``%d.%m.%Y``) and ``data`` set to ``None``.

    Raises:
        SuperXError: if the cell does not hold text, an entry has no
            ``:`` separator, an expected key is missing, the grouping is
            not the expected one or the date cannot be parsed.
    """
    data = row[0].value
    # an empty or non-text cell cannot be split into entries
    if not isinstance(data, str):
        raise SuperXError(f"unexpected metadata: '{data}'")

    metadata = {}
    for entry in data.split(";"):
        key, separator, value = entry.partition(":")
        if not separator:
            # entry without "key:value" shape, e.g. caused by a stray ";"
            raise SuperXError(f"unexpected metadata: '{data}'")
        metadata[key.strip()] = value.strip()

    if not EXPECTED_METADATA_KEYS.issubset(metadata):
        raise SuperXError(f"unexpected metadata: '{data}'")

    grouping = metadata["Gruppierung"]
    if grouping != EXPECTED_EXPORT_GROUPING:
        raise SuperXError(f"unexpected grouping: {grouping}")

    try:
        export_date = datetime.strptime(metadata["Stand"], "%d.%m.%Y")
    except ValueError as err:
        # re-raise as the package's own error type, keeping the cause
        raise SuperXError(
            f"unexpected export date: '{metadata['Stand']}'"
        ) from err

    return SuperXResult(metadata["Haushaltsjahr"], export_date, None)
def _skip_export_data_until_table_header(rows):
    """Consume ``rows`` up to and including the data table's header row.

    A row counts as the header row when the value of its first cell equals
    ``EXPECTED_DATA_TABLE_HEADER``.

    Raises:
        SuperXError: if ``rows`` is exhausted without finding such a row.
    """
    # any() stops consuming at the first match, so an iterator passed in is
    # left positioned on the row that follows the header
    header_found = any(
        line[0].value == EXPECTED_DATA_TABLE_HEADER for line in rows
    )
    if not header_found:
        raise SuperXError("could not find table header")
def _parse_data_table(rows):
    """Yield a ``SuperXData`` for every row whose first cell is truthy.

    Rows with a falsy first cell value (e.g. ``None`` or ``""``) are
    skipped.  Only the values of the first ten cells of a row are used;
    additional cells are ignored.
    """
    for cells in rows:
        if cells[0].value:
            yield SuperXData(*(cell.value for cell in cells[:10]))
def parse_export_data(xls_sheet):
    """Parse a SuperX export worksheet into a ``SuperXResult``.

    ``xls_sheet`` must provide a ``rows`` attribute that iterates over the
    rows of the sheet, each row being a sequence of cells with a ``value``
    attribute.  The first row has to hold the expected headline and the
    second row the metadata line.  All rows up to and including the table
    header are skipped; the remaining rows are parsed as data.

    Returns:
        A ``SuperXResult`` whose ``data`` is a list of ``SuperXData``.

    Raises:
        SuperXError: if the sheet ends before the metadata row or if one
            of the structural checks on headline, metadata or table
            header fails.
    """
    # iter() accepts any iterable of rows, not only objects that already
    # support next() themselves
    rows = iter(xls_sheet.rows)

    # use a default for next() so a too short sheet is reported as a
    # SuperXError instead of leaking StopIteration to the caller
    headline_row = next(rows, None)
    if headline_row is None:
        raise SuperXError("export is empty: missing headline row")
    _check_export_headline(headline_row)

    metadata_row = next(rows, None)
    if metadata_row is None:
        raise SuperXError("export is incomplete: missing metadata row")
    metadata = _get_export_metadata(metadata_row)

    _skip_export_data_until_table_header(rows)
    data = list(_parse_data_table(rows))
    return metadata._replace(data=data)

BIN
test data/Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx

Binary file not shown.

153
tests/test_superx_budget.py

@ -1,5 +1,154 @@ @@ -1,5 +1,154 @@
""" Stub file for testing the project """
from pathlib import Path
from collections import namedtuple
def test_dummy():
    """Placeholder test that always passes."""
    assert True
import pytest
# Minimal stand-in for a worksheet cell: only carries a ``value`` attribute.
DummyValue = namedtuple("DummyValue", ["value"])
class DummySheet:
    """Stand-in for a worksheet, built from a nested iterable of values."""

    def __init__(self, data):
        self._data = data

    @property
    def rows(self):
        """Generate one list of ``DummyValue`` cells per line of the data."""
        for line in self._data:
            yield list(map(DummyValue, line))
@pytest.fixture
def example_file(request):
    """Path of the example export inside the "test data" directory."""
    file_name = "Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx"
    return Path(request.config.rootdir) / "test data" / file_name
@pytest.fixture
def example_workbook(example_file):
    """Open the example export file with openpyxl."""
    # imported lazily so openpyxl is only needed by tests using this fixture
    from openpyxl import open as open_workbook

    return open_workbook(example_file)
def test_check_export_headline():
    """A first cell without the expected headline raises SuperXError."""
    from superx_budget import SuperXError, _check_export_headline

    wrong_headline = DummyValue("nomatching header")

    with pytest.raises(SuperXError):
        _check_export_headline([wrong_headline])
def test_get_export_metadata_ok():
    """A well-formed metadata line yields the account year and export date."""
    from datetime import datetime

    from superx_budget import _get_export_metadata

    cell = DummyValue(
        "Haushaltsjahr: XXX;Stand:31.12.2020;Gruppierung:automatisch"
    )

    parsed = _get_export_metadata([cell])

    assert parsed.account_year == "XXX"
    assert parsed.export_date == datetime(2020, 12, 31)
@pytest.mark.parametrize(
    "faulty_data",
    [
        # missing "Stand" key
        "Haushaltsjahr:XXX;Kein Stand:31.12.2020;Gruppierung:automatisch",
        # missing "Haushaltsjahr" key
        "Kein Haushaltsjahr:XXX;Stand:31.12.2020;Gruppierung:automatisch",
        # missing both "Haushaltsjahr" and "Stand" keys
        "Kein Haushaltsjahr:XXX;Kein Stand:31.12.2020;Gruppierung:automatisch",
        # "Stand" value is not a date
        "Haushaltsjahr:XXX;Stand:kein Datum;Gruppierung:automatisch",
        # missing "Gruppierung" key
        "Haushaltsjahr:XXX;Stand:31.12.2020;keine Gruppierung:automatisch",
        # unexpected "Gruppierung" value
        "Haushaltsjahr:XXX;Stand:31.12.2020;Gruppierung:nicht automatisch",
    ],
)
def test_get_export_metadata_raises_error(faulty_data):
    """Each faulty metadata line is rejected with a ValueError."""
    from superx_budget import _get_export_metadata

    cells = [DummyValue(faulty_data)]

    # SuperXError is a subclass of ValueError
    with pytest.raises(ValueError):
        _get_export_metadata(cells)
def test_skip_export_data_until_table_header_ok():
    """After skipping, the iterator continues with the row after the header."""
    from superx_budget import _skip_export_data_until_table_header

    first_cells = ("", "", "Kostenstelle", "Daten")
    iterator = iter([[DummyValue(text)] for text in first_cells])

    _skip_export_data_until_table_header(iterator)

    remaining_row = next(iterator)
    assert remaining_row[0].value == "Daten"
def test_skip_export_data_until_table_header_raises_error():
    """Rows without a table header row raise SuperXError."""
    from superx_budget import SuperXError, _skip_export_data_until_table_header

    first_cells = ("", "", "Keine Kostenstelle", "Daten")
    iterator = iter([[DummyValue(text)] for text in first_cells])

    with pytest.raises(SuperXError):
        _skip_export_data_until_table_header(iterator)
def test_parse_data_table():
    """Empty rows are skipped and surplus columns are ignored."""
    from superx_budget import _parse_data_table

    full_row = [DummyValue(letter) for letter in "ABCDEFGHIJ"]
    empty_row = [DummyValue("")] * 10
    # one column more than the data table has
    wide_row = [DummyValue(letter) for letter in "qrstuvwxyzX"]

    first_value, second_value = _parse_data_table(
        [full_row, empty_row, wide_row]
    )

    assert first_value.cost_center == "A"
    assert first_value.fonds == "B"
    assert first_value.acutal_value == "J"
    assert second_value.cost_center == "q"
    assert second_value.acutal_value == "z"
def test_parse_export_data(example_workbook):
    """The example export parses to the expected metadata and data rows."""
    from datetime import datetime

    from superx_budget import parse_export_data

    result = parse_export_data(example_workbook.active)

    assert result.account_year == "2020"
    assert result.export_date == datetime(2020, 3, 18)
    assert len(result.data) == 212

    expected_first = {
        "cost_center": "1110200121",
        "fonds": "3310",
        "project": "1100000102",
        "kind": "1 - Personal",
        "obligo": 0.01,
        "expenses": 1000,
        "revenue_actual": 2000,
        "revenue_target": 3000,
        "acutal_value": 4000,
    }
    expected_last = {
        "cost_center": "1110200121",
        "fonds": "1123",
        "project": "8200062807",
        "kind": "KASSENSTAND zum Ende des Betrachungszeitraums",
        "obligo": 236,
        "expenses": 1236,
        "revenue_actual": 2236,
        "revenue_target": 3236,
        "acutal_value": 4236,
    }

    checks = (
        (result.data[0], expected_first),
        (result.data[-1], expected_last),
    )
    for entry, expected_values in checks:
        # the budget year column is empty in the example export
        assert entry.budget_year is None
        for field, expected in expected_values.items():
            assert getattr(entry, field) == expected

Loading…
Cancel
Save