Browse Source

moved superx parsing in submodule

pull/1/head
Holger Frey 5 years ago
parent
commit
6760e373ce
  1. 85
      superx_budget/__init__.py
  2. 85
      superx_budget/superx.py
  3. 10
      tests/conftest.py
  4. 61
      tests/test_superx_parser.py

85
superx_budget/__init__.py

@ -4,88 +4,3 @@ Creating a budget overview from a SuperX export @@ -4,88 +4,3 @@ Creating a budget overview from a SuperX export
"""
__version__ = "0.0.1"
from datetime import datetime
from collections import namedtuple
# Text expected in the first cell of the first row of an export.
EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP"
# Keys that must all be present in the metadata row of an export.
EXPECTED_METADATA_KEYS = {"Haushaltsjahr", "Stand", "Gruppierung"}
# Only exports with this value for the "Gruppierung" metadata are accepted.
EXPECTED_EXPORT_GROUPING = "automatisch"
# First-cell text that marks the header row of the data table.
EXPECTED_DATA_TABLE_HEADER = "Kostenstelle"
# Outcome of parsing an export: the account year, the export date and the
# parsed data rows (``None`` as long as only the metadata has been read).
SuperXResult = namedtuple("SuperXResult", "account_year export_date data")
# One row of the data table. The field order follows the first ten cells of
# a table row, which is how ``_parse_data_table`` fills the tuple.
# NOTE: "acutal_value" (sic) is part of the public attribute names and is
# therefore kept exactly as it is.
SuperXData = namedtuple(
    "SuperXData",
    "cost_center fonds project kind budget_year "
    "obligo expenses revenue_actual revenue_target acutal_value",
)
class SuperXError(ValueError):
    """ raised if the export data is not in the expected format """
def _check_export_headline(row):
    """ makes sure the export starts with the expected headline

    Only the value of the first cell in the given row is compared against
    EXPECTED_HEADLINE; a SuperXError is raised if they differ.
    """
    first_cell_value = row[0].value
    if first_cell_value == EXPECTED_HEADLINE:
        return
    raise SuperXError(f"unexpected headline: '{first_cell_value}'")
def _get_export_metadata(row):
    """ extracts the metadata from the second row of the excel sheet

    The value of the first cell is expected to be text of the form
    "key: value;key: value;...". Keys and values are stripped of surrounding
    whitespace.

    :param row: sequence of cells, only the ``value`` of the first is used
    :returns: SuperXResult with the account year ("Haushaltsjahr"), the
        export date ("Stand", parsed as day.month.year) and None as data
    :raises SuperXError: if the cell does not hold text, an entry has no ":"
        separator, an expected key is missing, the grouping is not the
        expected one or the date cannot be parsed
    """
    data = row[0].value
    if not isinstance(data, str):
        # anything but text (e.g. None) would fail below with an
        # AttributeError instead of a SuperXError
        raise SuperXError(f"unexpected metadata: '{data}'")

    metadata = {}
    for entry in data.split(";"):
        key, separator, value = entry.partition(":")
        if not separator:
            # the entry is not of the form "key: value"
            raise SuperXError(f"unexpected metadata: '{data}'")
        metadata[key.strip()] = value.strip()

    if EXPECTED_METADATA_KEYS - set(metadata):
        raise SuperXError(f"unexpected metadata: '{data}'")
    grouping = metadata["Gruppierung"]
    if grouping != EXPECTED_EXPORT_GROUPING:
        raise SuperXError(f"unexpected grouping: {grouping}")

    try:
        export_date = datetime.strptime(metadata["Stand"], "%d.%m.%Y")
    except ValueError as error:
        # report an unparsable date like every other format problem
        raise SuperXError(
            f"unexpected export date: '{metadata['Stand']}'"
        ) from error
    return SuperXResult(metadata["Haushaltsjahr"], export_date, None)
def _skip_export_data_until_table_header(rows):
    """ advances the row iterator past the header of the data table

    Rows are consumed up to and including the first one whose first cell
    value equals EXPECTED_DATA_TABLE_HEADER. If the rows run out before such
    a row shows up, a SuperXError is raised.
    """
    # any() stops consuming at the first match, just like a loop with break
    header_found = any(
        row[0].value == EXPECTED_DATA_TABLE_HEADER for row in rows
    )
    if not header_found:
        raise SuperXError("could not find table header")
def _parse_data_table(rows):
    """ yields a SuperXData tuple for every non-empty line of the table

    A line counts as empty if the value of its first cell is falsy. Only
    the values of the first ten cells of a line are used, further cells
    are ignored.
    """
    for row in rows:
        if row[0].value:
            yield SuperXData(*[cell.value for cell in row[:10]])
def parse_export_data(xls_sheet):
    """ parses the exported superx data

    The first row must hold the expected headline and the second row the
    export metadata. All rows up to and including the data table header are
    skipped, the non-empty rows after it are parsed.

    :param xls_sheet: object whose ``rows`` attribute provides the rows as
        sequences of cells (presumably an openpyxl worksheet)
    :returns: SuperXResult with account year, export date and a list of
        SuperXData entries
    :raises SuperXError: if the sheet has fewer than two rows or its
        content is not in the expected format
    """
    rows = iter(xls_sheet.rows)

    # a missing row must not leak a StopIteration to the caller
    headline_row = next(rows, None)
    if headline_row is None:
        raise SuperXError("export data is empty")
    _check_export_headline(headline_row)

    metadata_row = next(rows, None)
    if metadata_row is None:
        raise SuperXError("export data has no metadata row")
    metadata = _get_export_metadata(metadata_row)

    _skip_export_data_until_table_header(rows)
    data = list(_parse_data_table(rows))
    return SuperXResult(metadata.account_year, metadata.export_date, data)

85
superx_budget/superx.py

@ -0,0 +1,85 @@ @@ -0,0 +1,85 @@
""" SuperX Parser """
from datetime import datetime
from collections import namedtuple
# Text expected in the first cell of the first row of an export.
EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP"
# Keys that must all be present in the metadata row of an export.
EXPECTED_METADATA_KEYS = {"Haushaltsjahr", "Stand", "Gruppierung"}
# Only exports with this value for the "Gruppierung" metadata are accepted.
EXPECTED_EXPORT_GROUPING = "automatisch"
# First-cell text that marks the header row of the data table.
EXPECTED_DATA_TABLE_HEADER = "Kostenstelle"
# Outcome of parsing an export: the account year, the export date and the
# parsed data rows (``None`` as long as only the metadata has been read).
SuperXResult = namedtuple("SuperXResult", "account_year export_date data")
# One row of the data table. The field order follows the first ten values of
# a table row, which is how ``_parse_data_table`` fills the tuple.
# NOTE: "acutal_value" (sic) is part of the public attribute names and is
# therefore kept exactly as it is.
SuperXData = namedtuple(
    "SuperXData",
    "cost_center fonds project kind budget_year "
    "obligo expenses revenue_actual revenue_target acutal_value",
)
class SuperXError(ValueError):
    """ raised if the export data is not in the expected format """
def _check_export_headline(row):
    """ makes sure the export starts with the expected headline

    Only the first value of the given row is compared against
    EXPECTED_HEADLINE; a SuperXError is raised if they differ.
    """
    first_value = row[0]
    if first_value == EXPECTED_HEADLINE:
        return
    raise SuperXError(f"unexpected headline: '{first_value}'")
def _get_export_metadata(row):
    """ extracts the metadata from the second row of the excel sheet

    The first value of the row is expected to be text of the form
    "key: value;key: value;...". Keys and values are stripped of surrounding
    whitespace.

    :param row: sequence of cell values, only the first one is used
    :returns: SuperXResult with the account year ("Haushaltsjahr"), the
        export date ("Stand", parsed as day.month.year) and None as data
    :raises SuperXError: if the value is not text, an entry has no ":"
        separator, an expected key is missing, the grouping is not the
        expected one or the date cannot be parsed
    """
    data = row[0]
    if not isinstance(data, str):
        # anything but text (e.g. None) would fail below with an
        # AttributeError instead of a SuperXError
        raise SuperXError(f"unexpected metadata: '{data}'")

    metadata = {}
    for entry in data.split(";"):
        key, separator, value = entry.partition(":")
        if not separator:
            # the entry is not of the form "key: value"
            raise SuperXError(f"unexpected metadata: '{data}'")
        metadata[key.strip()] = value.strip()

    if EXPECTED_METADATA_KEYS - set(metadata):
        raise SuperXError(f"unexpected metadata: '{data}'")
    grouping = metadata["Gruppierung"]
    if grouping != EXPECTED_EXPORT_GROUPING:
        raise SuperXError(f"unexpected grouping: {grouping}")

    try:
        export_date = datetime.strptime(metadata["Stand"], "%d.%m.%Y")
    except ValueError as error:
        # report an unparsable date like every other format problem
        raise SuperXError(
            f"unexpected export date: '{metadata['Stand']}'"
        ) from error
    return SuperXResult(metadata["Haushaltsjahr"], export_date, None)
def _skip_export_data_until_table_header(rows):
    """ advances the row iterator past the header of the data table

    Rows are consumed up to and including the first one whose first value
    equals EXPECTED_DATA_TABLE_HEADER. If the rows run out before such a row
    shows up, a SuperXError is raised.
    """
    # any() stops consuming at the first match, just like a loop with break
    header_found = any(row[0] == EXPECTED_DATA_TABLE_HEADER for row in rows)
    if not header_found:
        raise SuperXError("could not find table header")
def _parse_data_table(rows):
    """ yields a SuperXData tuple for every non-empty line of the table

    A line counts as empty if its first value is falsy. Only the first ten
    values of a line are used, further values are ignored.
    """
    for row in rows:
        if row[0]:
            first_ten_values = row[:10]
            yield SuperXData(*first_ten_values)
def parse_export_data(xls_sheet):
    """ parses the exported superx data

    The first row must hold the expected headline and the second row the
    export metadata. All rows up to and including the data table header are
    skipped, the non-empty rows after it are parsed.

    :param xls_sheet: object whose ``values`` attribute provides the rows as
        sequences of cell values (presumably an openpyxl worksheet)
    :returns: SuperXResult with account year, export date and a list of
        SuperXData entries
    :raises SuperXError: if the sheet has fewer than two rows or its
        content is not in the expected format
    """
    rows = iter(xls_sheet.values)

    # a missing row must not leak a StopIteration to the caller
    headline_row = next(rows, None)
    if headline_row is None:
        raise SuperXError("export data is empty")
    _check_export_headline(headline_row)

    metadata_row = next(rows, None)
    if metadata_row is None:
        raise SuperXError("export data has no metadata row")
    metadata = _get_export_metadata(metadata_row)

    _skip_export_data_until_table_header(rows)
    data = list(_parse_data_table(rows))
    return SuperXResult(metadata.account_year, metadata.export_date, data)

10
tests/conftest.py

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
from pathlib import Path
import pytest
@pytest.fixture
def example_file(request):
    """ path of the example export inside the "test data" directory

    The directory is looked up relative to the pytest root directory.
    """
    return (
        Path(request.config.rootdir)
        / "test data"
        / "Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx"
    )

61
tests/test_superx_budget.py → tests/test_superx_parser.py

@ -1,12 +1,8 @@ @@ -1,12 +1,8 @@
""" Stub file for testing the project """
from pathlib import Path
from collections import namedtuple
import pytest
DummyValue = namedtuple("DummyValue", "value")
class DummySheet:
def __init__(self, data):
@ -14,15 +10,7 @@ class DummySheet: @@ -14,15 +10,7 @@ class DummySheet:
@property
def rows(self):
for line in self._data:
yield [DummyValue(value) for value in line]
@pytest.fixture
def example_file(request):
root_dir = Path(request.config.rootdir)
data_dir = root_dir / "test data"
return data_dir / "Verwendungsnachweis_und_Kassenstand_SAP_Zahlen.xlsx"
return iter(self._data)
@pytest.fixture
@ -33,20 +21,20 @@ def example_workbook(example_file): @@ -33,20 +21,20 @@ def example_workbook(example_file):
def test_check_export_headline():
from superx_budget import _check_export_headline, SuperXError
from superx_budget.superx import _check_export_headline, SuperXError
row = [DummyValue("nomatching header")]
row = ["nomatching header"]
with pytest.raises(SuperXError):
_check_export_headline(row)
def test_get_export_metadata_ok():
from superx_budget import _get_export_metadata
from superx_budget.superx import _get_export_metadata
from datetime import datetime
value = "Haushaltsjahr: XXX;Stand:31.12.2020;Gruppierung:automatisch"
row = [DummyValue(value)]
row = [value]
metadata = _get_export_metadata(row)
assert metadata.account_year == "XXX"
@ -65,38 +53,41 @@ def test_get_export_metadata_ok(): @@ -65,38 +53,41 @@ def test_get_export_metadata_ok():
],
)
def test_get_export_metadata_raises_error(faulty_data):
from superx_budget import _get_export_metadata
from superx_budget.superx import _get_export_metadata
row = [DummyValue(faulty_data)]
row = [faulty_data]
with pytest.raises(ValueError): # SuperXError is a subclass of ValueError
_get_export_metadata(row)
def test_skip_export_data_until_table_header_ok():
from superx_budget import _skip_export_data_until_table_header
from superx_budget.superx import _skip_export_data_until_table_header
rows = [
[DummyValue("")],
[DummyValue("")],
[DummyValue("Kostenstelle")],
[DummyValue("Daten")],
[""],
[""],
["Kostenstelle"],
["Daten"],
]
iterator = iter(rows)
_skip_export_data_until_table_header(iterator)
data_line = next(iterator)
assert data_line[0].value == "Daten"
assert data_line[0] == "Daten"
def test_skip_export_data_until_table_header_raises_error():
from superx_budget import _skip_export_data_until_table_header, SuperXError
from superx_budget.superx import (
_skip_export_data_until_table_header,
SuperXError,
)
rows = [
[DummyValue("")],
[DummyValue("")],
[DummyValue("Keine Kostenstelle")],
[DummyValue("Daten")],
[""],
[""],
["Keine Kostenstelle"],
["Daten"],
]
iterator = iter(rows)
with pytest.raises(SuperXError):
@ -104,12 +95,12 @@ def test_skip_export_data_until_table_header_raises_error(): @@ -104,12 +95,12 @@ def test_skip_export_data_until_table_header_raises_error():
def test_parse_data_table():
from superx_budget import _parse_data_table
from superx_budget.superx import _parse_data_table
rows = [
[DummyValue(v) for v in "ABCDEFGHIJ"],
[DummyValue("") for i in range(10)],
[DummyValue(v) for v in "qrstuvwxyzX"], # one column more
list("ABCDEFGHIJ"),
["" for i in range(10)],
list("qrstuvwxyzX"), # one column more
]
result = _parse_data_table(rows)
first_value, second_value = list(result)
@ -122,7 +113,7 @@ def test_parse_data_table(): @@ -122,7 +113,7 @@ def test_parse_data_table():
def test_parse_export_data(example_workbook):
from superx_budget import parse_export_data
from superx_budget.superx import parse_export_data
from datetime import datetime
result = parse_export_data(example_workbook.active)
Loading…
Cancel
Save