Browse Source

added helper functions

pull/1/head
Holger Frey 5 years ago
parent
commit
a8bc75edf9
  1. 11
      superx_budget/budget.py
  2. 26
      superx_budget/helpers.py
  3. 7
      superx_budget/superx.py
  4. 73
      tests/test_helpers.py
  5. 2
      tests/test_superx_parser.py

11
superx_budget/budget.py

@ -2,7 +2,7 @@
from collections import namedtuple from collections import namedtuple
from .helpers import get_sheet_of_file from .helpers import get_sheet_of_file, strip_excel_value, is_empty_excel_value
from .exceptions import BudgetParserError from .exceptions import BudgetParserError
EXPECTED_TABLE_HEADERS = [ EXPECTED_TABLE_HEADERS = [
@ -44,16 +44,15 @@ def _check_table_header(xl_row):
def _skip_empty_lines(rows): def _skip_empty_lines(rows):
for xl_row in rows: for xl_row in rows:
first_cell = xl_row.data[0] first_cell = xl_row.data[0]
if first_cell is None: if is_empty_excel_value(first_cell):
continue
if isinstance(first_cell, str) and first_cell.strip() == "":
continue continue
yield xl_row yield xl_row
def _parse_data_table(rows): def _parse_data_table(rows):
for xl_row in _skip_empty_lines(rows): for xl_row in _skip_empty_lines(rows):
yield BudgetData(xl_row.row, *xl_row.data[:7]) data = [strip_excel_value(value) for value in xl_row.data[:7]]
yield BudgetData(xl_row.row, *data)
def parse_budget_data(xls_sheet): def parse_budget_data(xls_sheet):
@ -65,5 +64,5 @@ def parse_budget_data(xls_sheet):
def parse_budget_file(file_path): def parse_budget_file(file_path):
""" parses the budget file """ """ parses the budget file """
sheet = get_sheet_of_file(file_path, name=None) sheet = get_sheet_of_file(file_path, sheet=None)
return parse_budget_data(sheet) return parse_budget_data(sheet)

26
superx_budget/helpers.py

@ -3,13 +3,29 @@
import openpyxl import openpyxl
def get_sheet_of_file(excel_file, name=None): def get_sheet_of_file(excel_file, sheet=None):
""" returns a sheet from an excel FileCache """ returns a sheet from an excel FileCache
if name is set to None, the function returns the first sheet if name is set to None, the function returns the first sheet
""" """
workbook = openpyxl.open(excel_file) workbook = openpyxl.open(excel_file)
if name is None: if sheet is None:
sheets = workbook.sheetnames all_sheets = workbook.sheetnames
name = sheets[0] sheet = all_sheets[0]
return workbook[name] return workbook[sheet]
def is_empty_excel_value(value):
    """Tell whether a cell value counts as empty.

    ``None`` and strings that contain nothing but whitespace are empty;
    every other value (including numbers such as ``0``) is not.
    """
    if isinstance(value, str):
        # a string is empty when nothing is left after stripping
        return value.strip() == ""
    return value is None
def strip_excel_value(value):
    """Return *value* without surrounding whitespace if it is a string.

    Values of any other type are handed back unchanged.
    """
    return value.strip() if isinstance(value, str) else value

7
superx_budget/superx.py

@ -3,7 +3,7 @@
from datetime import datetime from datetime import datetime
from collections import namedtuple from collections import namedtuple
from .helpers import get_sheet_of_file from .helpers import get_sheet_of_file, strip_excel_value
from .exceptions import SuperXParserError from .exceptions import SuperXParserError
EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP" EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP"
@ -73,7 +73,8 @@ def _parse_data_table(rows):
for line in rows: for line in rows:
if not line[0]: if not line[0]:
continue continue
yield SuperXData(*line[:10]) data = [strip_excel_value(value) for value in line[:10]]
yield SuperXData(*data)
def parse_export_data(xls_sheet): def parse_export_data(xls_sheet):
@ -88,5 +89,5 @@ def parse_export_data(xls_sheet):
def parse_exported_file(file_path): def parse_exported_file(file_path):
""" parses the budget file """ """ parses the budget file """
sheet = get_sheet_of_file(file_path, name=None) sheet = get_sheet_of_file(file_path, sheet=None)
return parse_export_data(sheet) return parse_export_data(sheet)

73
tests/test_helpers.py

@ -0,0 +1,73 @@
import pytest
@pytest.fixture
def example_file(example_root):
    """Path of the example Excel file inside ``example_root``.

    ``example_root`` is a fixture provided outside of this module.
    """
    file_name = "Verbrauchsmittel-Toto-2020.xlsx"
    return example_root / file_name
@pytest.fixture
def example_workbook(example_file):
    """Yield the example file opened as an openpyxl workbook.

    The workbook is closed again once the test using it has finished.
    """
    import openpyxl

    workbook = openpyxl.open(example_file)
    yield workbook
    # teardown: release the underlying file handle, if one is still held
    workbook.close()
@pytest.mark.parametrize(
    "value,expected",
    [
        ("a", False),
        ("", True),
        (" ", True),
        (" a ", False),
        (None, True),
        (0, False),
        (2.2, False),
    ],
)
def test_is_empty_excel_value(value, expected):
    """Only ``None`` and blank strings are reported as empty."""
    # the argument is called `value` so the builtin `input` is not shadowed
    from superx_budget.helpers import is_empty_excel_value

    result = is_empty_excel_value(value)

    assert result == expected
@pytest.mark.parametrize(
    "value,expected",
    [
        ("a", "a"),
        ("", ""),
        (" ", ""),
        (" a ", "a"),
        (None, None),
        (1, 1),
        (2.2, 2.2),
    ],
)
def test_strip_excel_value(value, expected):
    """Strings are stripped, all other values pass through unchanged."""
    # the argument is called `value` so the builtin `input` is not shadowed
    from superx_budget.helpers import strip_excel_value

    result = strip_excel_value(value)

    assert result == expected
def test_get_sheet_of_file_first(example_file):
    """Without a sheet argument the first cell read is the "Nr." header."""
    from superx_budget.helpers import get_sheet_of_file

    worksheet = get_sheet_of_file(example_file)  # sheet=None

    top_left = next(worksheet.values)[0]
    assert top_left.strip() == "Nr."
def test_get_sheet_of_file_named(example_file):
    """Asking for the sheet "Safeguard I" yields a sheet starting with 1."""
    from superx_budget.helpers import get_sheet_of_file

    worksheet = get_sheet_of_file(example_file, sheet="Safeguard I")

    top_left = next(worksheet.values)[0]
    assert top_left == 1

2
tests/test_superx_parser.py

@ -95,7 +95,7 @@ def test_parse_data_table():
from superx_budget.superx import _parse_data_table from superx_budget.superx import _parse_data_table
rows = [ rows = [
list("ABCDEFGHIJ"), ["A "] + list("BCDEFGHIJ"),
["" for i in range(10)], ["" for i in range(10)],
list("qrstuvwxyzX"), # one column more list("qrstuvwxyzX"), # one column more
] ]

Loading…
Cancel
Save