From a8bc75edf926b2035198dd38838c1a0f0de0d714 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Tue, 24 Mar 2020 08:20:50 +0100 Subject: [PATCH] added helper functions --- superx_budget/budget.py | 11 +++--- superx_budget/helpers.py | 26 ++++++++++--- superx_budget/superx.py | 7 ++-- tests/test_helpers.py | 73 +++++++++++++++++++++++++++++++++++++ tests/test_superx_parser.py | 2 +- 5 files changed, 104 insertions(+), 15 deletions(-) create mode 100644 tests/test_helpers.py diff --git a/superx_budget/budget.py b/superx_budget/budget.py index 27cc055..ac548e8 100644 --- a/superx_budget/budget.py +++ b/superx_budget/budget.py @@ -2,7 +2,7 @@ from collections import namedtuple -from .helpers import get_sheet_of_file +from .helpers import get_sheet_of_file, strip_excel_value, is_empty_excel_value from .exceptions import BudgetParserError EXPECTED_TABLE_HEADERS = [ @@ -44,16 +44,15 @@ def _check_table_header(xl_row): def _skip_empty_lines(rows): for xl_row in rows: first_cell = xl_row.data[0] - if first_cell is None: - continue - if isinstance(first_cell, str) and first_cell.strip() == "": + if is_empty_excel_value(first_cell): continue yield xl_row def _parse_data_table(rows): for xl_row in _skip_empty_lines(rows): - yield BudgetData(xl_row.row, *xl_row.data[:7]) + data = [strip_excel_value(value) for value in xl_row.data[:7]] + yield BudgetData(xl_row.row, *data) def parse_budget_data(xls_sheet): @@ -65,5 +64,5 @@ def parse_budget_data(xls_sheet): def parse_budget_file(file_path): """ parses the budget file """ - sheet = get_sheet_of_file(file_path, name=None) + sheet = get_sheet_of_file(file_path, sheet=None) return parse_budget_data(sheet) diff --git a/superx_budget/helpers.py b/superx_budget/helpers.py index eae1278..51ca576 100644 --- a/superx_budget/helpers.py +++ b/superx_budget/helpers.py @@ -3,13 +3,29 @@ import openpyxl -def get_sheet_of_file(excel_file, name=None): +def get_sheet_of_file(excel_file, sheet=None): """ returns a sheet from an excel FileCache if name is set to None, the function returns the first sheet """ workbook = openpyxl.open(excel_file) - if name is None: - sheets = workbook.sheetnames - name = sheets[0] - return workbook[name] + if sheet is None: + all_sheets = workbook.sheetnames + sheet = all_sheets[0] + return workbook[sheet] + + +def is_empty_excel_value(value): + """ is the cell value considered empty """ + if value is None: + return True + if isinstance(value, str) and value.strip() == "": + return True + return False + + +def strip_excel_value(value): + """ remove whitespace from an excel value if it is a string """ + if isinstance(value, str): + return value.strip() + return value diff --git a/superx_budget/superx.py b/superx_budget/superx.py index 0b2db67..dc7da87 100644 --- a/superx_budget/superx.py +++ b/superx_budget/superx.py @@ -3,7 +3,7 @@ from datetime import datetime from collections import namedtuple -from .helpers import get_sheet_of_file +from .helpers import get_sheet_of_file, strip_excel_value from .exceptions import SuperXParserError EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP" @@ -73,7 +73,8 @@ def _parse_data_table(rows): for line in rows: if not line[0]: continue - yield SuperXData(*line[:10]) + data = [strip_excel_value(value) for value in line[:10]] + yield SuperXData(*data) def parse_export_data(xls_sheet): @@ -88,5 +89,5 @@ def parse_export_data(xls_sheet): def parse_exported_file(file_path): """ parses the budget file """ - sheet = get_sheet_of_file(file_path, name=None) + sheet = get_sheet_of_file(file_path, sheet=None) return parse_export_data(sheet) diff --git a/tests/test_helpers.py b/tests/test_helpers.py new file mode 100644 index 0000000..8c36639 --- /dev/null +++ b/tests/test_helpers.py @@ -0,0 +1,73 @@ +import pytest + + +@pytest.fixture +def example_file(example_root): + return example_root / "Verbrauchsmittel-Toto-2020.xlsx" + + +@pytest.fixture +def example_workbook(example_file): + import openpyxl + + yield openpyxl.open(example_file) + + +@pytest.mark.parametrize( + "input,expected", + [ + ("a", False), + ("", True), + (" ", True), + (" a ", False), + (None, True), + (0, False), + (2.2, False), + ], +) +def test_is_empty_excel_value(input, expected): + from superx_budget.helpers import is_empty_excel_value + + result = is_empty_excel_value(input) + + assert result == expected + + +@pytest.mark.parametrize( + "input,expected", + [ + ("a", "a"), + ("", ""), + (" ", ""), + (" a ", "a"), + (None, None), + (1, 1), + (2.2, 2.2), + ], +) +def test_strip_excel_value(input, expected): + from superx_budget.helpers import strip_excel_value + + result = strip_excel_value(input) + + assert result == expected + + +def test_get_sheet_of_file_first(example_file): + from superx_budget.helpers import get_sheet_of_file + + sheet = get_sheet_of_file(example_file) # sheet=None + first_row = next(sheet.values) + first_cell = first_row[0] + + assert first_cell.strip() == "Nr." + + +def test_get_sheet_of_file_named(example_file): + from superx_budget.helpers import get_sheet_of_file + + sheet = get_sheet_of_file(example_file, sheet="Safeguard I") + first_row = next(sheet.values) + first_cell = first_row[0] + + assert first_cell == 1 diff --git a/tests/test_superx_parser.py b/tests/test_superx_parser.py index ec74eb3..c0b097e 100644 --- a/tests/test_superx_parser.py +++ b/tests/test_superx_parser.py @@ -95,7 +95,7 @@ def test_parse_data_table(): from superx_budget.superx import _parse_data_table rows = [ - list("ABCDEFGHIJ"), + ["A "] + list("BCDEFGHIJ"), ["" for i in range(10)], list("qrstuvwxyzX"), # one column more ]