From a8bc75edf926b2035198dd38838c1a0f0de0d714 Mon Sep 17 00:00:00 2001
From: Holger Frey <mail@holgerfrey.de>
Date: Tue, 24 Mar 2020 08:20:50 +0100
Subject: [PATCH] added helper functions

---
 superx_budget/budget.py     | 11 +++---
 superx_budget/helpers.py    | 26 ++++++++++---
 superx_budget/superx.py     |  7 ++--
 tests/test_helpers.py       | 73 +++++++++++++++++++++++++++++++++++++
 tests/test_superx_parser.py |  2 +-
 5 files changed, 104 insertions(+), 15 deletions(-)
 create mode 100644 tests/test_helpers.py

diff --git a/superx_budget/budget.py b/superx_budget/budget.py
index 27cc055..ac548e8 100644
--- a/superx_budget/budget.py
+++ b/superx_budget/budget.py
@@ -2,7 +2,7 @@
 
 from collections import namedtuple
 
-from .helpers import get_sheet_of_file
+from .helpers import get_sheet_of_file, strip_excel_value, is_empty_excel_value
 from .exceptions import BudgetParserError
 
 EXPECTED_TABLE_HEADERS = [
@@ -44,16 +44,15 @@ def _check_table_header(xl_row):
 def _skip_empty_lines(rows):
     for xl_row in rows:
         first_cell = xl_row.data[0]
-        if first_cell is None:
-            continue
-        if isinstance(first_cell, str) and first_cell.strip() == "":
+        if is_empty_excel_value(first_cell):  # None or only whitespace
             continue
         yield xl_row
 
 
 def _parse_data_table(rows):
     for xl_row in _skip_empty_lines(rows):
-        yield BudgetData(xl_row.row, *xl_row.data[:7])
+        data = [strip_excel_value(value) for value in xl_row.data[:7]]
+        yield BudgetData(xl_row.row, *data)  # first 7 cells, strings stripped
 
 
 def parse_budget_data(xls_sheet):
@@ -65,5 +64,5 @@ def parse_budget_data(xls_sheet):
 
 def parse_budget_file(file_path):
     """ parses the budget file """
-    sheet = get_sheet_of_file(file_path, name=None)
+    sheet = get_sheet_of_file(file_path, sheet=None)  # None: first sheet
     return parse_budget_data(sheet)
diff --git a/superx_budget/helpers.py b/superx_budget/helpers.py
index eae1278..51ca576 100644
--- a/superx_budget/helpers.py
+++ b/superx_budget/helpers.py
@@ -3,13 +3,35 @@
 import openpyxl
 
 
-def get_sheet_of_file(excel_file, name=None):
-    """ returns a sheet from an excel FileCache
-
-    if name is set to None, the function returns the first sheet
-    """
-    workbook = openpyxl.open(excel_file)
-    if name is None:
-        sheets = workbook.sheetnames
-        name = sheets[0]
-    return workbook[name]
+def get_sheet_of_file(excel_file, sheet=None):
+    """ returns a sheet from an excel file
+
+    excel_file is passed on to openpyxl.open(), sheet is the name of the
+    sheet to return; if sheet is set to None, the function returns the
+    first sheet of the workbook
+    """
+    workbook = openpyxl.open(excel_file)
+    if sheet is None:
+        # no name given: fall back to the first sheet of the workbook
+        sheet = workbook.sheetnames[0]
+    return workbook[sheet]
+
+
+def is_empty_excel_value(value):
+    """ is the cell value considered empty
+
+    a value is empty if it is None or a string consisting only of
+    whitespace; any other value, including the number 0, is not empty
+    """
+    return value is None or (isinstance(value, str) and not value.strip())
+
+
+def strip_excel_value(value):
+    """ remove whitespace from an excel value if it is a string
+
+    leading and trailing whitespace is removed from strings, values of
+    any other type are returned unchanged
+    """
+    if isinstance(value, str):
+        return value.strip()
+    return value
diff --git a/superx_budget/superx.py b/superx_budget/superx.py
index 0b2db67..dc7da87 100644
--- a/superx_budget/superx.py
+++ b/superx_budget/superx.py
@@ -3,7 +3,7 @@
 from datetime import datetime
 from collections import namedtuple
 
-from .helpers import get_sheet_of_file
+from .helpers import get_sheet_of_file, strip_excel_value
 from .exceptions import SuperXParserError
 
 EXPECTED_HEADLINE = "Verwendungsnachweis und Kassenstand SAP"
@@ -73,7 +73,8 @@ def _parse_data_table(rows):
     for line in rows:
         if not line[0]:
             continue
-        yield SuperXData(*line[:10])
+        data = [strip_excel_value(value) for value in line[:10]]
+        yield SuperXData(*data)
 
 
 def parse_export_data(xls_sheet):
@@ -88,5 +89,5 @@ def parse_export_data(xls_sheet):
 
 def parse_exported_file(file_path):
-    """ parses the budget file """
-    sheet = get_sheet_of_file(file_path, name=None)
+    """ parses the exported SuperX file """
+    sheet = get_sheet_of_file(file_path, sheet=None)  # None: first sheet
     return parse_export_data(sheet)
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
new file mode 100644
index 0000000..8c36639
--- /dev/null
+++ b/tests/test_helpers.py
@@ -0,0 +1,79 @@
+import pytest
+
+
+@pytest.fixture
+def example_file(example_root):
+    """ path of the example excel file below the example_root fixture """
+    return example_root / "Verbrauchsmittel-Toto-2020.xlsx"
+
+
+@pytest.fixture
+def example_workbook(example_file):
+    """ the example excel file, opened with openpyxl """
+    import openpyxl
+
+    yield openpyxl.open(example_file)
+
+
+@pytest.mark.parametrize(
+    "value,expected",
+    [
+        ("a", False),
+        ("", True),
+        (" ", True),
+        (" a ", False),
+        (None, True),
+        (0, False),
+        (2.2, False),
+    ],
+)
+def test_is_empty_excel_value(value, expected):
+    """ only None and whitespace-only strings are considered empty """
+    from superx_budget.helpers import is_empty_excel_value
+
+    result = is_empty_excel_value(value)
+
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    "value,expected",
+    [
+        ("a", "a"),
+        ("", ""),
+        (" ", ""),
+        (" a ", "a"),
+        (None, None),
+        (1, 1),
+        (2.2, 2.2),
+    ],
+)
+def test_strip_excel_value(value, expected):
+    """ strings are stripped, values of other types stay as they are """
+    from superx_budget.helpers import strip_excel_value
+
+    result = strip_excel_value(value)
+
+    assert result == expected
+
+
+def test_get_sheet_of_file_first(example_file):
+    """ without a sheet name the first sheet of the workbook is returned """
+    from superx_budget.helpers import get_sheet_of_file
+
+    sheet = get_sheet_of_file(example_file)  # sheet=None
+    first_row = next(sheet.values)
+    first_cell = first_row[0]
+
+    assert first_cell.strip() == "Nr."
+
+
+def test_get_sheet_of_file_named(example_file):
+    """ a sheet name selects the sheet with that name """
+    from superx_budget.helpers import get_sheet_of_file
+
+    sheet = get_sheet_of_file(example_file, sheet="Safeguard I")
+    first_row = next(sheet.values)
+    first_cell = first_row[0]
+
+    assert first_cell == 1
diff --git a/tests/test_superx_parser.py b/tests/test_superx_parser.py
index ec74eb3..c0b097e 100644
--- a/tests/test_superx_parser.py
+++ b/tests/test_superx_parser.py
@@ -95,7 +95,7 @@ def test_parse_data_table():
     from superx_budget.superx import _parse_data_table
 
     rows = [
-        list("ABCDEFGHIJ"),
+        ["A "] + list("BCDEFGHIJ"),
         ["" for i in range(10)],
         list("qrstuvwxyzX"),  # one column more
     ]