"""Budget Parser""" from collections import namedtuple from .exceptions import BudgetParserError from .helpers import get_sheet_of_file, is_empty_excel_value, strip_excel_value EXPECTED_TABLE_HEADERS = [ "Nr.", "Projekt", "Laufzeit", "BA", "Fond", "Budget", "Ausgaben", "Rest", ] EXCEL_LINES_TO_IGNORE = {"stand:", "stand"} NUM_EXPECTED_HEADERS = len(EXPECTED_TABLE_HEADERS) ExcelRow = namedtuple("ExcelRow", ["row", "data"]) BudgetData = namedtuple( "BudgetData", [ "row", "number", "project_name", "period", "project", "fond", "budget", "expenses", "rest", ], ) def _check_table_header(xl_row): fields_ignore_none = ( ("" if c is None else c) for c in xl_row.data[:NUM_EXPECTED_HEADERS] ) fields_str = (str(c) for c in fields_ignore_none) fields = [c.strip() for c in fields_str] if fields != EXPECTED_TABLE_HEADERS: msg = f"unexpected headers: '{xl_row.data}'" raise BudgetParserError(msg) def _skip_some_lines(rows): for xl_row in rows: first_cell = xl_row.data[0] if is_empty_excel_value(first_cell): continue if isinstance(first_cell, str): value = first_cell.strip().lower() if value in EXCEL_LINES_TO_IGNORE: continue yield xl_row def _parse_data_table(rows): for xl_row in _skip_some_lines(rows): data = [ strip_excel_value(value) for value in xl_row.data[:NUM_EXPECTED_HEADERS] ] yield BudgetData(xl_row.row, *data) def parse_budget_data(xls_sheet): """parses the budget data""" rows = (ExcelRow(i, v) for i, v in enumerate(xls_sheet.values, start=1)) _check_table_header(next(rows)) return list(_parse_data_table(rows)) def parse_budget_file(file_path): """parses the budget file""" sheet = get_sheet_of_file(file_path, sheet=None) return parse_budget_data(sheet)