From 677ec83a9700324e0ebbd082b5ee4d1363d3f765 Mon Sep 17 00:00:00 2001 From: Holger Frey Date: Thu, 1 Dec 2022 09:30:38 +0100 Subject: [PATCH] added tags for parsing 'Conrad' invoices --- xinvoice/__init__.py | 50 +++++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/xinvoice/__init__.py b/xinvoice/__init__.py index b25b609..38aa127 100644 --- a/xinvoice/__init__.py +++ b/xinvoice/__init__.py @@ -12,13 +12,20 @@ import click import pyperclip from bs4 import BeautifulSoup +RECIPIENT_MAP = { + "calvino": "Céline", + "pappas": "Babis", + "slesarenko": "Slava", + "makro": "Makro", +} + def open_invoice(file_path): with open(file_path, "r") as handle: return BeautifulSoup(handle, "xml") -def drill_down(soup, tags): +def _drill_down(soup, tags): current_tag, *rest_tags = tags findings = soup.find_all(current_tag) if not findings: @@ -30,11 +37,25 @@ def drill_down(soup, tags): yield from drill_down(child, rest_tags) +def drill_down(soup, *tag_lists): + for tags in tag_lists: + result = list(_drill_down(soup, tags)) + if result != [None]: + break + return result + + def get_recipient(soup): results = drill_down( - soup, ["cac:DeliveryParty", "cac:PartyName", "cbc:Name"] + soup, + ["cac:DeliveryParty", "cac:PartyName", "cbc:Name"], + [ + "ram:ApplicableHeaderTradeDelivery", + "ram:ShipToTradeParty", + "ram:Name", + ], ) - return next(results) + return results[0] def get_recipient_short_name(soup): @@ -42,18 +63,23 @@ def get_recipient_short_name(soup): if full_text is None: return "+++ UNKNOWN +++" full_text = full_text.lower() - if "calvino" in full_text: - return "Céline" - if "pappas" in full_text: - return "Babis" - if "slesarenko" in full_text: - return "Slava" + for key, value in RECIPIENT_MAP.items(): + if key in full_text: + return value return "CPI" def get_items(soup): - result = drill_down(soup, ["cac:InvoiceLine", "cac:Item", "cbc:Name"]) - return list(result) + results = drill_down( + soup, + ["cac:InvoiceLine", "cac:Item", "cbc:Name"], + [ + "ram:IncludedSupplyChainTradeLineItem", + "ram:SpecifiedTradeProduct", + "ram:Name", + ], + ) + return results def parse(file_path): @@ -61,7 +87,7 @@ def parse(file_path): recipient = get_recipient_short_name(soup) items = get_items(soup) lines = [f"for {recipient}:"] + items - text = "\n".join(lines) + text = "\n".join((str(line) for line in lines)) pyperclip.copy(text) return text