# Canonical vendor name -> known lowercase spellings, abbreviations and typos.
# An empty list means only the canonical name itself (case-insensitively) is
# recognised.  Aliases for shop domains omit the TLD because fix_vendor()
# reduces "example.com"-style input to its second-level label before lookup.
vendor_map = {
    "Accurion GmbH": ["accurion"],
    "Acros Organics": ["acros"],
    "Agilent Technologies": ["agilent"],
    "Alfa Aesar": [
        "alefa aesar",
        "alfa",
        "aesar",
        "alfa aesar - vwr",
        "alfa-aesar",
        "alfaaesar",
    ],
    "Amazon": [],
    "analytics-shop.com": ["analytics-shop"],
    "architekturbedarf.de": ["architekturbedarf"],
    "Avanti Polar Lipids": ["avanti"],
    "Biesterfeld Spezialchemie GmbH": ["biesterfeld", "biesterfeld gmbh"],
    "Bio-Rad": ["bio rad"],
    "Biocat": [],
    "Bioleague GmbH & co.kg": ["bioleague"],
    "Biomol": [],
    "Bürkert Indelfingen": ["bürkert"],
    "Carl Roth GmbH": [
        "carl roth",
        "carl-roth",
        "carlroth",
        "cr",
        "karl roth",
    ],
    "Cellsystems Biotech": ["cellsystems"],
    "Chemicell GmbH": ["chemicell"],
    "Clickbox e.k.": ["clickbox"],
    "Cole Parmer": ["coleparmer"],
    "Conrad Electronics": [
        "conrad",
        "conrad electronic",
        "conrad, fax 09604408937, k-nr 6043142",
    ],
    "Cytoskeleton inc": ["cytoskeleton"],
    "Daniel Maas Dichtstoffhandel & Co.": ["daniel maas"],
    "Delta Mask b.v.": ["delta mask", "deltamask"],
    "der-rollende-shop.de": ["der-rollende-shop"],
    "Dianova": [],
    "Distrelec": ["distrilec"],
    "Edmund Optics GmbH": ["edmund optics", "edmund"],
    "Faust Lab Science GmbH": ["faust", "faust lab"],
    "Fisher Scientific": [
        "fischer scientific",
        "fisher scientific/acros organic",
    ],
    "form.in Lasercenter": ["form.in"],
    "Uwe Markus (Glasbläser)": [
        "glasbläser markus",
        "markus",
        "markus, glasbläser",
        "uwe markus",
        "glasbläser",
    ],
    "Goodfellow": ["good fellow"],
    "Greiner Bio-One": ["greiner bio one"],
    "Hach": [],
    "Hellma GmbH & Co KG": [
        "hellma analytics",
        "hellma optics",
        "hellma optik jena",
    ],
    "Hiss dx": ["hiss"],
    "HJ-Bioanalytik GmbH": ["hj bioanalytik gmbh", "hj bioanalytik"],
    "laborhandel24.de": ["laborhandel24"],
    "locheisen.com": ["locheisen"],
    "praezisionsmesstechnik.de": ["praezisionsmesstechnik"],
    "siebgewebeshop.de": ["siebgewebeshop"],
    "Häberle Labortechnik": ["häberle"],
    "High-Tech-Flon": ["hightechflon"],
    "hygie.de": ["hygie"],
    "Iolitec": [
        "ionic liquids technologie",
        "ionic liquids technologies",
        "iolitec ionic liquids technologies",
    ],
    "Iris Biotech GmbH": ["iris biotech"],
    "Ismatec": ["ismatec/idex"],
    "Jackson Immunoresearch": ["jacksonimmuno", "jackson"],
    "Kinesis Abimed": ["kinesis", "kinesisgmbh"],
    "Kisker Biotech GmbH & co. kg": ["kisker"],
    "Knick Elektronische Messgeräte GmbH": ["knick"],
    "Kummer Laborbedarf": ["kummer"],
    "Laborhandel Krumpholz": ["krumpholz"],
    "leuchtmittelmarkt.com": ["leuchtmittelmarkt"],
    "Life Sciences Advanced Technologies inc": [
        "life sciences advanced technologies",
    ],
    "Life Technologies": ["lifetechnologies", "ife technologies"],
    "Macherey Nagel": ["m-n", "mn", "macherey-nagel"],
    "magnets4you GmbH": ["magnets4you"],
    "magnet-shop.com": ["magnet-shop"],
    "Mercateo ag": ["mercateo"],
    "Merck": ["merck über vwr"],
    "merck berufsbekleidung": [
        "merck berufskleidung",
        "merck-berufsbekleidung",
    ],
    "Merck millipore inc.": ["merck millipore", "merckmillipore"],
    "Mettler-Toledo GmbH": ["mettler toledo", "mettler-toledo", "mettler"],
    "Micro Particles GmbH": ["micro particles"],
    "Microdyn-Nadir GmbH": ["microdyn-nadir"],
    "Millipore GmbH": ["millipore"],
    "Molecular Devices / Genetix": ["molecular devices", "genetix"],
    "Nanoandmore GmbH": ["nano-and-more", "nanoandmore"],
    "Nanocyl s.a.": ["nanocyl"],
    "Neo Lab": ["neolab"],
    "Newport Spectra-Physics": ["newport"],
    "OCO Ortenauer Gase GmbH": ["oco ortenauer gase", "oco"],
    "Pall Corporation": ["pall"],
    "Plano GmbH": ["plano", "plano-eu", "plano-em"],
    "Polyan GmbH": ["polyan"],
    "Polyscience Europa GmbH": [
        "polysciences",
        "polysciences europe gmbh",
        "polysciences, inc.",
    ],
    "ProLiquid GmbH": ["pro-liquid"],
    "Qiagen": ["quiagen"],
    "R&D Systems": ["r&d system"],
    "Reichelt Elektronik": ["reichelt"],
    "RS Components": ["rs"],
    "S-Polytec": ["s-polytech"],
    "Sarstedt AG & Co. KG": ["sarstedt"],
    "Science Services GmbH": ["science services"],
    "Scienion AG": ["scienion"],
    "Sigma Aldrich": [
        "aldrich",
        "adrich",
        "aldich",
        "aldritch",
        "aldrích",
        "fluka",
        "sa",
        "sigma - aldrich",
        "sigma adrich",
        "sigma aldich",
        "sigma aldritch",
        "sigma aldrích",
        "sigma- aldrich",
        "sigma-adrich",
        "sigma-aldrich",
        "sigma-aldrich (fluka)",
        "sigmaaldrich",
        "roche, sigma-aldrich",
        "sigmar",
        "sigma",
    ],
    "Sterlitech": [],
    "Supermagnete": ["supermagnet"],
    "taq-dna.com": ["taq-dna"],
    "TCI Deutschland GmbH": [
        "tci",
        "tci chemical",
        "tci chemicals",
        "tci deutschland",
        "tci europe",
        "tci europe n.v.",
    ],
    "Thermo Fisher Scientific": [
        "themofisher",
        "thermofischer",
        "thermofisher",
        "thermo fisher",
        "thermofisher scientific",
        "thermo scientific",
        "thermo scientific - www.perbio.com",
        "perbio",
        "thermo scientific / pierce",
        "thermo ",
        "pierce",
    ],
    "Tib MolBiol": ["tibmolbiol", "tibmolbio"],
    "Tse Systems GmbH": ["tse systems"],
    "Vilber Lourmat GmbH": ["vilber"],
    "VWR International GmbH": [
        "vwr",
        "vwr (lenz laborglas gmbh & co.kg)",
        "vwr chemicals",
        "vwr collection",
        "vwt",
    ],
    "Weigert": ["drweigert"],
    "Xantec Bioanalytics": ["xantec", "xantec bioanalyticss"],
    "Zitt-Thoma": ["zitt thoma"],
}


def _normalize_key(text: str) -> str:
    """Return *text* lowercased, trimmed, with whitespace runs collapsed."""
    return " ".join(text.lower().split())


def _build_replace_map(mapping):
    """Flatten a ``{canonical: [aliases]}`` mapping into ``{key: canonical}``.

    Keys are normalised with ``_normalize_key`` so that they can actually be
    hit by ``fix_vendor``, which trims and collapses whitespace before the
    lookup (an alias stored as ``"thermo "`` would otherwise be unreachable).

    Raises:
        ValueError: if an alias normalises to a key that is already present,
            i.e. it is ambiguous or redundant.
    """
    lookup = {}
    for vendor, replacements in mapping.items():
        lookup[_normalize_key(vendor)] = vendor
        for replacement in replacements:
            key = _normalize_key(replacement)
            # Check the key that is stored, not the raw alias text.
            if key in lookup:
                raise ValueError(f"{vendor}: {replacement}")
            lookup[key] = vendor
    return lookup


# Normalised spelling -> canonical vendor name; built once at import time.
replace_map = _build_replace_map(vendor_map)


def _process_url(who: str) -> str:
    """Return the network location of *who* if it parses as a URL, else *who*."""
    # Imported locally so this helper works even when the module header does
    # not bring ``urlparse`` into scope.
    from urllib.parse import urlparse

    try:
        netloc = urlparse(who).netloc
    except ValueError:
        # urlparse rejects some malformed input (e.g. unbalanced "[" in the
        # host part); such text is simply not treated as a URL.
        return who
    return netloc or who


def _process_common_domains(who: str) -> str:
    """Reduce a name ending in ``.eu``/``.com``/``.de`` to its second-level label.

    ``"www.example.com"`` becomes ``"example"``; anything else is returned
    unchanged.
    """
    if who.endswith((".eu", ".com", ".de")):
        # endswith() guarantees at least one dot, so index -2 always exists.
        return who.split(".")[-2]
    return who


def _strip_multiple_spaces(who: str) -> str:
    """Collapse every run of whitespace in *who* into a single space."""
    return " ".join(who.split())


def fix_vendor(who: str) -> str:
    """Map a free-text vendor name to its canonical spelling.

    The input is trimmed and lowercased, reduced to its host if it is a URL,
    reduced to the second-level label if it ends in a common TLD, and has its
    whitespace collapsed.  The result is looked up in ``replace_map``.

    Returns:
        The canonical vendor name, or *who* unchanged when nothing matches.
    """
    tmp = who.strip().lower()
    tmp = _process_url(tmp)
    tmp = _process_common_domains(tmp)
    tmp = _strip_multiple_spaces(tmp)
    return replace_map.get(tmp, who)