From 0570063806aa4ae5d54eff817e2ef74b0b5a87b5 Mon Sep 17 00:00:00 2001
From: Holger Frey
Date: Wed, 30 May 2018 11:44:45 +0200
Subject: [PATCH] using chardet library for encoding detection

---
 honeypot/__init__.py |  6 ++++--
 honeypot/utils.py    | 18 ++++++++++++++++++
 setup.py             |  1 +
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/honeypot/__init__.py b/honeypot/__init__.py
index 70d7988..515d358 100644
--- a/honeypot/__init__.py
+++ b/honeypot/__init__.py
@@ -28,7 +28,8 @@ class RootResource:
     def pre_init(cls, moin_config_dir):
         cls.moin_config_dir = moin_config_dir
         moin_farmconfig = os.path.join(moin_config_dir, 'farmconfig.py')
-        with open(moin_farmconfig, 'r') as fh:
+        encoding = utils.guess_encoding(moin_farmconfig)
+        with open(moin_farmconfig, 'r', encoding=encoding) as fh:
             cls.moin_wiki_defs = list(utils.extract_wiki_definitions(fh))
 
 
@@ -55,7 +56,8 @@ class RootResource:
     def _get_wiki_data_dir(self):
         wiki_name = self._get_wiki_name()
         wiki_config = os.path.join(cls.moin_config_dir, wiki_name + '.py')
-        with open(wiki_config, 'r') as fh:
+        encoding = utils.guess_encoding(wiki_config)
+        with open(wiki_config, 'r', encoding=encoding) as fh:
             data_dir = utils.extract_data_dir(fh)
         return data_dir
 
diff --git a/honeypot/utils.py b/honeypot/utils.py
index 803dd76..6d4e8ea 100644
--- a/honeypot/utils.py
+++ b/honeypot/utils.py
@@ -1,3 +1,16 @@
+from chardet.universaldetector import UniversalDetector
+
+
+def guess_encoding(path):
+    detector = UniversalDetector()
+    with open(path, 'rb') as fh:
+        for line in fh:
+            detector.feed(line)
+            if detector.done: break
+    detector.close()
+    return detector.result['encoding']
+
+
 def extract_wiki_definitions(file_handle):
     for line in file_handle:
         if line.startswith('wikis = ['):
@@ -30,3 +43,8 @@ def extract_data_dir(fh):
 
 def dict_helper(dict_like):
     return [' %s: %s' % (k, v) for k, v in dict_like.items()]
+
+
+
+
+
diff --git a/setup.py b/setup.py
index 0659866..1db204f 100644
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,7 @@ with open(os.path.join(here, 'README.md')) as f:
     README = f.read()
 
 requires = [
+    'chardet',
     'plaster_pastedeploy',
     'pyramid',
     'waitress',