Add a RawParser that hands back document content unparsed.

weboob/tools/parsers/__init__.py gains load_raw(), which imports
RawParser inside the function body and returns the class. As its comment
says, this parser does not read HTML and must stay out of get_parser()'s
default preference_order.

weboob/tools/parsers/iparser.py defines RawParser(IParser): parse()
returns data.read() and does not use its encoding argument; tostring()
returns its argument unchanged. RawParser is also added to __all__.

diff --git a/weboob/tools/parsers/__init__.py b/weboob/tools/parsers/__init__.py
index 7a3e7a2f..0bbb528f 100644
--- a/weboob/tools/parsers/__init__.py
+++ b/weboob/tools/parsers/__init__.py
@@ -59,6 +59,12 @@ def load_json():
     from .jsonparser import JsonParser
     return JsonParser
 
+def load_raw():
+    # This parser doesn't read HTML, don't include it in the
+    # preference_order default value below.
+    from .iparser import RawParser
+    return RawParser
+
 
 def get_parser(preference_order=('lxml', 'lxmlsoup')):
     """
diff --git a/weboob/tools/parsers/iparser.py b/weboob/tools/parsers/iparser.py
index 52fed24d..161e009a 100644
--- a/weboob/tools/parsers/iparser.py
+++ b/weboob/tools/parsers/iparser.py
@@ -21,7 +21,7 @@
 import re
 
 
-__all__ = ['IParser']
+__all__ = ['IParser', 'RawParser']
 
 
 class IParser(object):
@@ -53,3 +53,10 @@ class IParser(object):
         """
         p = re.compile(r'<.*?>')
         return p.sub(' ', data).strip()
+
+class RawParser(IParser):
+    def parse(self, data, encoding=None):
+        return data.read()
+
+    def tostring(self, elem):
+        return elem