From 72819cdb67c877b637843dc68b452ba4f2e1b9a7 Mon Sep 17 00:00:00 2001
From: Laurent Bachelier
Date: Wed, 8 Oct 2014 15:17:02 +0200
Subject: [PATCH] html2text: Use the class if possible

---
 weboob/tools/html.py | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/weboob/tools/html.py b/weboob/tools/html.py
index 6bf7d640..53c039c7 100644
--- a/weboob/tools/html.py
+++ b/weboob/tools/html.py
@@ -23,13 +23,27 @@ __all__ = ['html2text']
 
 
 try:
-    import html2text as h2t
-    h2t.UNICODE_SNOB = 1
-    h2t.SKIP_INTERNAL_LINKS = True
-    h2t.INLINE_LINKS = False
-    h2t.LINKS_EACH_PARAGRAPH = True
-    html2text = h2t.html2text
-except ImportError:
+    from html2text import HTML2Text
+
     def html2text(html):
-        warnings.warn('python-html2text is not present. HTML pages are not converted into text.', stacklevel=2)
-        return html
+        h = HTML2Text()
+        h.unicode_snob = True
+        h.skip_internal_links = True
+        h.inline_links = False
+        h.links_each_paragraph = True
+        return unicode(h.handle(html))
+
+except ImportError:
+    # Older versions of html2text do not have a class, so we have
+    # to configure the module globally.
+    try:
+        import html2text as h2t
+        h2t.UNICODE_SNOB = 1
+        h2t.SKIP_INTERNAL_LINKS = True
+        h2t.INLINE_LINKS = False
+        h2t.LINKS_EACH_PARAGRAPH = True
+        html2text = h2t.html2text
+    except ImportError:
+        def html2text(html):
+            warnings.warn('python-html2text is not present. HTML pages are not converted into text.', stacklevel=2)
+            return html