html2text: Use the class if possible
This commit is contained in:
parent
362b441445
commit
72819cdb67
1 changed file with 23 additions and 9 deletions
|
|
@@ -23,13 +23,27 @@ __all__ = ['html2text']
|
|||
|
||||
|
||||
try:
|
||||
import html2text as h2t
|
||||
h2t.UNICODE_SNOB = 1
|
||||
h2t.SKIP_INTERNAL_LINKS = True
|
||||
h2t.INLINE_LINKS = False
|
||||
h2t.LINKS_EACH_PARAGRAPH = True
|
||||
html2text = h2t.html2text
|
||||
except ImportError:
|
||||
from html2text import HTML2Text
|
||||
|
||||
def html2text(html):
|
||||
warnings.warn('python-html2text is not present. HTML pages are not converted into text.', stacklevel=2)
|
||||
return html
|
||||
h = HTML2Text()
|
||||
h.unicode_snob = True
|
||||
h.skip_internal_links = True
|
||||
h.inline_links = False
|
||||
h.links_each_paragraph = True
|
||||
return unicode(h.handle(html))
|
||||
|
||||
except ImportError:
|
||||
# Older versions of html2text do not have a class, so we have
|
||||
# to configure the module globally.
|
||||
try:
|
||||
import html2text as h2t
|
||||
h2t.UNICODE_SNOB = 1
|
||||
h2t.SKIP_INTERNAL_LINKS = True
|
||||
h2t.INLINE_LINKS = False
|
||||
h2t.LINKS_EACH_PARAGRAPH = True
|
||||
html2text = h2t.html2text
|
||||
except ImportError:
|
||||
def html2text(html):
|
||||
warnings.warn('python-html2text is not present. HTML pages are not converted into text.', stacklevel=2)
|
||||
return html
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue