diff --git a/modules/adecco/pages.py b/modules/adecco/pages.py index cabcb4d3..593afaa3 100644 --- a/modules/adecco/pages.py +++ b/modules/adecco/pages.py @@ -19,7 +19,7 @@ from weboob.tools.browser import BasePage -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from .job import AdeccoJobAdvert import datetime import re diff --git a/modules/apec/pages.py b/modules/apec/pages.py index 3a3db828..80f29b52 100644 --- a/modules/apec/pages.py +++ b/modules/apec/pages.py @@ -19,7 +19,7 @@ from weboob.tools.browser import BasePage -from weboob.tools.misc import html2text +from weboob.tools.html import html2text import dateutil.parser import re diff --git a/modules/arte/pages.py b/modules/arte/pages.py index c6621002..93c2073e 100644 --- a/modules/arte/pages.py +++ b/modules/arte/pages.py @@ -19,7 +19,7 @@ from weboob.tools.browser import BasePage -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.capabilities import NotAvailable from weboob.capabilities.image import BaseImage from weboob.capabilities.collection import Collection diff --git a/modules/aum/contact.py b/modules/aum/contact.py index c0aed71e..5e5ff139 100644 --- a/modules/aum/contact.py +++ b/modules/aum/contact.py @@ -25,7 +25,7 @@ from dateutil.parser import parse as parse_dt from weboob.tools.ordereddict import OrderedDict from weboob.capabilities.contact import Contact as _Contact, ProfileNode -from weboob.tools.misc import html2text +from weboob.tools.html import html2text class FieldBase(object): diff --git a/modules/champslibres/pages.py b/modules/champslibres/pages.py index 9488320b..ec83d224 100644 --- a/modules/champslibres/pages.py +++ b/modules/champslibres/pages.py @@ -21,7 +21,7 @@ from datetime import date from weboob.capabilities.library import Book, Renew from weboob.tools.browser import BasePage from weboob.tools.mech import ClientForm -from weboob.tools.misc import html2text +from weboob.tools.html import html2text class SkipPage(BasePage): diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py index 1be381b8..35ba7d49 100644 --- a/modules/dailymotion/pages.py +++ b/modules/dailymotion/pages.py @@ -26,7 +26,7 @@ import mechanize from weboob.capabilities import NotAvailable from weboob.capabilities.image import BaseImage -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.tools.browser import BasePage, BrokenPageError diff --git a/modules/ehentai/pages.py b/modules/ehentai/pages.py index 7ba6d9b2..d1d41572 100644 --- a/modules/ehentai/pages.py +++ b/modules/ehentai/pages.py @@ -19,7 +19,7 @@ from weboob.tools.browser import BasePage -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.capabilities.image import BaseImage from datetime import datetime diff --git a/modules/gazelle/pages/torrents.py b/modules/gazelle/pages/torrents.py index 93267118..2a93fe85 100644 --- a/modules/gazelle/pages/torrents.py +++ b/modules/gazelle/pages/torrents.py @@ -23,7 +23,8 @@ import urlparse from logging import warning, debug from urlparse import parse_qs -from weboob.tools.misc import html2text, get_bytes_size +from weboob.tools.misc import get_bytes_size +from weboob.tools.html import html2text from weboob.capabilities.torrent import Torrent from weboob.capabilities.base import NotLoaded diff --git a/modules/lefigaro/test.py b/modules/lefigaro/test.py index ba4fb043..675ab985 100644 --- a/modules/lefigaro/test.py +++ b/modules/lefigaro/test.py @@ -19,7 +19,7 @@ from weboob.tools.test import BackendTest -from weboob.tools.misc import html2text +from weboob.tools.html import html2text __all__ = ['LeFigaroTest'] diff --git a/modules/monster/pages.py b/modules/monster/pages.py index 53af0276..10d208d2 100644 --- a/modules/monster/pages.py +++ b/modules/monster/pages.py @@ -19,7 +19,7 @@ from weboob.tools.browser import BasePage -from weboob.tools.misc import html2text +from weboob.tools.html import html2text import re from datetime import datetime, time, timedelta from .job import MonsterJobAdvert diff --git a/modules/okc/pages.py b/modules/okc/pages.py index ac17e6e9..639290c0 100644 --- a/modules/okc/pages.py +++ b/modules/okc/pages.py @@ -23,7 +23,7 @@ from datetime import datetime from weboob.tools.browser import BasePage from weboob.tools.ordereddict import OrderedDict from weboob.capabilities.contact import ProfileNode -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.tools.date import local2utc class LoginPage(BasePage): diff --git a/modules/popolemploi/pages.py b/modules/popolemploi/pages.py index f74b6dc7..dda84fbf 100644 --- a/modules/popolemploi/pages.py +++ b/modules/popolemploi/pages.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.tools.browser import BasePage import dateutil.parser import re diff --git a/modules/senscritique/pages.py b/modules/senscritique/pages.py index a165e1c1..c4b7dbf2 100644 --- a/modules/senscritique/pages.py +++ b/modules/senscritique/pages.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from .calendar import SensCritiquenCalendarEvent from datetime import date, datetime, time, timedelta diff --git a/weboob/applications/boobmsg/boobmsg.py b/weboob/applications/boobmsg/boobmsg.py index 5bcb5382..96b4e8a2 100644 --- a/weboob/applications/boobmsg/boobmsg.py +++ b/weboob/applications/boobmsg/boobmsg.py @@ -32,7 +32,7 @@ from weboob.capabilities.account import CapAccount from weboob.capabilities.contact import CapContact from weboob.tools.application.repl import ReplApplication, defaultcount from weboob.tools.application.formatters.iformatter import IFormatter -from weboob.tools.misc import html2text +from weboob.tools.html import html2text __all__ = ['Boobmsg'] diff --git a/weboob/applications/boobtracker/boobtracker.py b/weboob/applications/boobtracker/boobtracker.py index 1fa92d0f..56ea6add 100644 --- a/weboob/applications/boobtracker/boobtracker.py +++ b/weboob/applications/boobtracker/boobtracker.py @@ -32,7 +32,7 @@ from weboob.capabilities.base import empty, BaseObject from weboob.capabilities.bugtracker import CapBugTracker, Query, Update, Project, Issue, IssueError from weboob.tools.application.repl import ReplApplication, defaultcount from weboob.tools.application.formatters.iformatter import IFormatter, PrettyFormatter -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.tools.date import parse_french_date diff --git a/weboob/applications/comparoob/comparoob.py b/weboob/applications/comparoob/comparoob.py index 3393cbea..9fdb9806 100644 --- a/weboob/applications/comparoob/comparoob.py +++ b/weboob/applications/comparoob/comparoob.py @@ -23,7 +23,7 @@ import sys from weboob.capabilities.pricecomparison import CapPriceComparison -from weboob.tools.misc import html2text +from weboob.tools.html import html2text from weboob.tools.application.repl import ReplApplication from weboob.tools.application.formatters.iformatter import IFormatter, PrettyFormatter diff --git a/weboob/applications/monboob/monboob.py b/weboob/applications/monboob/monboob.py index 68d8c63e..7b4af474 100644 --- a/weboob/applications/monboob/monboob.py +++ b/weboob/applications/monboob/monboob.py @@ -36,7 +36,9 @@ from weboob.core import Weboob, CallErrors from weboob.core.scheduler import Scheduler from weboob.capabilities.messages import CapMessages, CapMessagesPost, Thread, Message from weboob.tools.application.repl import ReplApplication -from weboob.tools.misc import html2text, get_backtrace, utc2local, to_unicode +from weboob.tools.date import utc2local +from weboob.tools.html import html2text +from weboob.tools.misc import get_backtrace, to_unicode __all__ = ['Monboob'] diff --git a/weboob/tools/browser2/filters.py b/weboob/tools/browser2/filters.py index 09bf41e5..9caaae93 100644 --- a/weboob/tools/browser2/filters.py +++ b/weboob/tools/browser2/filters.py @@ -28,7 +28,7 @@ from dateutil.parser import parse as parse_date from weboob.capabilities.base import empty from weboob.tools.compat import basestring from weboob.tools.exceptions import ParseError -from weboob.tools.misc import html2text +from weboob.tools.html import html2text _NO_DEFAULT = object() diff --git a/weboob/tools/html.py b/weboob/tools/html.py new file mode 100644 index 00000000..6bf7d640 --- /dev/null +++ b/weboob/tools/html.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2014 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + +import warnings + +__all__ = ['html2text'] + + +try: + import html2text as h2t + h2t.UNICODE_SNOB = 1 + h2t.SKIP_INTERNAL_LINKS = True + h2t.INLINE_LINKS = False + h2t.LINKS_EACH_PARAGRAPH = True + html2text = h2t.html2text +except ImportError: + def html2text(html): + warnings.warn('python-html2text is not present. HTML pages are not converted into text.', stacklevel=2) + return html diff --git a/weboob/tools/misc.py b/weboob/tools/misc.py index f066125e..226e1be2 100644 --- a/weboob/tools/misc.py +++ b/weboob/tools/misc.py @@ -18,7 +18,6 @@ # along with weboob. If not, see . -import warnings from time import time, sleep import os import sys @@ -28,7 +27,7 @@ import types from .compat import unicode -__all__ = ['get_backtrace', 'get_bytes_size', 'html2text', 'iter_fields', +__all__ = ['get_backtrace', 'get_bytes_size', 'iter_fields', 'to_unicode', 'limit'] @@ -58,18 +57,6 @@ def get_bytes_size(size, unit_name): } return float(size * unit_data.get(unit_name, 1)) -try: - import html2text as h2t - h2t.UNICODE_SNOB = 1 - h2t.SKIP_INTERNAL_LINKS = True - h2t.INLINE_LINKS = False - h2t.LINKS_EACH_PARAGRAPH = True - html2text = h2t.html2text -except ImportError: - def html2text(html): - warnings.warn('python-html2text is not present. HTML pages are not converted into text.', stacklevel=2) - return html - def iter_fields(obj): for attribute_name in dir(obj):