rename things related to browsers

weboob.tools.browser -> weboob.deprecated.browser
weboob.tools.parsers -> weboob.deprecated.browser.parsers
weboob.tools.mech -> weboob.deprecated.mech
weboob.browser2 -> weboob.browser
weboob.core.exceptions -> weboob.exceptions

Also, the new tree for browser2 is:

weboob.browser: import weboob.browser.browsers.* and weboob.browser.url.*
weboob.browser.browsers: all browsers (including PagesBrowser and LoginBrowser)
weboob.browser.url: the URL class
weboob.browser.profiles: all Profile classes
weboob.browser.sessions: WeboobSession and FuturesSession
weboob.browser.cookies: cookie handling (WeboobCookieJar)
weboob.browser.pages: all Page and derived classes, and the Form class
weboob.browser.exceptions: specific browser exceptions
weboob.browser.elements: AbstractElement classes, and 'method' decorator
weboob.browser.filters.*: all filters
This commit is contained in:
Romain Bignon 2014-10-07 00:23:18 +02:00
commit d61e15cf84
396 changed files with 1442 additions and 1382 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Laurent Bachelier
# Copyright(C) 2012-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@ -17,9 +17,9 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .browser import Browser, DomainBrowser, Wget, Firefox, UrlNotAllowed, Profile
from .page import PagesBrowser, Page, URL, HTMLPage, LoginBrowser, need_login, JsonPage, LoggedPage, XMLPage
from .browsers import Browser, DomainBrowser, UrlNotAllowed, PagesBrowser, LoginBrowser, need_login
from .url import URL
__all__ = ['Browser', 'DomainBrowser', 'Wget', 'Firefox', 'UrlNotAllowed', 'Profile', 'XMLPage',
'PagesBrowser', 'Page', 'URL', 'HTMLPage', 'LoginBrowser', 'need_login', 'JsonPage', 'LoggedPage']
__all__ = ['Browser', 'DomainBrowser', 'UrlNotAllowed', 'PagesBrowser', 'URL',
'LoginBrowser', 'need_login']

View file

@ -26,6 +26,7 @@ except ImportError:
from urlparse import urlparse, urljoin
import os
import sys
from copy import deepcopy
try:
import requests
@ -35,85 +36,14 @@ except ImportError:
raise ImportError('Please install python-requests >= 2.0')
from weboob.tools.log import getLogger
from weboob.tools.ordereddict import OrderedDict
from .cookies import WeboobCookieJar
from .exceptions import HTTPNotFound, ClientError, ServerError
from .sessions import FuturesSession
class Profile(object):
    """
    Describe the way a Browser should present itself to websites.

    Usually a profile is used to mimic a real browser.
    """

    def setup_session(self, session):
        """
        Configure the given session: default headers, hooks, etc.

        This base implementation always raises NotImplementedError, so
        subclasses have to override it.

        Warning: do not enable the lzma, bzip, bzip2 or sdch encodings, as
        python-requests does not support them yet.
        Supported as of 2.2: gzip, deflate, compress.
        When in doubt, do not change the default Accept-Encoding header
        of python-requests.

        :param session: session object to configure
        """
        raise NotImplementedError()
class Weboob(Profile):
    """
    Profile announcing ourselves as Weboob.

    Recommended for Weboob-friendly websites only.
    """

    def __init__(self, version):
        """
        :param version: version string inserted in the User-Agent header
        """
        self.version = version

    def setup_session(self, session):
        """
        Set the User-Agent header of the session to ``weboob/<version>``.

        Other headers of the session are left untouched.
        """
        user_agent = 'weboob/%s' % self.version
        session.headers['User-Agent'] = user_agent
class Firefox(Profile):
    """
    Try to mimic a specific version of Firefox.

    Ideally, it should follow the current ESR Firefox:
    https://www.mozilla.org/en-US/firefox/organizations/all.html
    Do not change the Firefox version without checking the Gecko one!
    """

    def setup_session(self, session):
        """
        Set up headers for a standard Firefox request
        (except for DNT which isn't on by default but is a good idea).

        The goal is to be unidentifiable.
        """
        # The whole header set of the session is replaced, not updated.
        # https://developer.mozilla.org/en/Gecko_user_agent_string_reference
        # https://bugzilla.mozilla.org/show_bug.cgi?id=572650
        firefox_headers = {
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0',
            'DNT': '1'}
        session.headers = firefox_headers
class Wget(Profile):
    """
    Common alternative user agent.

    Some websites will give you a version with less JavaScript.
    Some others could ban you (after all, wget is not a real browser).
    """

    def __init__(self, version='1.11.4'):
        """
        :param version: wget version announced in the User-Agent header
        """
        self.version = version

    def setup_session(self, session):
        """
        Override the Accept and User-Agent headers of the session.
        """
        # Base headers are kept: if websites want to block fake browsers,
        # they will probably block any wget user agent anyway.
        overrides = {
            'Accept': '*/*',
            'User-Agent': 'Wget/%s' % self.version}
        session.headers.update(overrides)
from .profiles import Firefox
from .pages import NextPage
from .url import URL
class Browser(object):
@ -579,3 +509,183 @@ class DomainBrowser(Browser):
Go to the "home" page, usually the BASEURL.
"""
return self.location(self.BASEURL or self.absurl('/'))
class _PagesBrowserMeta(type):
    """
    Private meta-class used to keep the order of URL instances of PagesBrowser.

    URL objects declared as class attributes are removed from the class
    namespace and stored, sorted by creation order, in the ``_urls``
    ordered dict of the new class. URLs inherited from a parent class are
    deep-copied first, so a subclass never alters the mapping of its parent.
    """

    def __new__(mcs, name, bases, attrs):
        # Iterate over a snapshot of the items: entries are popped from
        # attrs in the same expression, and changing the size of a dict
        # while iterating over a live view raises RuntimeError on Python 3.
        urls = [(url_name, attrs.pop(url_name))
                for url_name, obj in list(attrs.items())
                if isinstance(obj, URL)]
        # Class bodies do not keep declaration order, the counter does.
        urls.sort(key=lambda x: x[1]._creation_counter)

        new_class = super(_PagesBrowserMeta, mcs).__new__(mcs, name, bases, attrs)
        if new_class._urls is None:
            new_class._urls = OrderedDict()
        else:
            new_class._urls = deepcopy(new_class._urls)
        new_class._urls.update(urls)
        return new_class
class PagesBrowser(DomainBrowser):
    r"""
    A browser which works with pages and keeps the state of navigation.

    To use it, derive it and create URL objects as class attributes. When
    open() or location() are called, if the url matches one of the URL
    objects, a Page object is attached to the response. In case of
    location(), it is also stored in self.page.

    Example:

    >>> class HomePage(Page):
    ...     pass
    ...
    >>> class ListPage(Page):
    ...     pass
    ...
    >>> class MyBrowser(PagesBrowser):
    ...     BASEURL = 'http://example.org'
    ...     home = URL('/(index\.html)?', HomePage)
    ...     list = URL('/list\.html', ListPage)
    ...

    You can then use URL instances to go on pages.
    """

    _urls = None
    __metaclass__ = _PagesBrowserMeta

    def __getattr__(self, name):
        """
        Expose the URL objects stored in ``_urls`` as attributes.
        """
        known_urls = self._urls
        if known_urls is None or name not in known_urls:
            raise AttributeError("'%s' object has no attribute '%s'" % (
                self.__class__.__name__, name))
        return known_urls[name]

    def __init__(self, *args, **kwargs):
        super(PagesBrowser, self).__init__(*args, **kwargs)

        self.page = None
        # Each instance gets its own copy of the URL objects, bound to it.
        self._urls = deepcopy(self._urls)
        for bound_url in self._urls.itervalues():
            bound_url.browser = self

    def open(self, *args, **kwargs):
        """
        Same method as
        :meth:`weboob.browser.browsers.DomainBrowser.open`, but the
        response contains an attribute `page` if the url matches any
        :class:`URL` object.
        """
        user_callback = kwargs.pop('callback', lambda response: response)

        # A callback is needed to seamlessly process synchronous and
        # asynchronous requests, see :meth:`Browser.open` and its `async`
        # and `callback` params.
        def internal_callback(response):
            # Let the first URL instance able to handle the response
            # build the page.
            response.page = None
            for candidate in self._urls.itervalues():
                page = candidate.handle(response)
                if page is None:
                    continue
                self.logger.debug('Handle %s with %s' % (response.url, page.__class__.__name__))
                response.page = page
                break
            else:
                self.logger.debug('Unable to handle %s' % response.url)

            return user_callback(response)

        kwargs['callback'] = internal_callback
        return super(PagesBrowser, self).open(*args, **kwargs)

    def location(self, *args, **kwargs):
        """
        Same method as
        :meth:`weboob.browser.browsers.Browser.location`, but if the
        url matches any :class:`URL` object, an attribute `page` is added to
        response, and the attribute :attr:`PagesBrowser.page` is set.
        """
        previous_page = self.page
        if previous_page is not None:
            # Call leave hook.
            previous_page.on_leave()

        response = self.open(*args, **kwargs)

        self.response = response
        self.page = response.page
        self.url = response.url

        if self.page is not None:
            # Call load hook.
            self.page.on_load()

        # Returns self.response in case on_load recalls location()
        return self.response

    def pagination(self, func, *args, **kwargs):
        r"""
        This helper function can be used to handle pagination pages easily.

        When the called function raises an exception :class:`NextPage`, it
        goes on the wanted page and recalls the function.

        :class:`NextPage` constructor can take an url or a Request object.

        >>> class Page(HTMLPage):
        ...     def iter_values(self):
        ...         for el in self.doc.xpath('//li'):
        ...             yield el.text
        ...         for next in self.doc.xpath('//a'):
        ...             raise NextPage(next.attrib['href'])
        ...
        >>> class Browser(PagesBrowser):
        ...     BASEURL = 'http://people.symlink.me'
        ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
        ...
        >>> b = Browser()
        >>> b.list.go(pagenum=1)
        >>> list(b.pagination(lambda: b.page.iter_values()))
        ['One', 'Two', 'Three', 'Four']
        """
        while True:
            try:
                for item in func(*args, **kwargs):
                    yield item
            except NextPage as next_page:
                # Move to the requested page, then call func again.
                self.location(next_page.request)
                continue
            return
def need_login(func):
    """
    Decorator used to require to be logged in to access this function.

    Before calling the decorated function, ``browser.do_login()`` is called
    if the browser has no current page, or if the current page is not
    flagged as logged.

    :param func: function taking the browser as first argument
    :returns: the wrapping function, keeping the name and docstring of func
    """
    from functools import wraps

    # wraps() keeps the metadata of func (name, docstring), which is
    # otherwise lost for introspection and generated documentation.
    @wraps(func)
    def inner(browser, *args, **kwargs):
        if browser.page is None or not browser.page.logged:
            browser.do_login()
        return func(browser, *args, **kwargs)

    return inner
class LoginBrowser(PagesBrowser):
    """
    A browser which supports login.
    """

    def __init__(self, username, password, *args, **kwargs):
        """
        :param username: stored in ``self.username``
        :param password: stored in ``self.password``

        Remaining arguments are given to the parent constructor.
        """
        super(LoginBrowser, self).__init__(*args, **kwargs)
        self.username, self.password = username, password

    def do_login(self):
        """
        Abstract method to implement to login on the website.

        It is called when a login is needed.
        """
        raise NotImplementedError()

View file

@ -23,7 +23,7 @@ from copy import deepcopy
from weboob.tools.log import getLogger
from weboob.tools.ordereddict import OrderedDict
from weboob.browser2.page import NextPage
from weboob.browser.pages import NextPage
from .filters.standard import _Filter, CleanText
from .filters.html import AttributeNotFound, XPathNotFound
@ -38,6 +38,16 @@ class DataError(Exception):
"""
def method(klass):
    """
    Class-decorator allowing to call the class as a method.

    The returned function instantiates klass with the object it is called
    on, then calls that instance with the remaining arguments.
    """
    def inner(self, *args, **kwargs):
        element = klass(self)
        return element(*args, **kwargs)

    return inner
class AbstractElement(object):
_creation_counter = 0
@ -290,3 +300,5 @@ class TableElement(ListElement):
def get_colnum(self, name):
    """
    Return the value stored for name in ``self._cols``, or None if absent.
    """
    columns = self._cols
    return columns.get(name)

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from requests.exceptions import HTTPError
from weboob.core.exceptions import BrowserHTTPError, BrowserHTTPNotFound
from weboob.exceptions import BrowserHTTPError, BrowserHTTPNotFound
class HTTPNotFound(HTTPError, BrowserHTTPNotFound):

View file

@ -21,8 +21,8 @@
import re
from ast import literal_eval
from weboob.browser2.filters.standard import Filter, Regexp, RegexpError
from weboob.core.exceptions import ParseError
from weboob.browser.filters.standard import Filter, Regexp, RegexpError
from weboob.exceptions import ParseError
__all__ = ['JSPayload', 'JSVar']

View file

@ -28,8 +28,8 @@ from dateutil.parser import parse as parse_date
from weboob.capabilities.base import empty
from weboob.tools.compat import basestring
from weboob.core.exceptions import ParseError
from weboob.browser2 import URL
from weboob.exceptions import ParseError
from weboob.browser.url import URL
from weboob.tools.log import getLogger
class NoDefault(object):
@ -251,7 +251,7 @@ class TableCell(_Filter):
For example:
>>> from weboob.capabilities.bank import Transaction
>>> from weboob.browser2.elements import TableElement, ItemElement
>>> from weboob.browser.elements import TableElement, ItemElement
>>> class table(TableElement):
... head_xpath = '//table/thead/th'
... item_xpath = '//table/tbody/tr'

344
weboob/browser/pages.py Normal file
View file

@ -0,0 +1,344 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import warnings
from io import BytesIO
import requests
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.compat import basestring
from weboob.tools.log import getLogger
def pagination(func):
    r"""
    This helper decorator can be used to handle pagination pages easily.

    When the called function raises an exception :class:`NextPage`, it goes on
    the wanted page and recalls the function on the new page.

    :class:`NextPage` constructor can take an url or a Request object.

    >>> class Page(HTMLPage):
    ...     @pagination
    ...     def iter_values(self):
    ...         for el in self.doc.xpath('//li'):
    ...             yield el.text
    ...         for next in self.doc.xpath('//a'):
    ...             raise NextPage(next.attrib['href'])
    ...
    >>> class Browser(PagesBrowser):
    ...     BASEURL = 'http://people.symlink.me'
    ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
    ...
    >>> b = Browser()
    >>> b.list.go(pagenum=1)
    >>> list(b.page.iter_values())
    ['One', 'Two', 'Three', 'Four']
    """
    from functools import wraps

    # wraps() keeps the name and docstring of the decorated method.
    @wraps(func)
    def inner(page, *args, **kwargs):
        while True:
            try:
                for r in func(page, *args, **kwargs):
                    yield r
            except NextPage as e:
                # Go on the requested page and iterate again on the page
                # attached to the new response.
                result = page.browser.location(e.request)
                page = result.page
            else:
                return

    return inner
class NextPage(Exception):
    """
    Exception raised, for example by a Page, to tell
    PagesBrowser.pagination to go on the next page.

    The wanted page is stored in the ``request`` attribute.

    See :meth:`PagesBrowser.pagination` or decorator :func:`pagination`.
    """

    def __init__(self, request):
        Exception.__init__(self)
        self.request = request
class Page(object):
    """
    Base page.

    It keeps the browser, the response and its url, the given params, and
    a logger named after the lowercased class name.
    """

    logged = False

    def __init__(self, browser, response, params=None):
        logger_name = self.__class__.__name__.lower()

        self.browser = browser
        self.logger = getLogger(logger_name, browser.logger)
        self.response = response
        self.url = response.url
        self.params = params

    def on_load(self):
        """
        Event called when browser loads this page.

        Does nothing by default.
        """

    def on_leave(self):
        """
        Event called when browser leaves this page.

        Does nothing by default.
        """
class FormNotFound(Exception):
    """
    Raised by :meth:`HTMLPage.get_form` when no form matches the selection.
    """
class FormSubmitWarning(UserWarning):
    """
    Warning about the submit elements of a form.

    It is emitted when a form has more than one submit element selected
    (which will likely generate an invalid request), or when a submit
    element was provided but not found in the form.
    """
class Form(OrderedDict):
    """
    Represents a form of an HTML page.

    It is used as a dict with pre-filled values from HTML. You can set new
    values as strings by setting an item value.

    submit_el allows you to only consider one submit button (which is what
    browsers do). If set to None, it takes all of them, and if set to False,
    it takes none.
    """

    def __init__(self, page, el, submit_el=None):
        super(Form, self).__init__()
        self.page = page
        self.el = el
        self.submit_el = submit_el

        form_attrs = el.attrib
        self.method = form_attrs.get('method', 'GET')
        self.url = form_attrs.get('action', page.url)
        self.name = form_attrs.get('name', '')

        submits = 0
        for inp in el.xpath('.//input | .//select | .//textarea'):
            # Fields without a name are not part of the form values.
            if 'name' not in inp.attrib:
                continue
            name = inp.attrib['name']
            input_type = inp.attrib.get('type')

            # Unchecked checkboxes and radio buttons are skipped.
            if input_type in ('checkbox', 'radio') and 'checked' not in inp.attrib:
                continue

            if input_type == 'submit':
                # When a submit element is selected, ignore the other ones.
                if self.submit_el is not None and inp is not self.submit_el:
                    continue
                submits += 1

            if inp.tag == 'select':
                # Take the first selected option, or else the first option.
                options = inp.xpath('.//option[@selected]') or inp.xpath('.//option')
                if options:
                    chosen = options[0]
                    value = chosen.attrib.get('value', chosen.text or u'')
                else:
                    value = u''
            else:
                value = inp.attrib.get('value', inp.text or u'')

            self[name] = value

        if submits > 1:
            warnings.warn('Form has more than one submit input, you should chose the correct one', FormSubmitWarning, stacklevel=3)
        if submits == 0 and self.submit_el is not None and self.submit_el is not False:
            warnings.warn('Form had a submit element provided, but it was not found', FormSubmitWarning, stacklevel=3)

    @property
    def request(self):
        """
        Get the Request object from the form.

        Values are sent as query parameters for a GET form, and as data
        otherwise. The Referer header defaults to the url of the page.
        """
        payload_key = 'params' if self.method.lower() == 'get' else 'data'
        req = requests.Request(self.method, self.url, **{payload_key: self})
        req.headers.setdefault('Referer', self.page.url)
        return req

    def submit(self, **kwargs):
        """
        Submit the form and tell browser to be located to the new page.

        Keyword arguments are given to the location() method of the browser;
        data_encoding defaults to the encoding of the page.
        """
        kwargs.setdefault('data_encoding', self.page.encoding)
        browser = self.page.browser
        return browser.location(self.request, **kwargs)
class CsvPage(Page):
    """
    Page whose content is parsed as CSV.

    The parsed document is stored in ``self.doc``: a list of dicts (one per
    row, keyed by header cells) when a header is used, a list of rows
    (lists of cells) otherwise.
    """

    # Dialect name given to csv.reader.
    DIALECT = 'excel'
    # Extra formatting parameters given to csv.reader.
    FMTPARAMS = {}
    # Encoding of the content; 'utf-16le' content is converted to utf-8.
    ENCODING = 'utf-8'
    # If True, '\r\n' and '\r' are replaced by '\n' before parsing.
    NEWLINES_HACK = True

    HEADER = None
    """
    If True, will consider the first line as a header.
    This means the rows will be also available as dictionaries.
    More generally, it is the 1-based number of the header line: lines
    located before it are skipped.
    """

    def __init__(self, browser, response, *args, **kwargs):
        super(CsvPage, self).__init__(browser, response, *args, **kwargs)
        content = response.content
        encoding = self.ENCODING
        if encoding == 'utf-16le':
            # Drop the first decoded character, then hand utf-8 to the parser.
            content = content.decode('utf-16le')[1:].encode('utf-8')
            encoding = 'utf-8'
        if self.NEWLINES_HACK:
            content = content.replace('\r\n', '\n').replace('\r', '\n')
        fp = BytesIO(content)
        self.doc = self.parse(fp, encoding)

    def parse(self, data, encoding=None):
        """
        Parse CSV data.

        :param data: iterable of lines given to csv.reader
        :param encoding: if set, cells are decoded with this encoding
        :returns: list of dicts if a header was found, else list of rows
        """
        import csv
        reader = csv.reader(data, dialect=self.DIALECT, **self.FMTPARAMS)
        header = None
        drows = []
        rows = []
        for lineno, row in enumerate(reader, 1):
            # Skip everything located before the header line.
            if self.HEADER and lineno < self.HEADER:
                continue
            row = self.decode_row(row, encoding)
            if header is None and self.HEADER:
                header = row
            else:
                rows.append(row)
                if header:
                    drow = {}
                    for colnum, cell in enumerate(row):
                        drow[header[colnum]] = cell
                    drows.append(drow)
        # Without header, return every row (not only the last one parsed).
        return drows if header is not None else rows

    def decode_row(self, row, encoding):
        """
        Decode every cell of the row with encoding, if one is given.
        """
        if encoding:
            return [unicode(cell, encoding) for cell in row]
        else:
            return row
class JsonPage(Page):
    """
    Page whose response text is parsed as JSON and stored in ``self.doc``.
    """

    def __init__(self, browser, response, *args, **kwargs):
        super(JsonPage, self).__init__(browser, response, *args, **kwargs)
        from weboob.tools.json import json
        body = response.text
        self.doc = json.loads(body)
class XMLPage(Page):
    """
    Page whose content is parsed with lxml.etree and stored in ``self.doc``.
    """

    ENCODING = None
    """
    Force a page encoding.
    It is recommended to use None for autodetection.
    """

    def __init__(self, browser, response, *args, **kwargs):
        super(XMLPage, self).__init__(browser, response, *args, **kwargs)
        from lxml import etree
        # A forced encoding takes precedence over the one of the response.
        encoding = self.ENCODING or response.encoding
        parser = etree.XMLParser(encoding=encoding)
        stream = BytesIO(response.content)
        self.doc = etree.parse(stream, parser)
class RawPage(Page):
    """
    Page storing the unparsed content of the response in ``self.doc``.
    """

    def __init__(self, browser, response, *args, **kwargs):
        super(RawPage, self).__init__(browser, response, *args, **kwargs)
        raw_content = response.content
        self.doc = raw_content
class HTMLPage(Page):
    """
    HTML page.

    The content is parsed with lxml.html and stored in ``self.doc``.
    """

    FORM_CLASS = Form

    ENCODING = None
    """
    Force a page encoding.
    It is recommended to use None for autodetection.
    """

    def __init__(self, browser, response, *args, **kwargs):
        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
        # A forced encoding takes precedence over the one of the response.
        self.encoding = self.ENCODING or response.encoding
        from lxml import html
        stream = BytesIO(response.content)
        self.doc = html.parse(stream, html.HTMLParser(encoding=self.encoding))

    def get_form(self, xpath='//form', name=None, nr=None, submit=None):
        """
        Get a :class:`Form` object from a selector.
        The form will be analyzed and its parameters extracted.
        In the case there is more than one "submit" input, only one of
        them should be chosen to generate the request.

        :param xpath: xpath string to select forms
        :type xpath: :class:`str`
        :param name: if supplied, select a form with the given name
        :type name: :class:`str`
        :param nr: if supplied, take the n+1 th selected form
        :type nr: :class:`int`
        :param submit: if supplied, xpath string to select the submit \
            element from the form
        :type submit: :class:`str`
        :rtype: :class:`Form`
        :raises: :class:`FormNotFound` if no form is found
        """
        position = 0
        for form_el in self.doc.xpath(xpath):
            if name is not None and form_el.attrib.get('name', '') != name:
                continue
            if nr is not None and position != nr:
                # Not the wanted rank among the selected forms yet.
                position += 1
                continue

            # A string is an xpath to resolve inside the form; any other
            # value is given to the form class as is.
            submit_el = form_el.xpath(submit)[0] if isinstance(submit, basestring) else submit
            return self.FORM_CLASS(self, form_el, submit_el)

        raise FormNotFound()
class LoggedPage(object):
    """
    A page that only logged users can reach. If we did not get a redirection
    for this page, we are sure that the login is still active.

    Do not use this class for pages with mixed content (logged/anonymous)
    or for pages with a login form.
    """

    logged = True

View file

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012-2014 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class Profile(object):
    """
    Describe the way a Browser should present itself to websites.

    Usually a profile is used to mimic a real browser.
    """

    def setup_session(self, session):
        """
        Configure the given session: default headers, hooks, etc.

        This base implementation always raises NotImplementedError, so
        subclasses have to override it.

        Warning: do not enable the lzma, bzip, bzip2 or sdch encodings, as
        python-requests does not support them yet.
        Supported as of 2.2: gzip, deflate, compress.
        When in doubt, do not change the default Accept-Encoding header
        of python-requests.

        :param session: session object to configure
        """
        raise NotImplementedError()
class Weboob(Profile):
    """
    Profile announcing ourselves as Weboob.

    Recommended for Weboob-friendly websites only.
    """

    def __init__(self, version):
        """
        :param version: version string inserted in the User-Agent header
        """
        self.version = version

    def setup_session(self, session):
        """
        Set the User-Agent header of the session to ``weboob/<version>``.

        Other headers of the session are left untouched.
        """
        user_agent = 'weboob/%s' % self.version
        session.headers['User-Agent'] = user_agent
class Firefox(Profile):
    """
    Try to mimic a specific version of Firefox.

    Ideally, it should follow the current ESR Firefox:
    https://www.mozilla.org/en-US/firefox/organizations/all.html
    Do not change the Firefox version without checking the Gecko one!
    """

    def setup_session(self, session):
        """
        Set up headers for a standard Firefox request
        (except for DNT which isn't on by default but is a good idea).

        The goal is to be unidentifiable.
        """
        # The whole header set of the session is replaced, not updated.
        # https://developer.mozilla.org/en/Gecko_user_agent_string_reference
        # https://bugzilla.mozilla.org/show_bug.cgi?id=572650
        firefox_headers = {
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0',
            'DNT': '1'}
        session.headers = firefox_headers
class Wget(Profile):
    """
    Common alternative user agent.

    Some websites will give you a version with less JavaScript.
    Some others could ban you (after all, wget is not a real browser).
    """

    def __init__(self, version='1.11.4'):
        """
        :param version: wget version announced in the User-Agent header
        """
        self.version = version

    def setup_session(self, session):
        """
        Override the Accept and User-Agent headers of the session.
        """
        # Base headers are kept: if websites want to block fake browsers,
        # they will probably block any wget user agent anyway.
        overrides = {
            'Accept': '*/*',
            'User-Agent': 'Wget/%s' % self.version}
        session.headers.update(overrides)

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
# Inspired by: https://github.com/ross/requests-futures/blob/master/requests_futures/sessions.py
# XXX Licence issues ?
# XXX Licence issues?
try:
from concurrent.futures import ThreadPoolExecutor
@ -58,7 +58,6 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):
class WeboobSession(Session):
def prepare_request(self, request):
"""Constructs a :class:`PreparedRequest <PreparedRequest>` for
transmission and returns it. The :class:`PreparedRequest` has settings
@ -99,8 +98,8 @@ class WeboobSession(Session):
)
return p
class FuturesSession(WeboobSession):
class FuturesSession(WeboobSession):
def __init__(self, executor=None, max_workers=2, *args, **kwargs):
"""Creates a FuturesSession

205
weboob/browser/url.py Normal file
View file

@ -0,0 +1,205 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
import re
import requests
from weboob.tools.regex_helper import normalize
class UrlNotResolvable(Exception):
    """
    Raised when the url patterns of an URL instance cannot be resolved
    into a real url with the given arguments.
    """
class URL(object):
    """
    A description of an URL on the PagesBrowser website.

    It takes one or several regexps to match urls, and an optional Page
    class which is instantiated by PagesBrowser.open if the page matches a
    regex.
    """

    # Global counter used to remember the order in which URLs were created.
    _creation_counter = 0

    def __init__(self, *args):
        self.urls = []
        self.klass = None
        self.browser = None
        for arg in args:
            # Strings are url regexps, a class is the page handler.
            if isinstance(arg, basestring):
                self.urls.append(arg)
            if isinstance(arg, type):
                self.klass = arg

        self._creation_counter = URL._creation_counter
        URL._creation_counter += 1

    def is_here(self, **kwargs):
        """
        Returns True if the current page of browser matches this URL.
        If arguments are provided, and only then, they are checked against
        the arguments that were used to build the current page URL.
        """
        assert self.klass is not None, "You can use this method only if there is a Page class handler."

        if len(kwargs):
            params = self.match(self.build(**kwargs)).groupdict()
        else:
            params = None

        # XXX use unquote on current params values because if there are
        # spaces or special characters in them, they are encoded in the
        # params of the current page but not in the ones built from kwargs.
        return self.browser.page and isinstance(self.browser.page, self.klass) \
            and (params is None or params == dict([(k, unquote(v)) for k, v in self.browser.page.params.iteritems()]))

    def stay_or_go(self, **kwargs):
        """
        Request to go on this url only if we aren't already here.

        Arguments are optional parameters for url.

        >>> url = URL('http://example.org/(?P<pagename>).html')
        >>> url.stay_or_go(pagename='index')
        """
        if self.is_here(**kwargs):
            return self.browser.page

        return self.go(**kwargs)

    def go(self, params=None, data=None, **kwargs):
        """
        Request to go on this url.

        Arguments are optional parameters for url.

        :param params: query string parameters
        :param data: POST data
        :returns: the page of the response if any, else the response

        >>> url = URL('http://example.org/(?P<pagename>).html')
        >>> url.go(pagename='index')
        """
        r = self.browser.location(self.build(**kwargs), params=params, data=data)
        return r.page or r

    def open(self, params=None, data=None, **kwargs):
        """
        Request to open on this url.

        Arguments are optional parameters for url.

        :param params: query string parameters
        :param data: POST data
        :type data: str or dict or None
        :returns: the page of the response if any, else the response

        >>> url = URL('http://example.org/(?P<pagename>).html')
        >>> url.open(pagename='index')
        """
        r = self.browser.open(self.build(**kwargs), params=params, data=data)
        return r.page or r

    def build(self, **kwargs):
        """
        Build an url with the given arguments from URL's regexps.

        The first pattern whose named groups are exactly the given
        arguments is used.

        :param params: Query string parameters
        :param browser: browser used to make the url absolute (defaults to
                        the browser bound to this URL)
        :rtype: :class:`str`
        :raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments.
        """
        browser = kwargs.pop('browser', self.browser)
        params = kwargs.pop('params', None)
        patterns = []
        for url in self.urls:
            patterns += normalize(url)

        for pattern, _ in patterns:
            url = pattern
            # Consume arguments from a per-pattern copy: a pattern which is
            # finally rejected must not eat arguments needed by the next ones.
            args = kwargs.copy()
            # only use full-name substitutions, to allow % in URLs
            for key in list(args.keys()):  # snapshot needed because of pop()
                search = '%%(%s)s' % key
                if search in pattern:
                    url = url.replace(search, unicode(args.pop(key)))
            # if there are named substitutions left, ignore pattern
            # (\w also catches group names containing digits)
            if re.search(r'%\(\w+\)s', url):
                continue
            # if not all args were used
            if len(args):
                continue

            url = browser.absurl(url, base=True)
            if params:
                p = requests.models.PreparedRequest()
                p.prepare_url(url, params)
                url = p.url
            return url

        raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns])))

    def match(self, url, base=None):
        """
        Check if the given url matches this object.

        :param url: url to check
        :param base: base url prepended to relative regexps (defaults to
                     the BASEURL of the bound browser)
        :returns: the match object of the first matching regexp, or None
        """
        if base is None:
            assert self.browser is not None
            base = self.browser.BASEURL

        for regex in self.urls:
            # Regexps without a scheme are relative to the base url.
            if not re.match(r'^\w+://.*', regex):
                regex = re.escape(base).rstrip('/') + '/' + regex.lstrip('/')
            m = re.match(regex, url)
            if m:
                return m

    def handle(self, response):
        """
        Handle a HTTP response to get an instance of the klass if it matches.

        If the page has an ``is_here`` attribute, it is used as an extra
        check: a callable is called, a string is evaluated as an xpath on
        the document of the page.

        :returns: the page, or None if there is no page class or no match
        """
        if self.klass is None:
            return

        m = self.match(response.url)
        if m:
            page = self.klass(self.browser, response, m.groupdict())
            if hasattr(page, 'is_here'):
                if callable(page.is_here):
                    if page.is_here():
                        return page
                else:
                    assert isinstance(page.is_here, basestring)
                    if page.doc.xpath(page.is_here):
                        return page
            else:
                return page

    def id2url(self, func):
        r"""
        Helper decorator to get an URL if the given first parameter is an ID.

        If the parameter is already an http(s) url, the decorated function
        is only called when that url matches this object (None is returned
        otherwise). Else the url is built with the parameter as ``id``.
        """
        def inner(browser, id_or_url, *args, **kwargs):
            if re.match('^https?://.*', id_or_url):
                if not self.match(id_or_url, browser.BASEURL):
                    return
            else:
                id_or_url = self.build(id=id_or_url, browser=browser)

            return func(browser, id_or_url, *args, **kwargs)
        return inner

View file

@ -1,716 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
try:
from urllib.parse import unquote
except ImportError:
from urllib import unquote
import re
import warnings
from copy import deepcopy
from io import BytesIO
import requests
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.regex_helper import normalize
from weboob.tools.compat import basestring
from weboob.tools.log import getLogger
from .browser import DomainBrowser
class UrlNotResolvable(Exception):
    """
    Raised when the url patterns of an URL instance cannot be resolved
    into a real url with the given arguments.
    """
class URL(object):
    """
    A description of an URL on the PagesBrowser website.

    It takes one or several regexps to match urls, and an optional Page
    class which is instantiated by PagesBrowser.open if the page matches a
    regex.
    """
    _creation_counter = 0

    def __init__(self, *args):
        """
        :param args: any mix of regexp strings (collected in ``self.urls``)
                     and at most one class (stored in ``self.klass``)
        """
        self.urls = []
        self.klass = None
        self.browser = None

        for arg in args:
            if isinstance(arg, basestring):
                self.urls.append(arg)
            if isinstance(arg, type):
                self.klass = arg

        # Record the instantiation rank so that instances can be sorted in
        # their declaration order later on.
        self._creation_counter = URL._creation_counter
        URL._creation_counter += 1

    def is_here(self, **kwargs):
        """
        Returns True if the current page of browser matches this URL.

        If arguments are provided, and only then, they are checked against
        the arguments that were used to build the current page URL.
        """
        assert self.klass is not None, "You can use this method only if there is a Page class handler."

        if len(kwargs):
            params = self.match(self.build(**kwargs)).groupdict()
        else:
            params = None

        # XXX use unquote on current params values because if there are
        # spaces or special characters in them, they are encoded in the page
        # params but not in kwargs.
        return self.browser.page and isinstance(self.browser.page, self.klass) \
            and (params is None or params == dict([(k, unquote(v)) for k, v in self.browser.page.params.iteritems()]))

    def stay_or_go(self, **kwargs):
        """
        Request to go on this url only if we aren't already here.

        Arguments are optional parameters for url.

        >>> url = URL('http://exawple.org/(?P<pagename>).html')
        >>> url.stay_or_go(pagename='index')
        """
        if self.is_here(**kwargs):
            return self.browser.page

        return self.go(**kwargs)

    def go(self, params=None, data=None, **kwargs):
        """
        Request to go on this url.

        Arguments are optional parameters for url.

        :param params: query string parameters forwarded to the browser
        :param data: POST data forwarded to the browser
        :returns: the page of the response if there is one, else the response

        >>> url = URL('http://exawple.org/(?P<pagename>).html')
        >>> url.go(pagename='index')
        """
        r = self.browser.location(self.build(**kwargs), params=params, data=data)
        return r.page or r

    def open(self, params=None, data=None, **kwargs):
        """
        Request to open on this url.

        Arguments are optional parameters for url.

        :param params: query string parameters forwarded to the browser
        :param data: POST data
        :type data: str or dict or None
        :returns: the page of the response if there is one, else the response

        >>> url = URL('http://exawple.org/(?P<pagename>).html')
        >>> url.open(pagename='index')
        """
        r = self.browser.open(self.build(**kwargs), params=params, data=data)
        return r.page or r

    def build(self, **kwargs):
        """
        Build an url with the given arguments from URL's regexps.

        The special keyword ``browser`` overrides the browser used to make
        the url absolute; every other keyword is a value for a named group.

        :param params: Query string parameters
        :rtype: :class:`str`
        :raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments.
        """
        browser = kwargs.pop('browser', self.browser)
        params = kwargs.pop('params', None)
        patterns = []
        for url in self.urls:
            patterns += normalize(url)

        for pattern, _ in patterns:
            url = pattern
            # Consume arguments from a per-pattern copy: a pattern that uses
            # only part of the arguments and is then rejected must not hide
            # those arguments from the following patterns.
            args = dict(kwargs)
            # only use full-name substitutions, to allow % in URLs
            for key in list(args):  # snapshot of the keys because of pop()
                search = '%%(%s)s' % key
                if search in pattern:
                    url = url.replace(search, unicode(args.pop(key)))
            # if there are named substitutions left, ignore pattern
            if re.search(r'%\([A-z_]+\)s', url):
                continue
            # if not all args were used
            if len(args):
                continue

            url = browser.absurl(url, base=True)
            if params:
                p = requests.models.PreparedRequest()
                p.prepare_url(url, params)
                url = p.url
            return url

        raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns])))

    def match(self, url, base=None):
        """
        Check if the given url match this object.

        :param url: url to test
        :param base: base url prepended to regexps that have no scheme;
                     defaults to the BASEURL of the attached browser
        :returns: the first match object found, or None
        """
        if base is None:
            assert self.browser is not None
            base = self.browser.BASEURL

        for regex in self.urls:
            if not re.match(r'^\w+://.*', regex):
                # Relative regexp: anchor it on the (escaped) base url.
                regex = re.escape(base).rstrip('/') + '/' + regex.lstrip('/')
            m = re.match(regex, url)
            if m:
                return m

    def handle(self, response):
        """
        Handle a HTTP response to get an instance of the klass if it matches.

        When the page object has an ``is_here`` attribute, it is used as an
        extra check: a callable is called, anything else must be a string
        and is evaluated as an xpath on ``page.doc``.

        :returns: the page instance, or None
        """
        if self.klass is None:
            return

        m = self.match(response.url)
        if m:
            page = self.klass(self.browser, response, m.groupdict())
            if hasattr(page, 'is_here'):
                if callable(page.is_here):
                    if page.is_here():
                        return page
                else:
                    assert isinstance(page.is_here, basestring)
                    if page.doc.xpath(page.is_here):
                        return page
            else:
                return page

    def id2url(self, func):
        r"""
        Helper decorator to get an URL if the given first parameter is an ID.

        If the value looks like an http(s) url it must match this object,
        otherwise the wrapped function is not called and None is returned.
        Any other value is used as the ``id`` argument of :meth:`build`.
        """
        def inner(browser, id_or_url, *args, **kwargs):
            if re.match('^https?://.*', id_or_url):
                if not self.match(id_or_url, browser.BASEURL):
                    return
            else:
                id_or_url = self.build(id=id_or_url, browser=browser)

            return func(browser, id_or_url, *args, **kwargs)
        return inner
class _PagesBrowserMeta(type):
    """
    Private meta-class collecting the URL attributes of a PagesBrowser
    class body into an ordered ``_urls`` mapping, in declaration order.
    """
    def __new__(mcs, name, bases, attrs):
        # Pull every URL instance out of the class namespace.
        declared = []
        for attr_name, value in list(attrs.items()):
            if isinstance(value, URL):
                declared.append((attr_name, attrs.pop(attr_name)))
        declared.sort(key=lambda pair: pair[1]._creation_counter)

        new_class = super(_PagesBrowserMeta, mcs).__new__(mcs, name, bases, attrs)
        # Start from a private copy of the inherited mapping, if any, so the
        # parent class is left untouched.
        inherited = new_class._urls
        new_class._urls = OrderedDict() if inherited is None else deepcopy(inherited)
        new_class._urls.update(declared)
        return new_class
class PagesBrowser(DomainBrowser):
    r"""
    A browser which works with pages and keeps the state of navigation.

    Derive it and declare URL objects as class attributes. When open() or
    location() is called and the url matches one of the URL objects, the
    response gets a Page object. location() additionally stores it in
    self.page.

    Example:

    >>> class HomePage(Page):
    ...     pass
    ...
    >>> class ListPage(Page):
    ...     pass
    ...
    >>> class MyBrowser(PagesBrowser):
    ...     BASEURL = 'http://example.org'
    ...     home = URL('/(index\.html)?', HomePage)
    ...     list = URL('/list\.html', ListPage)
    ...

    You can then use URL instances to go on pages.
    """

    _urls = None
    __metaclass__ = _PagesBrowserMeta

    def __getattr__(self, name):
        # Only reached when normal lookup fails: expose the URL objects
        # stored in _urls as attributes.
        known = self._urls
        if known is None or name not in known:
            raise AttributeError("'%s' object has no attribute '%s'" % (
                self.__class__.__name__, name))
        return known[name]

    def __init__(self, *args, **kwargs):
        super(PagesBrowser, self).__init__(*args, **kwargs)

        self.page = None
        # Each instance works on its own copies of the URL objects, bound
        # to itself.
        self._urls = deepcopy(self._urls)
        for bound_url in self._urls.itervalues():
            bound_url.browser = self

    def open(self, *args, **kwargs):
        """
        Same as the parent ``open`` method, but the response gets an
        attribute `page`, set to the page built by the first
        :class:`URL` object which handles the response, or to None.
        """
        user_callback = kwargs.pop('callback', lambda response: response)

        # A callback is needed to process synchronous and asynchronous
        # requests the same way, see the `async` and `callback` params of
        # :meth:`Browser.open`.
        def internal_callback(response):
            # Try every URL object until one handles the response.
            response.page = None
            for candidate in self._urls.itervalues():
                handled = candidate.handle(response)
                if handled is None:
                    continue
                self.logger.debug('Handle %s with %s' % (response.url, handled.__class__.__name__))
                response.page = handled
                break
            else:
                self.logger.debug('Unable to handle %s' % response.url)

            return user_callback(response)

        return super(PagesBrowser, self).open(callback=internal_callback, *args, **kwargs)

    def location(self, *args, **kwargs):
        """
        Same as :meth:`open`, but the response, its page and its url are
        also stored on the browser (:attr:`PagesBrowser.page` notably), and
        the leave/load hooks of the pages are called.
        """
        leaving = self.page
        if leaving is not None:
            # Call leave hook.
            leaving.on_leave()

        response = self.open(*args, **kwargs)

        self.response = response
        self.page = response.page
        self.url = response.url

        if self.page is not None:
            # Call load hook.
            self.page.on_load()

        # Returns self.response in case on_load recalls location()
        return self.response

    def pagination(self, func, *args, **kwargs):
        r"""
        Helper to iterate over paginated content.

        Whenever the called function raises :class:`NextPage`, the browser
        goes on the requested page and the function is called again.

        :class:`NextPage` constructor can take an url or a Request object.

        >>> class Page(HTMLPage):
        ...     def iter_values(self):
        ...         for el in self.doc.xpath('//li'):
        ...             yield el.text
        ...         for next in self.doc.xpath('//a'):
        ...             raise NextPage(next.attrib['href'])
        ...
        >>> class Browser(PagesBrowser):
        ...     BASEURL = 'http://people.symlink.me'
        ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
        ...
        >>> b = Browser()
        >>> b.list.go(pagenum=1)
        >>> list(b.pagination(lambda: b.page.iter_values()))
        ['One', 'Two', 'Three', 'Four']
        """
        while True:
            try:
                for item in func(*args, **kwargs):
                    yield item
            except NextPage as following:
                self.location(following.request)
            else:
                return
def pagination(func):
    r"""
    Decorator for page methods which iterate over paginated content.

    Whenever the decorated function raises :class:`NextPage`, the browser
    of the page goes on the requested page, and the function is called
    again with the page of the new response.

    :class:`NextPage` constructor can take an url or a Request object.

    >>> class Page(HTMLPage):
    ...     @pagination
    ...     def iter_values(self):
    ...         for el in self.doc.xpath('//li'):
    ...             yield el.text
    ...         for next in self.doc.xpath('//a'):
    ...             raise NextPage(next.attrib['href'])
    ...
    >>> class Browser(PagesBrowser):
    ...     BASEURL = 'http://people.symlink.me'
    ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
    ...
    >>> b = Browser()
    >>> b.list.go(pagenum=1)
    >>> list(b.page.iter_values())
    ['One', 'Two', 'Three', 'Four']
    """
    def inner(page, *args, **kwargs):
        current = page
        while True:
            try:
                for item in func(current, *args, **kwargs):
                    yield item
            except NextPage as following:
                # Move the browser, then carry on with the new page.
                current = current.browser.location(following.request).page
            else:
                return
    return inner
class NextPage(Exception):
    """
    Exception raised, for example by a Page, to ask the pagination helpers
    to go on the next page.

    See :meth:`PagesBrowser.pagination` or decorator :func:`pagination`.
    """
    def __init__(self, request):
        # The request is kept as an attribute only; it is not passed to the
        # Exception constructor.
        Exception.__init__(self)
        self.request = request
def need_login(func):
    """
    Decorator for browser methods which require to be logged in.

    ``browser.do_login()`` is called first when the browser has no current
    page, or when the current page is not flagged as logged.
    """
    def inner(browser, *args, **kwargs):
        current = browser.page
        if current is None or not current.logged:
            browser.do_login()
        return func(browser, *args, **kwargs)
    return inner
class LoginBrowser(PagesBrowser):
    """
    A PagesBrowser which holds credentials and can log in.
    """
    def __init__(self, username, password, *args, **kwargs):
        # Remaining arguments are forwarded to the parent constructor.
        super(LoginBrowser, self).__init__(*args, **kwargs)
        self.password = password
        self.username = username

    def do_login(self):
        """
        Abstract method to implement to log in on the website.

        It is called when a login is needed.
        """
        raise NotImplementedError()
class Page(object):
    """
    Base class of all pages.
    """
    # Class-level flag read through ``page.logged``.
    logged = False

    def __init__(self, browser, response, params=None):
        self.browser = browser
        # Child logger of the browser's one, named after the page class.
        logger_name = self.__class__.__name__.lower()
        self.logger = getLogger(logger_name, browser.logger)
        self.response = response
        self.url = self.response.url
        self.params = params

    def on_load(self):
        """
        Hook called when the browser loads this page. Does nothing here.
        """

    def on_leave(self):
        """
        Hook called when the browser leaves this page. Does nothing here.
        """
class FormNotFound(Exception):
    """
    Error raised by :meth:`HTMLPage.get_form` when no form is selected.
    """
class FormSubmitWarning(UserWarning):
    """
    Warning about the submit elements of a form: more than one of them is
    selected, which will likely generate an invalid request, or the
    requested one was not found.
    """
class Form(OrderedDict):
    """
    Represents a form of an HTML page.

    It is an ordered dict pre-filled with the values found in the HTML;
    set an item to a string to change a value.

    submit_el allows you to only consider one submit button (which is what
    browsers do). If set to None, it takes all of them, and if set to False,
    it takes none.
    """

    def __init__(self, page, el, submit_el=None):
        super(Form, self).__init__()
        self.page = page
        self.el = el
        self.submit_el = submit_el

        form_attrs = el.attrib
        self.method = form_attrs.get('method', 'GET')
        self.url = form_attrs.get('action', page.url)
        self.name = form_attrs.get('name', '')

        submit_count = 0
        for field in el.xpath('.//input | .//select | .//textarea'):
            field_attrs = field.attrib
            # Fields without a name are ignored.
            if 'name' not in field_attrs:
                continue
            field_name = field_attrs['name']
            field_type = field_attrs.get('type')

            # Unchecked boxes and radio buttons are ignored.
            if field_type in ('checkbox', 'radio') and 'checked' not in field_attrs:
                continue

            if field_type == 'submit':
                # When a submit element was chosen, skip all the others.
                if self.submit_el is not None and field is not self.submit_el:
                    continue
                submit_count += 1

            if field.tag == 'select':
                # Selected option, else first option, else empty value.
                options = field.xpath('.//option[@selected]')
                if len(options) == 0:
                    options = field.xpath('.//option')
                if len(options) == 0:
                    value = u''
                else:
                    first = options[0]
                    value = first.attrib.get('value', first.text or u'')
            else:
                value = field_attrs.get('value', field.text or u'')

            self[field_name] = value

        if submit_count > 1:
            warnings.warn('Form has more than one submit input, you should chose the correct one', FormSubmitWarning, stacklevel=3)
        wanted_submit = self.submit_el is not None and self.submit_el is not False
        if wanted_submit and submit_count == 0:
            warnings.warn('Form had a submit element provided, but it was not found', FormSubmitWarning, stacklevel=3)

    @property
    def request(self):
        """
        Get the Request object from the form.

        Values are sent as query parameters for the GET method, and as
        body data otherwise.
        """
        payload_key = 'params' if self.method.lower() == 'get' else 'data'
        req = requests.Request(self.method, self.url, **{payload_key: self})
        req.headers.setdefault('Referer', self.page.url)
        return req

    def submit(self, **kwargs):
        """
        Submit the form and tell browser to be located to the new page.

        Keyword arguments are forwarded to ``browser.location``;
        ``data_encoding`` defaults to the encoding of the page.
        """
        kwargs.setdefault('data_encoding', self.page.encoding)
        return self.page.browser.location(self.request, **kwargs)
class CsvPage(Page):
    """
    Page whose content is CSV, parsed into ``self.doc``.

    ``self.doc`` is a list of rows (lists of cells), or a list of
    dictionaries keyed by the header cells when :attr:`HEADER` is set.
    """
    # Dialect name given to csv.reader.
    DIALECT = 'excel'
    # Extra keyword arguments given to csv.reader.
    FMTPARAMS = {}
    # Encoding used to decode the cells.
    ENCODING = 'utf-8'
    # If True, '\r\n' and '\r' are replaced by '\n' before parsing.
    NEWLINES_HACK = True
    # If set, 1-based index of the header line: the lines before it are
    # skipped, and the following rows are returned as dictionaries keyed by
    # the header cells. True is equivalent to 1 (first line is the header).
    HEADER = None

    def __init__(self, browser, response, *args, **kwargs):
        super(CsvPage, self).__init__(browser, response, *args, **kwargs)
        content = response.content
        encoding = self.ENCODING
        if encoding == 'utf-16le':
            # Re-encode to utf-8 for the csv reader, dropping the first
            # character (presumably a byte order mark -- TODO confirm).
            content = content.decode('utf-16le')[1:].encode('utf-8')
            encoding = 'utf-8'
        if self.NEWLINES_HACK:
            content = content.replace('\r\n', '\n').replace('\r', '\n')
        fp = BytesIO(content)
        self.doc = self.parse(fp, encoding)

    def parse(self, data, encoding=None):
        """
        Parse the CSV content.

        :param data: file-like object handed to csv.reader
        :param encoding: if set, encoding used to decode every cell
        :returns: list of dictionaries if a header was read, else the list
                  of rows (empty list if there is no row)
        """
        import csv
        reader = csv.reader(data, dialect=self.DIALECT, **self.FMTPARAMS)
        header = None
        drows = []
        rows = []
        for i, row in enumerate(reader):
            # Skip the lines located before the header.
            if self.HEADER and i+1 < self.HEADER:
                continue
            row = self.decode_row(row, encoding)
            if header is None and self.HEADER:
                header = row
            else:
                rows.append(row)
                if header:
                    drow = {}
                    for i, cell in enumerate(row):
                        drow[header[i]] = cell
                    drows.append(drow)
        # Return every parsed row, not only the last one read by the loop.
        return drows if header is not None else rows

    def decode_row(self, row, encoding):
        """
        Decode every cell of a row with the given encoding, or return the
        row as is if there is no encoding.
        """
        if encoding:
            return [unicode(cell, encoding) for cell in row]
        else:
            return row
class JsonPage(Page):
    """
    Page whose ``doc`` attribute is the decoded JSON text of the response.
    """
    def __init__(self, browser, response, *args, **kwargs):
        super(JsonPage, self).__init__(browser, response, *args, **kwargs)
        from weboob.tools.json import json
        body = response.text
        self.doc = json.loads(body)
class XMLPage(Page):
    """
    Page whose ``doc`` attribute is the lxml tree parsed from the response
    content.
    """
    # Force a page encoding. When None, the encoding of the response is
    # given to the parser. It is recommended to keep None.
    ENCODING = None

    def __init__(self, browser, response, *args, **kwargs):
        super(XMLPage, self).__init__(browser, response, *args, **kwargs)
        from lxml import etree
        charset = self.ENCODING or response.encoding
        xml_parser = etree.XMLParser(encoding=charset)
        stream = BytesIO(response.content)
        self.doc = etree.parse(stream, xml_parser)
class RawPage(Page):
    """
    Page whose ``doc`` attribute is the unparsed content of the response.
    """
    def __init__(self, browser, response, *args, **kwargs):
        super(RawPage, self).__init__(browser, response, *args, **kwargs)
        raw = response.content
        self.doc = raw
class HTMLPage(Page):
    """
    Page whose ``doc`` attribute is the lxml tree parsed from the HTML
    content of the response.
    """
    # Class instantiated by get_form().
    FORM_CLASS = Form

    # Force a page encoding. When None, the encoding of the response is
    # used. It is recommended to keep None.
    ENCODING = None

    def __init__(self, browser, response, *args, **kwargs):
        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
        self.encoding = self.ENCODING or response.encoding
        from lxml import html
        html_parser = html.HTMLParser(encoding=self.encoding)
        stream = BytesIO(response.content)
        self.doc = html.parse(stream, html_parser)

    def get_form(self, xpath='//form', name=None, nr=None, submit=None):
        """
        Get a :class:`Form` object from a selector.
        The form will be analyzed and its parameters extracted.
        In the case there is more than one "submit" input, only one of
        them should be chosen to generate the request.

        :param xpath: xpath string to select forms
        :type xpath: :class:`str`
        :param name: if supplied, select a form with the given name
        :type name: :class:`str`
        :param nr: if supplied, take the n+1 th selected form
        :type nr: :class:`int`
        :param submit: if supplied, xpath string to select the submit \
            element from the form; any other value is given as is to the \
            form class
        :type submit: :class:`str`
        :rtype: :class:`Form`
        :raises: :class:`FormNotFound` if no form is found
        """
        position = 0
        for candidate in self.doc.xpath(xpath):
            if name is not None and candidate.attrib.get('name', '') != name:
                continue
            # Only forms which passed the name filter are counted.
            if nr is not None and position != nr:
                position += 1
                continue

            submit_el = candidate.xpath(submit)[0] if isinstance(submit, basestring) else submit
            return self.FORM_CLASS(self, candidate, submit_el)

        raise FormNotFound()
def method(klass):
    """
    Class-decorator turning a class into a method: calling the method
    instantiates the class with the owner object, then calls the instance
    with the arguments of the call.
    """
    def inner(self, *args, **kwargs):
        bound = klass(self)
        return bound(*args, **kwargs)
    return inner
class LoggedPage(object):
    """
    Marker for a page that only logged users can reach: if we did not get
    redirected away from it, we are sure that the login is still active.

    Do not use this class for pages with mixed content (logged/anonymous)
    or for pages with a login form.
    """
    logged = True

View file

@ -31,7 +31,7 @@ from contextlib import closing
from compileall import compile_dir
from io import BytesIO
from weboob.core.exceptions import BrowserHTTPError, BrowserHTTPNotFound
from weboob.exceptions import BrowserHTTPError, BrowserHTTPNotFound
from .modules import LoadedModule
from weboob.tools.log import getLogger
from weboob.tools.misc import to_unicode
@ -442,7 +442,8 @@ class Repositories(object):
self.load()
def load_browser(self):
from weboob.browser2.browser import Browser, Weboob as WeboobProfile
from weboob.browser.browsers import Browser
from weboob.browser.profiles import Weboob as WeboobProfile
class WeboobBrowser(Browser):
PROFILE = WeboobProfile(self.version)
if self.browser is None:

View file

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser.browser import BrowserIncorrectPassword, BrowserBanned, \
from weboob.deprecated.browser.browser import BrowserIncorrectPassword, BrowserBanned, \
BrowserUnavailable, BrowserRetry, \
BrowserHTTPNotFound, BrowserHTTPError, \
Page, Browser, BrokenPageError, \

View file

@ -49,12 +49,12 @@ from contextlib import closing
from gzip import GzipFile
import warnings
from weboob.core.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserPasswordExpired, BrowserForbidden, BrowserBanned, BrowserHTTPNotFound, BrowserHTTPError, FormFieldConversionWarning, BrowserSSLError
from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserPasswordExpired, BrowserForbidden, BrowserBanned, BrowserHTTPNotFound, BrowserHTTPError, FormFieldConversionWarning, BrowserSSLError
from weboob.tools.decorators import retry
from weboob.tools.log import getLogger
from weboob.tools.mech import ClientForm
from weboob.deprecated.mech import ClientForm
ControlNotFoundError = ClientForm.ControlNotFoundError
from weboob.tools.parsers import get_parser
from weboob.deprecated.browser.parsers import get_parser
__all__ = ['BrowserIncorrectPassword', 'BrowserForbidden', 'BrowserBanned', 'BrowserUnavailable', 'BrowserRetry',
'BrowserPasswordExpired', 'BrowserHTTPNotFound', 'BrowserHTTPError', 'BrokenPageError', 'Page',
@ -134,7 +134,7 @@ class StandardBrowser(mechanize.Browser):
:param firefox_cookies: path to cookies sqlite file
:type firefox_cookies: str
:param parser: parser to use on HTML files
:type parser: :class:`weboob.tools.parsers.iparser.IParser`
:type parser: :class:`weboob.deprecated.browser.parsers.iparser.IParser`
:param history: history manager; default value is an object which
does not keep history
:type history: object
@ -434,7 +434,7 @@ class Browser(StandardBrowser):
:param firefox_cookies: path to cookies sqlite file
:type firefox_cookies: str
:param parser: parser to use on HTML files
:type parser: :class:`weboob.tools.parsers.iparser.IParser`
:type parser: :class:`weboob.deprecated.browser.parsers.iparser.IParser`
:param history: history manager; default value is an object which
does not keep history
:type history: object

View file

@ -64,7 +64,7 @@ class LxmlParser(IParser):
"""
Select one or many elements from an element, using lxml cssselect by default.
Raises :class:`weboob.tools.browser.browser.BrokenPageError` if not found.
Raises :class:`weboob.deprecated.browser.browser.BrokenPageError` if not found.
:param element: element on which to apply selector
:type element: object

View file

@ -32,7 +32,7 @@ from weboob.capabilities.base import ConversionWarning, BaseObject
from weboob.core import Weboob, CallErrors
from weboob.core.backendscfg import BackendsConfig
from weboob.tools.config.iconfig import ConfigError
from weboob.core.exceptions import FormFieldConversionWarning
from weboob.exceptions import FormFieldConversionWarning
from weboob.tools.log import createColoredFormatter, getLogger, DebugFilter, settings as log_settings
from weboob.tools.misc import to_unicode
from .results import ResultsConditionError

View file

@ -32,7 +32,7 @@ from weboob.capabilities.account import CapAccount, Account, AccountRegisterErro
from weboob.core.backendscfg import BackendAlreadyExists
from weboob.core.modules import ModuleLoadError
from weboob.core.repositories import ModuleInstallError
from weboob.core.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden, BrowserSSLError
from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden, BrowserSSLError
from weboob.tools.value import Value, ValueBool, ValueFloat, ValueInt, ValueBackendPassword
from weboob.tools.misc import to_unicode
from weboob.tools.ordereddict import OrderedDict

View file

@ -34,7 +34,7 @@ from weboob.core.ouiboube import Weboob, VersionsMismatchError
from weboob.core.scheduler import IScheduler
from weboob.core.repositories import ModuleInstallError
from weboob.tools.config.iconfig import ConfigError
from weboob.core.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden
from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden
from weboob.tools.value import ValueInt, ValueBool, ValueBackendPassword
from weboob.tools.misc import to_unicode
from weboob.capabilities import UserError

View file

@ -27,9 +27,9 @@ from weboob.capabilities import NotAvailable, NotLoaded
from weboob.tools.misc import to_unicode
from weboob.tools.log import getLogger
from weboob.core.exceptions import ParseError
from weboob.browser2.elements import TableElement, ItemElement
from weboob.browser2.filters.standard import Filter, CleanText, CleanDecimal, TableCell
from weboob.exceptions import ParseError
from weboob.browser.elements import TableElement, ItemElement
from weboob.browser.filters.standard import Filter, CleanText, CleanDecimal, TableCell
__all__ = ['FrenchTransaction', 'AmericanTransaction']

View file

@ -23,7 +23,7 @@ import re
from weboob.capabilities.gallery import CapGallery, BaseGallery, BaseImage
from weboob.tools.backend import Module
from weboob.tools.browser import Browser, Page
from weboob.deprecated.browser import Browser, Page
__all__ = ['GenericComicReaderModule']

View file

@ -17,8 +17,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import Page
from weboob.tools.browser import BrokenPageError
from weboob.deprecated.browser import Page
from weboob.deprecated.browser import BrokenPageError
from lxml.etree import Comment