rename things related to browsers

weboob.tools.browser -> weboob.deprecated.browser
weboob.tools.parsers -> weboob.deprecated.browser.parsers
weboob.tools.mech -> weboob.deprecated.mech
weboob.browser2 -> weboob.browser
weboob.core.exceptions -> weboob.exceptions

Also, the new tree for browser2 is:

weboob.browser: import weboob.browser.browsers.* and weboob.browser.url.*
weboob.browser.browsers: all browsers (including PagesBrowser and LoginBrowser)
weboob.browser.url: the URL class
weboob.browser.profiles: all Profile classes
weboob.browser.sessions: WeboobSession and FuturesSession
weboob.browser.cookies: that's a cookies thing
weboob.browser.pages: all Page and derivated classes, and Form class
weboob.browser.exceptions: specific browser exceptions
weboob.browser.elements: AbstractElement classes, and 'method' decorator
weboob.browser.filters.*: all filters
2014-10-07 00:23:18 +02:00 · 2014-10-07 00:23:18 +02:00 · d61e15cf84
commit d61e15cf84
parent 1f95e7631f
396 changed files with 1442 additions and 1382 deletions
--- a/weboob/browser2/__init__.py
+++ b/weboob/browser2/__init__.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2012 Laurent Bachelier
+# Copyright(C) 2012-2014 Laurent Bachelier
 #
 # This file is part of weboob.
 #
@ -17,9 +17,9 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-from .browser import Browser, DomainBrowser, Wget, Firefox, UrlNotAllowed, Profile
-from .page import PagesBrowser, Page, URL, HTMLPage, LoginBrowser, need_login, JsonPage, LoggedPage, XMLPage
+from .browsers import Browser, DomainBrowser, UrlNotAllowed, PagesBrowser, LoginBrowser, need_login
+from .url import URL


-__all__ = ['Browser', 'DomainBrowser', 'Wget', 'Firefox', 'UrlNotAllowed', 'Profile', 'XMLPage',
-           'PagesBrowser', 'Page', 'URL', 'HTMLPage', 'LoginBrowser', 'need_login', 'JsonPage', 'LoggedPage']
+__all__ = ['Browser', 'DomainBrowser', 'UrlNotAllowed', 'PagesBrowser', 'URL',
+           'LoginBrowser', 'need_login']
--- a/weboob/browser/browsers.py
+++ b/weboob/browser/browsers.py
@ -26,6 +26,7 @@ except ImportError:
    from urlparse import urlparse, urljoin
 import os
 import sys
+from copy import deepcopy

 try:
    import requests
@ -35,85 +36,14 @@ except ImportError:
    raise ImportError('Please install python-requests >= 2.0')

 from weboob.tools.log import getLogger
+from weboob.tools.ordereddict import OrderedDict

 from .cookies import WeboobCookieJar
 from .exceptions import HTTPNotFound, ClientError, ServerError
 from .sessions import FuturesSession
-
-
-class Profile(object):
-    """
-    A profile represents the way Browser should act.
-    Usually it is to mimic a real browser.
-    """
-
-    def setup_session(self, session):
-        """
-        Change default headers, set up hooks, etc.
-
-        Warning: Do not enable lzma, bzip or bzip2, sdch encodings
-        as python-requests does not support it yet.
-        Supported as of 2.2: gzip, deflate, compress.
-        In doubt, do not change the default Accept-Encoding header
-        of python-requests.
-        """
-        raise NotImplementedError()
-
-
-class Weboob(Profile):
-    """
-    It's us!
-    Recommended for Weboob-friendly websites only.
-    """
-
-    def __init__(self, version):
-        self.version = version
-
-    def setup_session(self, session):
-        session.headers['User-Agent'] = 'weboob/%s' % self.version
-
-
-class Firefox(Profile):
-    """
-    Try to mimic a specific version of Firefox.
-    Ideally, it should follow the current ESR Firefox:
-    https://www.mozilla.org/en-US/firefox/organizations/all.html
-    Do not change the Firefox version without checking the Gecko one!
-    """
-
-    def setup_session(self, session):
-        """
-        Set up headers for a standard Firefox request
-        (except for DNT which isn't on by default but is a good idea).
-
-        The goal is to be unidentifiable.
-        """
-        # Replace all base requests headers
-        # https://developer.mozilla.org/en/Gecko_user_agent_string_reference
-        # https://bugzilla.mozilla.org/show_bug.cgi?id=572650
-        session.headers = {
-            'Accept-Language': 'en-us,en;q=0.5',
-            'Accept-Encoding': 'gzip, deflate',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0',
-            'DNT': '1'}
-
-
-class Wget(Profile):
-    """
-    Common alternative user agent.
-    Some websites will give you a version with less JavaScript.
-    Some others could ban you (after all, wget is not a real browser).
-    """
-    def __init__(self, version='1.11.4'):
-        self.version = version
-
-    def setup_session(self, session):
-        # Don't remove base headers, if websites want to block fake browsers,
-        # they will probably block any wget user agent anyway.
-        session.headers.update({
-            'Accept': '*/*',
-            'User-Agent': 'Wget/%s' % self.version})
+from .profiles import Firefox
+from .pages import NextPage
+from .url import URL


 class Browser(object):
@ -579,3 +509,183 @@ class DomainBrowser(Browser):
        Go to the "home" page, usually the BASEURL.
        """
        return self.location(self.BASEURL or self.absurl('/'))
+
+
+class _PagesBrowserMeta(type):
+    """
+    Private meta-class used to keep order of URLs instances of PagesBrowser.
+    """
+    def __new__(mcs, name, bases, attrs):
+        urls = [(url_name, attrs.pop(url_name)) for url_name, obj in attrs.items() if isinstance(obj, URL)]
+        urls.sort(key=lambda x: x[1]._creation_counter)
+
+        new_class = super(_PagesBrowserMeta, mcs).__new__(mcs, name, bases, attrs)
+        if new_class._urls is None:
+            new_class._urls = OrderedDict()
+        else:
+            new_class._urls = deepcopy(new_class._urls)
+        new_class._urls.update(urls)
+        return new_class
+
+class PagesBrowser(DomainBrowser):
+    r"""
+    A browser which works pages and keep state of navigation.
+
+    To use it, you have to derive it and to create URL objects as class
+    attributes. When open() or location() are called, if the url matches
+    one of URL objects, it returns a Page object. In case of location(), it
+    stores it in self.page.
+
+    Example:
+
+    >>> class HomePage(Page):
+    ...     pass
+    ...
+    >>> class ListPage(Page):
+    ...     pass
+    ...
+    >>> class MyBrowser(PagesBrowser):
+    ...     BASEURL = 'http://example.org'
+    ...     home = URL('/(index\.html)?', HomePage)
+    ...     list = URL('/list\.html', ListPage)
+    ...
+
+    You can then use URL instances to go on pages.
+    """
+
+
+    _urls = None
+    __metaclass__ = _PagesBrowserMeta
+
+    def __getattr__(self, name):
+        if self._urls is not None and name in self._urls:
+            return self._urls[name]
+        else:
+            raise AttributeError("'%s' object has no attribute '%s'" % (
+                self.__class__.__name__, name))
+
+    def __init__(self, *args, **kwargs):
+        super(PagesBrowser, self).__init__(*args, **kwargs)
+
+        self.page = None
+        self._urls = deepcopy(self._urls)
+        for url in self._urls.itervalues():
+            url.browser = self
+
+    def open(self, *args, **kwargs):
+        """
+        Same method than
+        :meth:`weboob.browser.browsers.DomainBrowser.open`, but the
+        response contains an attribute `page` if the url matches any
+        :class:`URL` object.
+        """
+
+        callback = kwargs.pop('callback', lambda response: response)
+
+        # Have to define a callback to seamlessly process synchronous and
+        # asynchronous requests, see :meth:`Browser.open` and its `async`
+        # and `callback` params.
+        def internal_callback(response):
+            # Try to handle the response page with an URL instance.
+            response.page = None
+            for url in self._urls.itervalues():
+                page = url.handle(response)
+                if page is not None:
+                    self.logger.debug('Handle %s with %s' % (response.url, page.__class__.__name__))
+                    response.page = page
+                    break
+
+            if response.page is None:
+                self.logger.debug('Unable to handle %s' % response.url)
+
+            return callback(response)
+
+        return super(PagesBrowser, self).open(callback=internal_callback, *args, **kwargs)
+
+    def location(self, *args, **kwargs):
+        """
+        Same method than
+        :meth:`weboob.browser.browsers.Browser.location`, but if the
+        url matches any :class:`URL` object, an attribute `page` is added to
+        response, and the attribute :attr:`PagesBrowser.page` is set.
+        """
+        if self.page is not None:
+            # Call leave hook.
+            self.page.on_leave()
+
+        response = self.open(*args, **kwargs)
+
+        self.response = response
+        self.page = response.page
+        self.url = response.url
+
+        if self.page is not None:
+            # Call load hook.
+            self.page.on_load()
+
+        # Returns self.response in case on_load recalls location()
+        return self.response
+
+    def pagination(self, func, *args, **kwargs):
+        r"""
+        This helper function can be used to handle pagination pages easily.
+
+        When the called function raises an exception :class:`NextPage`, it goes
+        on the wanted page and recall the function.
+
+        :class:`NextPage` constructor can take an url or a Request object.
+
+        >>> class Page(HTMLPage):
+        ...     def iter_values(self):
+        ...         for el in self.doc.xpath('//li'):
+        ...             yield el.text
+        ...         for next in self.doc.xpath('//a'):
+        ...             raise NextPage(next.attrib['href'])
+        ...
+        >>> class Browser(PagesBrowser):
+        ...     BASEURL = 'http://people.symlink.me'
+        ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
+        ...
+        >>> b = Browser()
+        >>> b.list.go(pagenum=1)
+        >>> list(b.pagination(lambda: b.page.iter_values()))
+        ['One', 'Two', 'Three', 'Four']
+        """
+        while True:
+            try:
+                for r in func(*args, **kwargs):
+                    yield r
+            except NextPage as e:
+                self.location(e.request)
+            else:
+                return
+
+
+def need_login(func):
+    """
+    Decorator used to require to be logged to access to this function.
+    """
+    def inner(browser, *args, **kwargs):
+        if browser.page is None or not browser.page.logged:
+            browser.do_login()
+        return func(browser, *args, **kwargs)
+
+    return inner
+
+
+class LoginBrowser(PagesBrowser):
+    """
+    A browser which supports login.
+    """
+    def __init__(self, username, password, *args, **kwargs):
+        super(LoginBrowser, self).__init__(*args, **kwargs)
+        self.username = username
+        self.password = password
+
+    def do_login(self):
+        """
+        Abstract method to implement to login on website.
+
+        It is call when a login is needed.
+        """
+        raise NotImplementedError()
--- a/weboob/browser2/cookies.py
+++ b/weboob/browser2/cookies.py
--- a/weboob/browser2/elements.py
+++ b/weboob/browser2/elements.py
@ -23,7 +23,7 @@ from copy import deepcopy

 from weboob.tools.log import getLogger
 from weboob.tools.ordereddict import OrderedDict
-from weboob.browser2.page import NextPage
+from weboob.browser.pages import NextPage

 from .filters.standard import _Filter, CleanText
 from .filters.html import AttributeNotFound, XPathNotFound
@ -38,6 +38,16 @@ class DataError(Exception):
    """


+def method(klass):
+    """
+    Class-decorator to call it as a method.
+    """
+    def inner(self, *args, **kwargs):
+        return klass(self)(*args, **kwargs)
+    return inner
+
+
+
 class AbstractElement(object):
    _creation_counter = 0

@ -290,3 +300,5 @@ class TableElement(ListElement):

    def get_colnum(self, name):
        return self._cols.get(name, None)
+
+
--- a/weboob/browser2/exceptions.py
+++ b/weboob/browser2/exceptions.py
@ -18,7 +18,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

 from requests.exceptions import HTTPError
-from weboob.core.exceptions import BrowserHTTPError, BrowserHTTPNotFound
+from weboob.exceptions import BrowserHTTPError, BrowserHTTPNotFound


 class HTTPNotFound(HTTPError, BrowserHTTPNotFound):
--- a/weboob/browser2/filters/__init__.py
+++ b/weboob/browser2/filters/__init__.py
--- a/weboob/browser2/filters/html.py
+++ b/weboob/browser2/filters/html.py
--- a/weboob/browser2/filters/javascript.py
+++ b/weboob/browser2/filters/javascript.py
@ -21,8 +21,8 @@
 import re
 from ast import literal_eval

-from weboob.browser2.filters.standard import Filter, Regexp, RegexpError
-from weboob.core.exceptions import ParseError
+from weboob.browser.filters.standard import Filter, Regexp, RegexpError
+from weboob.exceptions import ParseError


 __all__ = ['JSPayload', 'JSVar']
--- a/weboob/browser2/filters/json.py
+++ b/weboob/browser2/filters/json.py
--- a/weboob/browser2/filters/standard.py
+++ b/weboob/browser2/filters/standard.py
@ -28,8 +28,8 @@ from dateutil.parser import parse as parse_date

 from weboob.capabilities.base import empty
 from weboob.tools.compat import basestring
-from weboob.core.exceptions import ParseError
-from weboob.browser2 import URL
+from weboob.exceptions import ParseError
+from weboob.browser.url import URL
 from weboob.tools.log import getLogger

 class NoDefault(object):
@ -251,7 +251,7 @@ class TableCell(_Filter):
    For example:

    >>> from weboob.capabilities.bank import Transaction
-    >>> from weboob.browser2.elements import TableElement, ItemElement
+    >>> from weboob.browser.elements import TableElement, ItemElement
    >>> class table(TableElement):
    ...     head_xpath = '//table/thead/th'
    ...     item_xpath = '//table/tbody/tr'
--- a/weboob/browser/pages.py
+++ b/weboob/browser/pages.py
@ -0,0 +1,344 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import absolute_import
+
+import warnings
+from io import BytesIO
+
+import requests
+
+from weboob.tools.ordereddict import OrderedDict
+from weboob.tools.compat import basestring
+
+from weboob.tools.log import getLogger
+
+
+def pagination(func):
+    r"""
+    This helper decorator can be used to handle pagination pages easily.
+
+    When the called function raises an exception :class:`NextPage`, it goes on
+    the wanted page and recall the function.
+
+    :class:`NextPage` constructor can take an url or a Request object.
+
+    >>> class Page(HTMLPage):
+    ...     @pagination
+    ...     def iter_values(self):
+    ...         for el in self.doc.xpath('//li'):
+    ...             yield el.text
+    ...         for next in self.doc.xpath('//a'):
+    ...             raise NextPage(next.attrib['href'])
+    ...
+    >>> class Browser(PagesBrowser):
+    ...     BASEURL = 'http://people.symlink.me'
+    ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
+    ...
+    >>> b = Browser()
+    >>> b.list.go(pagenum=1)
+    >>> list(b.page.iter_values())
+    ['One', 'Two', 'Three', 'Four']
+    """
+    def inner(page, *args, **kwargs):
+        while True:
+            try:
+                for r in func(page, *args, **kwargs):
+                    yield r
+            except NextPage as e:
+                result = page.browser.location(e.request)
+                page = result.page
+            else:
+                return
+
+    return inner
+
+
+class NextPage(Exception):
+    """
+    Exception used for example in a Page to tell PagesBrowser.pagination to
+    go on the next page.
+
+    See :meth:`PagesBrowser.pagination` or decorator :func:`pagination`.
+    """
+    def __init__(self, request):
+        super(NextPage, self).__init__()
+        self.request = request
+
+
+class Page(object):
+    """
+    Base page.
+    """
+    logged = False
+
+    def __init__(self, browser, response, params=None):
+        self.browser = browser
+        self.logger = getLogger(self.__class__.__name__.lower(), browser.logger)
+        self.response = response
+        self.url = self.response.url
+        self.params = params
+
+    def on_load(self):
+        """
+        Event called when browser loads this page.
+        """
+
+    def on_leave(self):
+        """
+        Event called when browser leaves this page.
+        """
+
+class FormNotFound(Exception):
+    """
+    Raised when :meth:`HTMLPage.get_form` can't find a form.
+    """
+
+class FormSubmitWarning(UserWarning):
+    """
+    A form has more than one submit element selected, and will likely
+    generate an invalid request.
+    """
+
+class Form(OrderedDict):
+    """
+    Represents a form of an HTML page.
+
+    It is used as a dict with pre-filled values from HTML. You can set new
+    values as strings by setting an item value.
+
+    submit_el allows you to only consider one submit button (which is what
+    browsers do). If set to None, it takes all of them, and if set to False,
+    it takes none.
+    """
+
+    def __init__(self, page, el, submit_el=None):
+        super(Form, self).__init__()
+        self.page = page
+        self.el = el
+        self.submit_el = submit_el
+        self.method = el.attrib.get('method', 'GET')
+        self.url = el.attrib.get('action', page.url)
+        self.name = el.attrib.get('name', '')
+        submits = 0
+
+        for inp in el.xpath('.//input | .//select | .//textarea'):
+            try:
+                name = inp.attrib['name']
+            except KeyError:
+                continue
+
+            try:
+                if inp.attrib['type'] in ('checkbox', 'radio') and 'checked' not in inp.attrib:
+                    continue
+            except KeyError:
+                pass
+
+            try:
+                if inp.attrib['type'] == 'submit':
+                    if self.submit_el is not None and inp is not self.submit_el:
+                        continue
+                    else:
+                        submits += 1
+            except KeyError:
+                pass
+
+            if inp.tag == 'select':
+                options = inp.xpath('.//option[@selected]')
+                if len(options) == 0:
+                    options = inp.xpath('.//option')
+                if len(options) == 0:
+                    value = u''
+                else:
+                    value = options[0].attrib.get('value', options[0].text or u'')
+            else:
+                value = inp.attrib.get('value', inp.text or u'')
+
+            self[name] = value
+
+        if submits > 1:
+            warnings.warn('Form has more than one submit input, you should chose the correct one', FormSubmitWarning, stacklevel=3)
+        if self.submit_el is not None and self.submit_el is not False and submits == 0:
+            warnings.warn('Form had a submit element provided, but it was not found', FormSubmitWarning, stacklevel=3)
+
+
+    @property
+    def request(self):
+        """
+        Get the Request object from the form.
+        """
+        if self.method.lower() == 'get':
+            req = requests.Request(self.method, self.url, params=self)
+        else:
+            req = requests.Request(self.method, self.url, data=self)
+        req.headers.setdefault('Referer', self.page.url)
+        return req
+
+    def submit(self, **kwargs):
+        """
+        Submit the form and tell browser to be located to the new page.
+        """
+        kwargs.setdefault('data_encoding', self.page.encoding)
+        return self.page.browser.location(self.request, **kwargs)
+
+
+class CsvPage(Page):
+    DIALECT = 'excel'
+    FMTPARAMS = {}
+    ENCODING = 'utf-8'
+    NEWLINES_HACK = True
+
+    """
+    If True, will consider the first line as a header.
+    This means the rows will be also available as dictionnaries.
+    """
+    HEADER = None
+
+    def __init__(self, browser, response, *args, **kwargs):
+        super(CsvPage, self).__init__(browser, response, *args, **kwargs)
+        content = response.content
+        encoding = self.ENCODING
+        if encoding == 'utf-16le':
+            content = content.decode('utf-16le')[1:].encode('utf-8')
+            encoding = 'utf-8'
+        if self.NEWLINES_HACK:
+            content = content.replace('\r\n', '\n').replace('\r', '\n')
+        fp = BytesIO(content)
+        self.doc = self.parse(fp, encoding)
+
+    def parse(self, data, encoding=None):
+        import csv
+        reader = csv.reader(data, dialect=self.DIALECT, **self.FMTPARAMS)
+        header = None
+        drows = []
+        rows = []
+        for i, row in enumerate(reader):
+            if self.HEADER and i+1 < self.HEADER:
+                continue
+            row = self.decode_row(row, encoding)
+            if header is None and self.HEADER:
+                header = row
+            else:
+                rows.append(row)
+                if header:
+                    drow = {}
+                    for i, cell in enumerate(row):
+                        drow[header[i]] = cell
+                    drows.append(drow)
+        return drows if header is not None else row
+
+    def decode_row(self, row, encoding):
+        if encoding:
+            return [unicode(cell, encoding) for cell in row]
+        else:
+            return row
+
+
+class JsonPage(Page):
+    def __init__(self, browser, response, *args, **kwargs):
+        super(JsonPage, self).__init__(browser, response, *args, **kwargs)
+        from weboob.tools.json import json
+        self.doc = json.loads(response.text)
+
+
+class XMLPage(Page):
+    ENCODING = None
+    """
+    Force a page encoding.
+    It is recommended to use None for autodetection.
+    """
+
+    def __init__(self, browser, response, *args, **kwargs):
+        super(XMLPage, self).__init__(browser, response, *args, **kwargs)
+        import lxml.etree as etree
+        parser = etree.XMLParser(encoding=self.ENCODING or response.encoding)
+        self.doc = etree.parse(BytesIO(response.content), parser)
+
+
+class RawPage(Page):
+    def __init__(self, browser, response, *args, **kwargs):
+        super(RawPage, self).__init__(browser, response, *args, **kwargs)
+        self.doc = response.content
+
+
+class HTMLPage(Page):
+    """
+    HTML page.
+    """
+    FORM_CLASS = Form
+
+    ENCODING = None
+    """
+    Force a page encoding.
+    It is recommended to use None for autodetection.
+    """
+
+    def __init__(self, browser, response, *args, **kwargs):
+        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
+        self.encoding = self.ENCODING or response.encoding
+        import lxml.html as html
+        parser = html.HTMLParser(encoding=self.encoding)
+        self.doc = html.parse(BytesIO(response.content), parser)
+
+    def get_form(self, xpath='//form', name=None, nr=None, submit=None):
+        """
+        Get a :class:`Form` object from a selector.
+        The form will be analyzed and its parameters extracted.
+        In the case there is more than one "submit" input, only one of
+        them should be chosen to generate the request.
+
+        :param xpath: xpath string to select forms
+        :type xpath: :class:`str`
+        :param name: if supplied, select a form with the given name
+        :type name: :class:`str`
+        :param nr: if supplied, take the n+1 th selected form
+        :type nr: :class:`int`
+        :param submit: if supplied, xpath string to select the submit \
+            element from the form
+        :type submit: :class:`str`
+        :rtype: :class:`Form`
+        :raises: :class:`FormNotFound` if no form is found
+        """
+        i = 0
+        for el in self.doc.xpath(xpath):
+            if name is not None and el.attrib.get('name', '') != name:
+                continue
+            if nr is not None and i != nr:
+                i += 1
+                continue
+
+            if isinstance(submit, basestring):
+                submit_el = el.xpath(submit)[0]
+            else:
+                submit_el = submit
+
+            return self.FORM_CLASS(self, el, submit_el)
+
+        raise FormNotFound()
+
+
+class LoggedPage(object):
+    """
+    A page that only logged users can reach. If we did not get a redirection
+    for this page, we are sure that the login is still active.
+
+    Do not use this class for page we mixed content (logged/anonymous) or for
+    pages with a login form.
+    """
+    logged = True
--- a/weboob/browser/profiles.py
+++ b/weboob/browser/profiles.py
@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012-2014 Laurent Bachelier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+class Profile(object):
+    """
+    A profile represents the way Browser should act.
+    Usually it is to mimic a real browser.
+    """
+
+    def setup_session(self, session):
+        """
+        Change default headers, set up hooks, etc.
+
+        Warning: Do not enable lzma, bzip or bzip2, sdch encodings
+        as python-requests does not support it yet.
+        Supported as of 2.2: gzip, deflate, compress.
+        In doubt, do not change the default Accept-Encoding header
+        of python-requests.
+        """
+        raise NotImplementedError()
+
+
+class Weboob(Profile):
+    """
+    It's us!
+    Recommended for Weboob-friendly websites only.
+    """
+
+    def __init__(self, version):
+        self.version = version
+
+    def setup_session(self, session):
+        session.headers['User-Agent'] = 'weboob/%s' % self.version
+
+
+class Firefox(Profile):
+    """
+    Try to mimic a specific version of Firefox.
+    Ideally, it should follow the current ESR Firefox:
+    https://www.mozilla.org/en-US/firefox/organizations/all.html
+    Do not change the Firefox version without checking the Gecko one!
+    """
+
+    def setup_session(self, session):
+        """
+        Set up headers for a standard Firefox request
+        (except for DNT which isn't on by default but is a good idea).
+
+        The goal is to be unidentifiable.
+        """
+        # Replace all base requests headers
+        # https://developer.mozilla.org/en/Gecko_user_agent_string_reference
+        # https://bugzilla.mozilla.org/show_bug.cgi?id=572650
+        session.headers = {
+            'Accept-Language': 'en-us,en;q=0.5',
+            'Accept-Encoding': 'gzip, deflate',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0',
+            'DNT': '1'}
+
+
+class Wget(Profile):
+    """
+    Common alternative user agent.
+    Some websites will give you a version with less JavaScript.
+    Some others could ban you (after all, wget is not a real browser).
+    """
+    def __init__(self, version='1.11.4'):
+        self.version = version
+
+    def setup_session(self, session):
+        # Don't remove base headers, if websites want to block fake browsers,
+        # they will probably block any wget user agent anyway.
+        session.headers.update({
+            'Accept': '*/*',
+            'User-Agent': 'Wget/%s' % self.version})
+
+
--- a/weboob/browser2/sessions.py
+++ b/weboob/browser2/sessions.py
@ -18,7 +18,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

 # Inspired by: https://github.com/ross/requests-futures/blob/master/requests_futures/sessions.py
-# XXX Licence issues ?
+# XXX Licence issues?

 try:
    from concurrent.futures import ThreadPoolExecutor
@ -58,7 +58,6 @@ def merge_hooks(request_hooks, session_hooks, dict_class=OrderedDict):


 class WeboobSession(Session):
-
    def prepare_request(self, request):
        """Constructs a :class:`PreparedRequest <PreparedRequest>` for
        transmission and returns it. The :class:`PreparedRequest` has settings
@ -99,8 +98,8 @@ class WeboobSession(Session):
        )
        return p

-class FuturesSession(WeboobSession):

+class FuturesSession(WeboobSession):
    def __init__(self, executor=None, max_workers=2, *args, **kwargs):
        """Creates a FuturesSession

--- a/weboob/browser/url.py
+++ b/weboob/browser/url.py
@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+try:
+    from urllib.parse import unquote
+except ImportError:
+    from urllib import unquote
+import re
+import requests
+
+from weboob.tools.regex_helper import normalize
+
+
+class UrlNotResolvable(Exception):
+    """
+    Error raised when none of the url patterns of an URL instance can be
+    resolved into a real url.
+    """
+
+
+class URL(object):
+    """
+    A description of an URL on the PagesBrowser website.
+
+    It takes one or several regexps to match urls, and an optional Page
+    class which is instantiated by PagesBrowser.open if the page matches a regex.
+
+    The ``browser`` attribute is None after construction; it has to be set
+    before using the methods which rely on it.
+    """
+    _creation_counter = 0
+
+    def __init__(self, *args):
+        self.urls = []
+        self.klass = None
+        self.browser = None
+        for arg in args:
+            # String arguments are url regexps, a class argument is the Page
+            # class (when several classes are given, the last one is kept).
+            if isinstance(arg, basestring):
+                self.urls.append(arg)
+            if isinstance(arg, type):
+                self.klass = arg
+
+        # Give each instance an increasing number, so URL objects can be
+        # sorted by creation order.
+        self._creation_counter = URL._creation_counter
+        URL._creation_counter += 1
+
+    def is_here(self, **kwargs):
+        """
+        Returns a true value if the current page of browser matches this URL.
+        If arguments are provided, and only then, they are checked against the arguments
+        that were used to build the current page URL.
+        """
+        assert self.klass is not None, "You can use this method only if there is a Page class handler."
+
+        if len(kwargs):
+            params = self.match(self.build(**kwargs)).groupdict()
+        else:
+            params = None
+
+        # XXX use unquote on current params values because if there are spaces
+        # or special characters in them, they are encoded in the page params
+        # but not in kwargs. Groups which did not take part in the match are
+        # None and are compared as is (unquote does not accept None).
+        return self.browser.page and isinstance(self.browser.page, self.klass) \
+            and (params is None or params == dict([(k, unquote(v) if v is not None else v) for k, v in self.browser.page.params.items()]))
+
+    def stay_or_go(self, **kwargs):
+        """
+        Request to go on this url only if we aren't already here.
+
+        Arguments are optional parameters for url.
+
+        >>> url = URL('http://exawple.org/(?P<pagename>.+).html')
+        >>> url.stay_or_go(pagename='index')
+        """
+        if self.is_here(**kwargs):
+            return self.browser.page
+
+        return self.go(**kwargs)
+
+    def go(self, params=None, data=None, **kwargs):
+        """
+        Request to go on this url.
+
+        Arguments are optional parameters for url.
+
+        :param params: passed as is to the ``location`` method of browser
+        :param data: passed as is to the ``location`` method of browser
+        :returns: the page of the response if there is one, else the response
+
+        >>> url = URL('http://exawple.org/(?P<pagename>.+).html')
+        >>> url.go(pagename='index')
+        """
+        r = self.browser.location(self.build(**kwargs), params=params, data=data)
+        return r.page or r
+
+    def open(self, params=None, data=None, **kwargs):
+        """
+        Request to open on this url.
+
+        Arguments are optional parameters for url.
+
+        :param params: passed as is to the ``open`` method of browser
+        :param data: POST data
+        :type data: str or dict or None
+        :returns: the page of the response if there is one, else the response
+
+        >>> url = URL('http://exawple.org/(?P<pagename>.+).html')
+        >>> url.open(pagename='index')
+        """
+        r = self.browser.open(self.build(**kwargs), params=params, data=data)
+        return r.page or r
+
+    def build(self, **kwargs):
+        """
+        Build an url with the given arguments from URL's regexps.
+
+        The first pattern for which every given argument is used, and which
+        has no named substitution left afterwards, is selected.
+
+        :param params: Query string parameters
+        :param browser: browser used to make the url absolute
+                        (default: the browser of this object)
+
+        :rtype: :class:`str`
+        :raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments.
+        """
+        browser = kwargs.pop('browser', self.browser)
+        params = kwargs.pop('params', None)
+        patterns = []
+        for url in self.urls:
+            patterns += normalize(url)
+
+        for pattern, _ in patterns:
+            url = pattern
+            # Work on a copy: arguments consumed by a pattern which is
+            # rejected afterwards must stay available for the next patterns.
+            args = kwargs.copy()
+            # only use full-name substitutions, to allow % in URLs
+            for key in list(args):  # iterate on a copy of the keys because of pop()
+                search = '%%(%s)s' % key
+                if search in pattern:
+                    url = url.replace(search, unicode(args.pop(key)))
+            # if there are named substitutions left, ignore pattern
+            # (explicit ranges: [A-z] would also match "[", "\", "]", "^"
+            # and "`", and would miss names containing digits)
+            if re.search(r'%\([A-Za-z_]\w*\)s', url):
+                continue
+            # if not all args were used
+            if len(args):
+                continue
+
+            url = browser.absurl(url, base=True)
+            if params:
+                # Let requests encode and append the query string.
+                p = requests.models.PreparedRequest()
+                p.prepare_url(url, params)
+                url = p.url
+            return url
+
+        raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns])))
+
+    def match(self, url, base=None):
+        """
+        Check if the given url match this object.
+
+        :param url: url to check
+        :param base: base url prepended to the regexps which do not start
+                     with a scheme (default: BASEURL of the browser)
+        :returns: the match object of the first matching regexp, or None
+        """
+        if base is None:
+            assert self.browser is not None
+            base = self.browser.BASEURL
+
+        for regex in self.urls:
+            if not re.match(r'^\w+://.*', regex):
+                # Relative regexp: anchor it on the (escaped) base url.
+                regex = re.escape(base).rstrip('/') + '/' + regex.lstrip('/')
+            m = re.match(regex, url)
+            if m:
+                return m
+
+    def handle(self, response):
+        """
+        Handle a HTTP response to get an instance of the klass if it matches.
+
+        :returns: the page, or None if there is no klass, if the url of the
+                  response does not match or if the page says it is not here
+        """
+        if self.klass is None:
+            return
+
+        m = self.match(response.url)
+        if m:
+            page = self.klass(self.browser, response, m.groupdict())
+            if hasattr(page, 'is_here'):
+                # is_here is either a callable, or an xpath which has to
+                # select something in the document of the page.
+                if callable(page.is_here):
+                    if page.is_here():
+                        return page
+                else:
+                    assert isinstance(page.is_here, basestring)
+                    if page.doc.xpath(page.is_here):
+                        return page
+            else:
+                return page
+
+    def id2url(self, func):
+        r"""
+        Helper decorator to get an URL if the given first parameter is an ID.
+
+        If the parameter is already a http(s) url, the decorated function is
+        called only if it matches this object, else None is returned.
+        Otherwise, it is used as the ``id`` argument to build the url.
+        """
+        def inner(browser, id_or_url, *args, **kwargs):
+            if re.match('^https?://.*', id_or_url):
+                if not self.match(id_or_url, browser.BASEURL):
+                    return
+            else:
+                id_or_url = self.build(id=id_or_url, browser=browser)
+
+            return func(browser, id_or_url, *args, **kwargs)
+        return inner
+
+
--- a/weboob/browser2/page.py
+++ b/weboob/browser2/page.py
@ -1,716 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2014 Romain Bignon
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-from __future__ import absolute_import
-
-try:
-    from urllib.parse import unquote
-except ImportError:
-    from urllib import unquote
-import re
-import warnings
-from copy import deepcopy
-from io import BytesIO
-
-import requests
-
-from weboob.tools.ordereddict import OrderedDict
-from weboob.tools.regex_helper import normalize
-from weboob.tools.compat import basestring
-
-from weboob.tools.log import getLogger
-
-from .browser import DomainBrowser
-
-
-class UrlNotResolvable(Exception):
-    """
-    Raised when trying to locate on an URL instance which url pattern is not resolvable as a real url.
-    """
-
-
-class URL(object):
-    """
-    A description of an URL on the PagesBrowser website.
-
-    It takes one or several regexps to match urls, and an optional Page
-    class which is instancied by PagesBrowser.open if the page matches a regex.
-    """
-    _creation_counter = 0
-
-    def __init__(self, *args):
-        self.urls = []
-        self.klass = None
-        self.browser = None
-        for arg in args:
-            if isinstance(arg, basestring):
-                self.urls.append(arg)
-            if isinstance(arg, type):
-                self.klass = arg
-
-        self._creation_counter = URL._creation_counter
-        URL._creation_counter += 1
-
-    def is_here(self, **kwargs):
-        """
-        Returns True if the current page of browser matches this URL.
-        If arguments are provided, and only then, they are checked against the arguments
-        that were used to build the current page URL.
-        """
-        assert self.klass is not None, "You can use this method only if there is a Page class handler."
-
-        if len(kwargs):
-            params = self.match(self.build(**kwargs)).groupdict()
-        else:
-            params = None
-
-        # XXX use unquote on current params values because if there are spaces
-        # or special characters in them, it is encoded only in but not in kwargs.
-        return self.browser.page and isinstance(self.browser.page, self.klass) \
-            and (params is None or params == dict([(k,unquote(v)) for k,v in self.browser.page.params.iteritems()]))
-
-    def stay_or_go(self, **kwargs):
-        """
-        Request to go on this url only if we aren't already here.
-
-        Arguments are optional parameters for url.
-
-        >>> url = URL('http://exawple.org/(?P<pagename>).html')
-        >>> url.stay_or_go(pagename='index')
-        """
-        if self.is_here(**kwargs):
-            return self.browser.page
-
-        return self.go(**kwargs)
-
-    def go(self, params=None, data=None, **kwargs):
-        """
-        Request to go on this url.
-
-        Arguments are optional parameters for url.
-
-        >>> url = URL('http://exawple.org/(?P<pagename>).html')
-        >>> url.stay_or_go(pagename='index')
-        """
-        r = self.browser.location(self.build(**kwargs), params=params, data=data)
-        return r.page or r
-
-    def open(self, params=None, data=None, **kwargs):
-        """
-        Request to open on this url.
-
-        Arguments are optional parameters for url.
-
-        :param data: POST data
-        :type url: str or dict or None
-
-        >>> url = URL('http://exawple.org/(?P<pagename>).html')
-        >>> url.open(pagename='index')
-        """
-        r = self.browser.open(self.build(**kwargs), params=params, data=data)
-        return r.page or r
-
-    def build(self, **kwargs):
-        """
-        Build an url with the given arguments from URL's regexps.
-
-        :param param: Query string parameters
-
-        :rtype: :class:`str`
-        :raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments.
-        """
-        browser = kwargs.pop('browser', self.browser)
-        params = kwargs.pop('params', None)
-        patterns = []
-        for url in self.urls:
-            patterns += normalize(url)
-
-        for pattern, _ in patterns:
-            url = pattern
-            # only use full-name substitutions, to allow % in URLs
-            for kwkey in kwargs.keys():  # need to use keys() because of pop()
-                search = '%%(%s)s' % kwkey
-                if search in pattern:
-                    url = url.replace(search, unicode(kwargs.pop(kwkey)))
-            # if there are named substitutions left, ignore pattern
-            if re.search('%\([A-z_]+\)s', url):
-                continue
-            # if not all kwargs were used
-            if len(kwargs):
-                continue
-
-            url = browser.absurl(url, base=True)
-            if params:
-                p = requests.models.PreparedRequest()
-                p.prepare_url(url, params)
-                url = p.url
-            return url
-
-        raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns])))
-
-    def match(self, url, base=None):
-        """
-        Check if the given url match this object.
-        """
-        if base is None:
-            assert self.browser is not None
-            base = self.browser.BASEURL
-
-        for regex in self.urls:
-            if not re.match(r'^\w+://.*', regex):
-                regex = re.escape(base).rstrip('/') + '/' + regex.lstrip('/')
-            m = re.match(regex, url)
-            if m:
-                return m
-
-    def handle(self, response):
-        """
-        Handle a HTTP response to get an instance of the klass if it matches.
-        """
-        if self.klass is None:
-            return
-
-        m = self.match(response.url)
-        if m:
-            page = self.klass(self.browser, response, m.groupdict())
-            if hasattr(page, 'is_here'):
-                if callable(page.is_here):
-                    if page.is_here():
-                        return page
-                else:
-                    assert isinstance(page.is_here, basestring)
-                    if page.doc.xpath(page.is_here):
-                        return page
-            else:
-                return page
-
-    def id2url(self, func):
-        r"""
-        Helper decorator to get an URL if the given first parameter is an ID.
-        """
-        def inner(browser, id_or_url, *args, **kwargs):
-            if re.match('^https?://.*', id_or_url):
-                if not self.match(id_or_url, browser.BASEURL):
-                    return
-            else:
-                id_or_url = self.build(id=id_or_url, browser=browser)
-
-            return func(browser, id_or_url, *args, **kwargs)
-        return inner
-
-
-class _PagesBrowserMeta(type):
-    """
-    Private meta-class used to keep order of URLs instances of PagesBrowser.
-    """
-    def __new__(mcs, name, bases, attrs):
-        urls = [(url_name, attrs.pop(url_name)) for url_name, obj in attrs.items() if isinstance(obj, URL)]
-        urls.sort(key=lambda x: x[1]._creation_counter)
-
-        new_class = super(_PagesBrowserMeta, mcs).__new__(mcs, name, bases, attrs)
-        if new_class._urls is None:
-            new_class._urls = OrderedDict()
-        else:
-            new_class._urls = deepcopy(new_class._urls)
-        new_class._urls.update(urls)
-        return new_class
-
-class PagesBrowser(DomainBrowser):
-    r"""
-    A browser which works pages and keep state of navigation.
-
-    To use it, you have to derive it and to create URL objects as class
-    attributes. When open() or location() are called, if the url matches
-    one of URL objects, it returns a Page object. In case of location(), it
-    stores it in self.page.
-
-    Example:
-
-    >>> class HomePage(Page):
-    ...     pass
-    ...
-    >>> class ListPage(Page):
-    ...     pass
-    ...
-    >>> class MyBrowser(PagesBrowser):
-    ...     BASEURL = 'http://example.org'
-    ...     home = URL('/(index\.html)?', HomePage)
-    ...     list = URL('/list\.html', ListPage)
-    ...
-
-    You can then use URL instances to go on pages.
-    """
-
-
-    _urls = None
-    __metaclass__ = _PagesBrowserMeta
-
-    def __getattr__(self, name):
-        if self._urls is not None and name in self._urls:
-            return self._urls[name]
-        else:
-            raise AttributeError("'%s' object has no attribute '%s'" % (
-                self.__class__.__name__, name))
-
-    def __init__(self, *args, **kwargs):
-        super(PagesBrowser, self).__init__(*args, **kwargs)
-
-        self.page = None
-        self._urls = deepcopy(self._urls)
-        for url in self._urls.itervalues():
-            url.browser = self
-
-    def open(self, *args, **kwargs):
-        """
-        Same method than
-        :meth:`weboob.browser2.browser.DomainBrowser.open`, but the
-        response contains an attribute `page` if the url matches any
-        :class:`URL` object.
-        """
-
-        callback = kwargs.pop('callback', lambda response: response)
-
-        # Have to define a callback to seamlessly process synchronous and
-        # asynchronous requests, see :meth:`Browser.open` and its `async`
-        # and `callback` params.
-        def internal_callback(response):
-            # Try to handle the response page with an URL instance.
-            response.page = None
-            for url in self._urls.itervalues():
-                page = url.handle(response)
-                if page is not None:
-                    self.logger.debug('Handle %s with %s' % (response.url, page.__class__.__name__))
-                    response.page = page
-                    break
-
-            if response.page is None:
-                self.logger.debug('Unable to handle %s' % response.url)
-
-            return callback(response)
-
-        return super(PagesBrowser, self).open(callback=internal_callback, *args, **kwargs)
-
-    def location(self, *args, **kwargs):
-        """
-        Same method than
-        :meth:`weboob.browser2.browser.Browser.location`, but if the
-        url matches any :class:`URL` object, an attribute `page` is added to
-        response, and the attribute :attr:`PagesBrowser.page` is set.
-        """
-        if self.page is not None:
-            # Call leave hook.
-            self.page.on_leave()
-
-        response = self.open(*args, **kwargs)
-
-        self.response = response
-        self.page = response.page
-        self.url = response.url
-
-        if self.page is not None:
-            # Call load hook.
-            self.page.on_load()
-
-        # Returns self.response in case on_load recalls location()
-        return self.response
-
-    def pagination(self, func, *args, **kwargs):
-        r"""
-        This helper function can be used to handle pagination pages easily.
-
-        When the called function raises an exception :class:`NextPage`, it goes
-        on the wanted page and recall the function.
-
-        :class:`NextPage` constructor can take an url or a Request object.
-
-        >>> class Page(HTMLPage):
-        ...     def iter_values(self):
-        ...         for el in self.doc.xpath('//li'):
-        ...             yield el.text
-        ...         for next in self.doc.xpath('//a'):
-        ...             raise NextPage(next.attrib['href'])
-        ...
-        >>> class Browser(PagesBrowser):
-        ...     BASEURL = 'http://people.symlink.me'
-        ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
-        ...
-        >>> b = Browser()
-        >>> b.list.go(pagenum=1)
-        >>> list(b.pagination(lambda: b.page.iter_values()))
-        ['One', 'Two', 'Three', 'Four']
-        """
-        while True:
-            try:
-                for r in func(*args, **kwargs):
-                    yield r
-            except NextPage as e:
-                self.location(e.request)
-            else:
-                return
-
-def pagination(func):
-    r"""
-    This helper decorator can be used to handle pagination pages easily.
-
-    When the called function raises an exception :class:`NextPage`, it goes on
-    the wanted page and recall the function.
-
-    :class:`NextPage` constructor can take an url or a Request object.
-
-    >>> class Page(HTMLPage):
-    ...     @pagination
-    ...     def iter_values(self):
-    ...         for el in self.doc.xpath('//li'):
-    ...             yield el.text
-    ...         for next in self.doc.xpath('//a'):
-    ...             raise NextPage(next.attrib['href'])
-    ...
-    >>> class Browser(PagesBrowser):
-    ...     BASEURL = 'http://people.symlink.me'
-    ...     list = URL('/~rom1/projects/weboob/list-(?P<pagenum>\d+).html', Page)
-    ...
-    >>> b = Browser()
-    >>> b.list.go(pagenum=1)
-    >>> list(b.page.iter_values())
-    ['One', 'Two', 'Three', 'Four']
-    """
-    def inner(page, *args, **kwargs):
-        while True:
-            try:
-                for r in func(page, *args, **kwargs):
-                    yield r
-            except NextPage as e:
-                result = page.browser.location(e.request)
-                page = result.page
-            else:
-                return
-
-    return inner
-
-class NextPage(Exception):
-    """
-    Exception used for example in a Page to tell PagesBrowser.pagination to
-    go on the next page.
-
-    See :meth:`PagesBrowser.pagination` or decorator :func:`pagination`.
-    """
-    def __init__(self, request):
-        super(NextPage, self).__init__()
-        self.request = request
-
-
-def need_login(func):
-    """
-    Decorator used to require to be logged to access to this function.
-    """
-    def inner(browser, *args, **kwargs):
-        if browser.page is None or not browser.page.logged:
-            browser.do_login()
-        return func(browser, *args, **kwargs)
-
-    return inner
-
-
-class LoginBrowser(PagesBrowser):
-    """
-    A browser which supports login.
-    """
-    def __init__(self, username, password, *args, **kwargs):
-        super(LoginBrowser, self).__init__(*args, **kwargs)
-        self.username = username
-        self.password = password
-
-    def do_login(self):
-        """
-        Abstract method to implement to login on website.
-
-        It is call when a login is needed.
-        """
-        raise NotImplementedError()
-
-
-class Page(object):
-    """
-    Base page.
-    """
-    logged = False
-
-    def __init__(self, browser, response, params=None):
-        self.browser = browser
-        self.logger = getLogger(self.__class__.__name__.lower(), browser.logger)
-        self.response = response
-        self.url = self.response.url
-        self.params = params
-
-    def on_load(self):
-        """
-        Event called when browser loads this page.
-        """
-
-    def on_leave(self):
-        """
-        Event called when browser leaves this page.
-        """
-
-class FormNotFound(Exception):
-    """
-    Raised when :meth:`HTMLPage.get_form` can't find a form.
-    """
-
-class FormSubmitWarning(UserWarning):
-    """
-    A form has more than one submit element selected, and will likely
-    generate an invalid request.
-    """
-
-class Form(OrderedDict):
-    """
-    Represents a form of an HTML page.
-
-    It is used as a dict with pre-filled values from HTML. You can set new
-    values as strings by setting an item value.
-
-    submit_el allows you to only consider one submit button (which is what
-    browsers do). If set to None, it takes all of them, and if set to False,
-    it takes none.
-    """
-
-    def __init__(self, page, el, submit_el=None):
-        super(Form, self).__init__()
-        self.page = page
-        self.el = el
-        self.submit_el = submit_el
-        self.method = el.attrib.get('method', 'GET')
-        self.url = el.attrib.get('action', page.url)
-        self.name = el.attrib.get('name', '')
-        submits = 0
-
-        for inp in el.xpath('.//input | .//select | .//textarea'):
-            try:
-                name = inp.attrib['name']
-            except KeyError:
-                continue
-
-            try:
-                if inp.attrib['type'] in ('checkbox', 'radio') and 'checked' not in inp.attrib:
-                    continue
-            except KeyError:
-                pass
-
-            try:
-                if inp.attrib['type'] == 'submit':
-                    if self.submit_el is not None and inp is not self.submit_el:
-                        continue
-                    else:
-                        submits += 1
-            except KeyError:
-                pass
-
-            if inp.tag == 'select':
-                options = inp.xpath('.//option[@selected]')
-                if len(options) == 0:
-                    options = inp.xpath('.//option')
-                if len(options) == 0:
-                    value = u''
-                else:
-                    value = options[0].attrib.get('value', options[0].text or u'')
-            else:
-                value = inp.attrib.get('value', inp.text or u'')
-
-            self[name] = value
-
-        if submits > 1:
-            warnings.warn('Form has more than one submit input, you should chose the correct one', FormSubmitWarning, stacklevel=3)
-        if self.submit_el is not None and self.submit_el is not False and submits == 0:
-            warnings.warn('Form had a submit element provided, but it was not found', FormSubmitWarning, stacklevel=3)
-
-
-    @property
-    def request(self):
-        """
-        Get the Request object from the form.
-        """
-        if self.method.lower() == 'get':
-            req = requests.Request(self.method, self.url, params=self)
-        else:
-            req = requests.Request(self.method, self.url, data=self)
-        req.headers.setdefault('Referer', self.page.url)
-        return req
-
-    def submit(self, **kwargs):
-        """
-        Submit the form and tell browser to be located to the new page.
-        """
-        kwargs.setdefault('data_encoding', self.page.encoding)
-        return self.page.browser.location(self.request, **kwargs)
-
-
-class CsvPage(Page):
-    DIALECT = 'excel'
-    FMTPARAMS = {}
-    ENCODING = 'utf-8'
-    NEWLINES_HACK = True
-
-    """
-    If True, will consider the first line as a header.
-    This means the rows will be also available as dictionnaries.
-    """
-    HEADER = None
-
-    def __init__(self, browser, response, *args, **kwargs):
-        super(CsvPage, self).__init__(browser, response, *args, **kwargs)
-        content = response.content
-        encoding = self.ENCODING
-        if encoding == 'utf-16le':
-            content = content.decode('utf-16le')[1:].encode('utf-8')
-            encoding = 'utf-8'
-        if self.NEWLINES_HACK:
-            content = content.replace('\r\n', '\n').replace('\r', '\n')
-        fp = BytesIO(content)
-        self.doc = self.parse(fp, encoding)
-
-    def parse(self, data, encoding=None):
-        import csv
-        reader = csv.reader(data, dialect=self.DIALECT, **self.FMTPARAMS)
-        header = None
-        drows = []
-        rows = []
-        for i, row in enumerate(reader):
-            if self.HEADER and i+1 < self.HEADER:
-                continue
-            row = self.decode_row(row, encoding)
-            if header is None and self.HEADER:
-                header = row
-            else:
-                rows.append(row)
-                if header:
-                    drow = {}
-                    for i, cell in enumerate(row):
-                        drow[header[i]] = cell
-                    drows.append(drow)
-        return drows if header is not None else row
-
-    def decode_row(self, row, encoding):
-        if encoding:
-            return [unicode(cell, encoding) for cell in row]
-        else:
-            return row
-
-
-class JsonPage(Page):
-    def __init__(self, browser, response, *args, **kwargs):
-        super(JsonPage, self).__init__(browser, response, *args, **kwargs)
-        from weboob.tools.json import json
-        self.doc = json.loads(response.text)
-
-
-class XMLPage(Page):
-    ENCODING = None
-    """
-    Force a page encoding.
-    It is recommended to use None for autodetection.
-    """
-
-    def __init__(self, browser, response, *args, **kwargs):
-        super(XMLPage, self).__init__(browser, response, *args, **kwargs)
-        import lxml.etree as etree
-        parser = etree.XMLParser(encoding=self.ENCODING or response.encoding)
-        self.doc = etree.parse(BytesIO(response.content), parser)
-
-
-class RawPage(Page):
-    def __init__(self, browser, response, *args, **kwargs):
-        super(RawPage, self).__init__(browser, response, *args, **kwargs)
-        self.doc = response.content
-
-
-class HTMLPage(Page):
-    """
-    HTML page.
-    """
-    FORM_CLASS = Form
-
-    ENCODING = None
-    """
-    Force a page encoding.
-    It is recommended to use None for autodetection.
-    """
-
-    def __init__(self, browser, response, *args, **kwargs):
-        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
-        self.encoding = self.ENCODING or response.encoding
-        import lxml.html as html
-        parser = html.HTMLParser(encoding=self.encoding)
-        self.doc = html.parse(BytesIO(response.content), parser)
-
-    def get_form(self, xpath='//form', name=None, nr=None, submit=None):
-        """
-        Get a :class:`Form` object from a selector.
-        The form will be analyzed and its parameters extracted.
-        In the case there is more than one "submit" input, only one of
-        them should be chosen to generate the request.
-
-        :param xpath: xpath string to select forms
-        :type xpath: :class:`str`
-        :param name: if supplied, select a form with the given name
-        :type name: :class:`str`
-        :param nr: if supplied, take the n+1 th selected form
-        :type nr: :class:`int`
-        :param submit: if supplied, xpath string to select the submit \
-            element from the form
-        :type submit: :class:`str`
-        :rtype: :class:`Form`
-        :raises: :class:`FormNotFound` if no form is found
-        """
-        i = 0
-        for el in self.doc.xpath(xpath):
-            if name is not None and el.attrib.get('name', '') != name:
-                continue
-            if nr is not None and i != nr:
-                i += 1
-                continue
-
-            if isinstance(submit, basestring):
-                submit_el = el.xpath(submit)[0]
-            else:
-                submit_el = submit
-
-            return self.FORM_CLASS(self, el, submit_el)
-
-        raise FormNotFound()
-
-
-def method(klass):
-    """
-    Class-decorator to call it as a method.
-    """
-    def inner(self, *args, **kwargs):
-        return klass(self)(*args, **kwargs)
-    return inner
-
-
-class LoggedPage(object):
-    """
-    A page that only logged users can reach. If we did not get a redirection
-    for this page, we are sure that the login is still active.
-
-    Do not use this class for page we mixed content (logged/anonymous) or for
-    pages with a login form.
-    """
-    logged = True
--- a/weboob/core/repositories.py
+++ b/weboob/core/repositories.py
@ -31,7 +31,7 @@ from contextlib import closing
 from compileall import compile_dir
 from io import BytesIO

-from weboob.core.exceptions import BrowserHTTPError, BrowserHTTPNotFound
+from weboob.exceptions import BrowserHTTPError, BrowserHTTPNotFound
 from .modules import LoadedModule
 from weboob.tools.log import getLogger
 from weboob.tools.misc import to_unicode
@ -442,7 +442,8 @@ class Repositories(object):
            self.load()

    def load_browser(self):
-        from weboob.browser2.browser import Browser, Weboob as WeboobProfile
+        from weboob.browser.browsers import Browser
+        from weboob.browser.profiles import Weboob as WeboobProfile
        class WeboobBrowser(Browser):
            PROFILE = WeboobProfile(self.version)
        if self.browser is None:
--- a/weboob/deprecated/init.py
+++ b/weboob/deprecated/init.py
--- a/weboob/deprecated/browser/init.py
+++ b/weboob/deprecated/browser/init.py
@@ -18,7 +18,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-from weboob.tools.browser.browser import BrowserIncorrectPassword, BrowserBanned, \
+from weboob.deprecated.browser.browser import BrowserIncorrectPassword, BrowserBanned, \
                                         BrowserUnavailable, BrowserRetry, \
                                         BrowserHTTPNotFound, BrowserHTTPError, \
                                         Page, Browser, BrokenPageError, \
--- a/weboob/deprecated/browser/browser.py
+++ b/weboob/deprecated/browser/browser.py
@@ -49,12 +49,12 @@ from contextlib import closing
 from gzip import GzipFile
 import warnings

-from weboob.core.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserPasswordExpired, BrowserForbidden, BrowserBanned, BrowserHTTPNotFound, BrowserHTTPError, FormFieldConversionWarning, BrowserSSLError
+from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserPasswordExpired, BrowserForbidden, BrowserBanned, BrowserHTTPNotFound, BrowserHTTPError, FormFieldConversionWarning, BrowserSSLError
 from weboob.tools.decorators import retry
 from weboob.tools.log import getLogger
-from weboob.tools.mech import ClientForm
+from weboob.deprecated.mech import ClientForm
 ControlNotFoundError = ClientForm.ControlNotFoundError
-from weboob.tools.parsers import get_parser
+from weboob.deprecated.browser.parsers import get_parser

 __all__ = ['BrowserIncorrectPassword', 'BrowserForbidden', 'BrowserBanned', 'BrowserUnavailable', 'BrowserRetry',
           'BrowserPasswordExpired', 'BrowserHTTPNotFound', 'BrowserHTTPError', 'BrokenPageError', 'Page',
@@ -134,7 +134,7 @@ class StandardBrowser(mechanize.Browser):
    :param firefox_cookies: path to cookies sqlite file
    :type firefox_cookies: str
    :param parser: parser to use on HTML files
-    :type parser: :class:`weboob.tools.parsers.iparser.IParser`
+    :type parser: :class:`weboob.deprecated.browser.parsers.iparser.IParser`
    :param history: history manager; default value is an object which
                    does not keep history
    :type history: object
@@ -434,7 +434,7 @@ class Browser(StandardBrowser):
    :param firefox_cookies: path to cookies sqlite file
    :type firefox_cookies: str
    :param parser: parser to use on HTML files
-    :type parser: :class:`weboob.tools.parsers.iparser.IParser`
+    :type parser: :class:`weboob.deprecated.browser.parsers.iparser.IParser`
    :param history: history manager; default value is an object which
                    does not keep history
    :type history: object
--- a/weboob/deprecated/browser/decorators.py
+++ b/weboob/deprecated/browser/decorators.py
--- a/weboob/deprecated/browser/firefox_cookies.py
+++ b/weboob/deprecated/browser/firefox_cookies.py
--- a/weboob/deprecated/browser/parsers/__init__.py
+++ b/weboob/deprecated/browser/parsers/__init__.py
--- a/weboob/deprecated/browser/parsers/csvparser.py
+++ b/weboob/deprecated/browser/parsers/csvparser.py
--- a/weboob/deprecated/browser/parsers/iparser.py
+++ b/weboob/deprecated/browser/parsers/iparser.py
--- a/weboob/deprecated/browser/parsers/jsonparser.py
+++ b/weboob/deprecated/browser/parsers/jsonparser.py
--- a/weboob/deprecated/browser/parsers/lxmlparser.py
+++ b/weboob/deprecated/browser/parsers/lxmlparser.py
@@ -64,7 +64,7 @@ class LxmlParser(IParser):
        """
        Select one or many elements from an element, using lxml cssselect by default.

-        Raises :class:`weboob.tools.browser.browser.BrokenPageError` if not found.
+        Raises :class:`weboob.deprecated.browser.browser.BrokenPageError` if not found.

        :param element: element on which to apply selector
        :type element: object
--- a/weboob/deprecated/browser/parsers/lxmlsoupparser.py
+++ b/weboob/deprecated/browser/parsers/lxmlsoupparser.py
--- a/weboob/deprecated/mech.py
+++ b/weboob/deprecated/mech.py
--- a/weboob/core/exceptions.py
+++ b/weboob/core/exceptions.py
--- a/weboob/tools/application/base.py
+++ b/weboob/tools/application/base.py
@@ -32,7 +32,7 @@ from weboob.capabilities.base import ConversionWarning, BaseObject
 from weboob.core import Weboob, CallErrors
 from weboob.core.backendscfg import BackendsConfig
 from weboob.tools.config.iconfig import ConfigError
-from weboob.core.exceptions import FormFieldConversionWarning
+from weboob.exceptions import FormFieldConversionWarning
 from weboob.tools.log import createColoredFormatter, getLogger, DebugFilter, settings as log_settings
 from weboob.tools.misc import to_unicode
 from .results import ResultsConditionError
--- a/weboob/tools/application/console.py
+++ b/weboob/tools/application/console.py
@@ -32,7 +32,7 @@ from weboob.capabilities.account import CapAccount, Account, AccountRegisterErro
 from weboob.core.backendscfg import BackendAlreadyExists
 from weboob.core.modules import ModuleLoadError
 from weboob.core.repositories import ModuleInstallError
-from weboob.core.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden, BrowserSSLError
+from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden, BrowserSSLError
 from weboob.tools.value import Value, ValueBool, ValueFloat, ValueInt, ValueBackendPassword
 from weboob.tools.misc import to_unicode
 from weboob.tools.ordereddict import OrderedDict
--- a/weboob/tools/application/qt/qt.py
+++ b/weboob/tools/application/qt/qt.py
@@ -34,7 +34,7 @@ from weboob.core.ouiboube import Weboob, VersionsMismatchError
 from weboob.core.scheduler import IScheduler
 from weboob.core.repositories import ModuleInstallError
 from weboob.tools.config.iconfig import ConfigError
-from weboob.core.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden
+from weboob.exceptions import BrowserUnavailable, BrowserIncorrectPassword, BrowserForbidden
 from weboob.tools.value import ValueInt, ValueBool, ValueBackendPassword
 from weboob.tools.misc import to_unicode
 from weboob.capabilities import UserError
--- a/weboob/tools/capabilities/bank/transactions.py
+++ b/weboob/tools/capabilities/bank/transactions.py
@@ -27,9 +27,9 @@ from weboob.capabilities import NotAvailable, NotLoaded
 from weboob.tools.misc import to_unicode
 from weboob.tools.log import getLogger

-from weboob.core.exceptions import ParseError
-from weboob.browser2.elements import TableElement, ItemElement
-from weboob.browser2.filters.standard import Filter, CleanText, CleanDecimal, TableCell
+from weboob.exceptions import ParseError
+from weboob.browser.elements import TableElement, ItemElement
+from weboob.browser.filters.standard import Filter, CleanText, CleanDecimal, TableCell


 __all__ = ['FrenchTransaction', 'AmericanTransaction']
--- a/weboob/tools/capabilities/gallery/genericcomicreader.py
+++ b/weboob/tools/capabilities/gallery/genericcomicreader.py
@@ -23,7 +23,7 @@ import re

 from weboob.capabilities.gallery import CapGallery, BaseGallery, BaseImage
 from weboob.tools.backend import Module
-from weboob.tools.browser import Browser, Page
+from weboob.deprecated.browser import Browser, Page

 __all__ = ['GenericComicReaderModule']

--- a/weboob/tools/capabilities/messages/genericArticle.py
+++ b/weboob/tools/capabilities/messages/genericArticle.py
@@ -17,8 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-from weboob.tools.browser import Page
-from weboob.tools.browser import BrokenPageError
+from weboob.deprecated.browser import Page
+from weboob.deprecated.browser import BrokenPageError
 from lxml.etree import Comment