rename things related to browsers

weboob.tools.browser -> weboob.deprecated.browser weboob.tools.parsers -> weboob.deprecated.browser.parsers weboob.tools.mech -> weboob.deprecated.mech weboob.browser2 -> weboob.browser weboob.core.exceptions -> weboob.exceptions Also, the new tree for browser2 is: weboob.browser: import weboob.browser.browsers.* and weboob.browser.url.* weboob.browser.browsers: all browsers (including PagesBrowser and LoginBrowser) weboob.browser.url: the URL class weboob.browser.profiles: all Profile classes weboob.browser.sessions: WeboobSession and FuturesSession weboob.browser.cookies: that's a cookies thing weboob.browser.pages: all Page and derivated classes, and Form class weboob.browser.exceptions: specific browser exceptions weboob.browser.elements: AbstractElement classes, and 'method' decorator weboob.browser.filters.*: all filters
2014-10-07 00:23:18 +02:00 · 2014-10-07 00:23:18 +02:00 · d61e15cf84
commit d61e15cf84
parent 1f95e7631f
396 changed files with 1442 additions and 1382 deletions
--- a/weboob/browser/url.py
+++ b/weboob/browser/url.py
@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+try:
+    from urllib.parse import unquote
+except ImportError:
+    from urllib import unquote
+import re
+import requests
+
+from weboob.tools.regex_helper import normalize
+
+
+class UrlNotResolvable(Exception):
+    """
+    Raised when trying to locate on an URL instance which url pattern is not resolvable as a real url.
+    """
+
+
+class URL(object):
+    """
+    A description of an URL on the PagesBrowser website.
+
+    It takes one or several regexps to match urls, and an optional Page
+    class which is instancied by PagesBrowser.open if the page matches a regex.
+    """
+    _creation_counter = 0
+
+    def __init__(self, *args):
+        self.urls = []
+        self.klass = None
+        self.browser = None
+        for arg in args:
+            if isinstance(arg, basestring):
+                self.urls.append(arg)
+            if isinstance(arg, type):
+                self.klass = arg
+
+        self._creation_counter = URL._creation_counter
+        URL._creation_counter += 1
+
+    def is_here(self, **kwargs):
+        """
+        Returns True if the current page of browser matches this URL.
+        If arguments are provided, and only then, they are checked against the arguments
+        that were used to build the current page URL.
+        """
+        assert self.klass is not None, "You can use this method only if there is a Page class handler."
+
+        if len(kwargs):
+            params = self.match(self.build(**kwargs)).groupdict()
+        else:
+            params = None
+
+        # XXX use unquote on current params values because if there are spaces
+        # or special characters in them, it is encoded only in but not in kwargs.
+        return self.browser.page and isinstance(self.browser.page, self.klass) \
+            and (params is None or params == dict([(k,unquote(v)) for k,v in self.browser.page.params.iteritems()]))
+
+    def stay_or_go(self, **kwargs):
+        """
+        Request to go on this url only if we aren't already here.
+
+        Arguments are optional parameters for url.
+
+        >>> url = URL('http://exawple.org/(?P<pagename>).html')
+        >>> url.stay_or_go(pagename='index')
+        """
+        if self.is_here(**kwargs):
+            return self.browser.page
+
+        return self.go(**kwargs)
+
+    def go(self, params=None, data=None, **kwargs):
+        """
+        Request to go on this url.
+
+        Arguments are optional parameters for url.
+
+        >>> url = URL('http://exawple.org/(?P<pagename>).html')
+        >>> url.stay_or_go(pagename='index')
+        """
+        r = self.browser.location(self.build(**kwargs), params=params, data=data)
+        return r.page or r
+
+    def open(self, params=None, data=None, **kwargs):
+        """
+        Request to open on this url.
+
+        Arguments are optional parameters for url.
+
+        :param data: POST data
+        :type url: str or dict or None
+
+        >>> url = URL('http://exawple.org/(?P<pagename>).html')
+        >>> url.open(pagename='index')
+        """
+        r = self.browser.open(self.build(**kwargs), params=params, data=data)
+        return r.page or r
+
+    def build(self, **kwargs):
+        """
+        Build an url with the given arguments from URL's regexps.
+
+        :param param: Query string parameters
+
+        :rtype: :class:`str`
+        :raises: :class:`UrlNotResolvable` if unable to resolve a correct url with the given arguments.
+        """
+        browser = kwargs.pop('browser', self.browser)
+        params = kwargs.pop('params', None)
+        patterns = []
+        for url in self.urls:
+            patterns += normalize(url)
+
+        for pattern, _ in patterns:
+            url = pattern
+            # only use full-name substitutions, to allow % in URLs
+            for kwkey in kwargs.keys():  # need to use keys() because of pop()
+                search = '%%(%s)s' % kwkey
+                if search in pattern:
+                    url = url.replace(search, unicode(kwargs.pop(kwkey)))
+            # if there are named substitutions left, ignore pattern
+            if re.search('%\([A-z_]+\)s', url):
+                continue
+            # if not all kwargs were used
+            if len(kwargs):
+                continue
+
+            url = browser.absurl(url, base=True)
+            if params:
+                p = requests.models.PreparedRequest()
+                p.prepare_url(url, params)
+                url = p.url
+            return url
+
+        raise UrlNotResolvable('Unable to resolve URL with %r. Available are %s' % (kwargs, ', '.join([pattern for pattern, _ in patterns])))
+
+    def match(self, url, base=None):
+        """
+        Check if the given url match this object.
+        """
+        if base is None:
+            assert self.browser is not None
+            base = self.browser.BASEURL
+
+        for regex in self.urls:
+            if not re.match(r'^\w+://.*', regex):
+                regex = re.escape(base).rstrip('/') + '/' + regex.lstrip('/')
+            m = re.match(regex, url)
+            if m:
+                return m
+
+    def handle(self, response):
+        """
+        Handle a HTTP response to get an instance of the klass if it matches.
+        """
+        if self.klass is None:
+            return
+
+        m = self.match(response.url)
+        if m:
+            page = self.klass(self.browser, response, m.groupdict())
+            if hasattr(page, 'is_here'):
+                if callable(page.is_here):
+                    if page.is_here():
+                        return page
+                else:
+                    assert isinstance(page.is_here, basestring)
+                    if page.doc.xpath(page.is_here):
+                        return page
+            else:
+                return page
+
+    def id2url(self, func):
+        r"""
+        Helper decorator to get an URL if the given first parameter is an ID.
+        """
+        def inner(browser, id_or_url, *args, **kwargs):
+            if re.match('^https?://.*', id_or_url):
+                if not self.match(id_or_url, browser.BASEURL):
+                    return
+            else:
+                id_or_url = self.build(id=id_or_url, browser=browser)
+
+            return func(browser, id_or_url, *args, **kwargs)
+        return inner
+
+