diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py
index e385d0ff..3e5fca41 100644
--- a/weboob/tools/browser2/browser.py
+++ b/weboob/tools/browser2/browser.py
@@ -19,10 +19,15 @@
 
 from __future__ import absolute_import
 
+import urlparse
+
 import requests
 from requests.status_codes import codes
 
 
+# TODO define __all__
+
+
 class Profile(object):
     """
     A profile represents the way Browser should act.
@@ -96,7 +101,12 @@ class Wget(Profile):
         session.config['keep_alive'] = True
 
 
-class Browser(object):
+class BaseBrowser(object):
+    """
+    Simple browser class.
+    Acts like a browser, and doesn't try to do too much.
+    """
+
     PROFILE = Firefox()
     TIMEOUT = 10.0
 
@@ -107,6 +117,9 @@ class Browser(object):
         self.response = None
 
     def _setup_session(self, profile):
+        """
+        Set up a python-requests session for our usage.
+        """
         session = requests.Session()
 
         # Raise exceptions on HTTP errors
@@ -173,10 +186,16 @@ class Browser(object):
     def open(self, url, data=None, fix_redirect=True, **kwargs):
         """
         Wrapper around request().
-        Makes a GET request, or a POST if data is provided.
+        Makes a GET request, or a POST if data is not None.
+        Empty (but not None) data *will* make a POST.
 
         Call this if you do not want to "visit" the URL
         (for instance, you are downloading a file).
+
+        :param url: URL
+        :type url: str
+
+        :rtype: :class:`requests.Response`
         """
         method = kwargs.pop('method', None)
         if method is None:
@@ -205,3 +224,44 @@ class Browser(object):
             kwargs.setdefault('headers', {}).setdefault('Content-Length', '0')
         kwargs.setdefault('timeout', self.TIMEOUT)
         return self.session.request(*args, **kwargs)
+
+
+class DomainBrowser(BaseBrowser):
+    """
+    A browser that handles relative URLs.
+
+    For instance self.location('/hello') will get http://weboob.org/hello
+    if BASEURL is 'http://weboob.org/'.
+    """
+
+    BASEURL = None
+    """
+    Base URL, e.g. 'http://weboob.org/' or 'https://weboob.org/'
+    See absurl().
+    """
+
+    def absurl(self, uri, base=None):
+        """
+        Get the absolute URL, relative to the base URL.
+        If base is None, it falls back to BASEURL, then to the current URL.
+        If base is False, the current URL is always used.
+
+        :param uri: URI to make absolute. It can be already absolute.
+        :type uri: str
+
+        :param base: Base absolute URL.
+        :type base: str or None or False
+
+        :rtype: str
+        """
+        if base is None:
+            base = self.BASEURL
+        if base is None or base is False:
+            base = self.url
+        return urlparse.urljoin(base, uri)
+
+    def open(self, uri, *args, **kwargs):
+        return BaseBrowser.open(self, self.absurl(uri), *args, **kwargs)
+
+    def home(self):
+        return self.location('/')
diff --git a/weboob/tools/browser2/test.py b/weboob/tools/browser2/test.py
index 7215cd29..e82a1052 100644
--- a/weboob/tools/browser2/test.py
+++ b/weboob/tools/browser2/test.py
@@ -19,7 +19,7 @@
 
 from __future__ import absolute_import
 
-from .browser import Browser, Weboob
+from .browser import BaseBrowser, DomainBrowser, Weboob
 
 import requests
 
@@ -27,7 +27,7 @@ from nose.plugins.skip import SkipTest
 
 
 def test_base():
-    b = Browser()
+    b = BaseBrowser()
     r = b.location('http://httpbin.org/headers')
     assert isinstance(r.text, unicode)
     assert 'Firefox' in r.text
@@ -37,7 +37,7 @@ def test_base():
 
 
 def test_redirects():
-    b = Browser()
+    b = BaseBrowser()
     b.location('http://httpbin.org/redirect/1')
     assert b.url == 'http://httpbin.org/get'
 
@@ -47,7 +47,7 @@ def test_brokenpost():
     Tests _fix_redirect()
     """
     try:
-        b = Browser()
+        b = BaseBrowser()
         # postbin is picky with empty posts. that's good!
         r = b.location('http://www.postbin.org/', {})
         # ensures empty data (but not None) does a POST
@@ -67,10 +67,32 @@ def test_brokenpost():
 
 
 def test_weboob():
-    class BooBrowser(Browser):
+    class BooBrowser(BaseBrowser):
         PROFILE = Weboob('0.0')
 
     b = BooBrowser()
     r = b.location('http://httpbin.org/headers')
     assert 'weboob/0.0' in r.text
     assert 'identity' in r.text
+
+
+def test_relative():
+    b = DomainBrowser()
+    b.location('http://httpbin.org/')
+    b.location('/ip')
+    assert b.url == 'http://httpbin.org/ip'
+
+    assert b.absurl('/ip') == 'http://httpbin.org/ip'
+    b.location('http://www.postbin.org/')
+    assert b.absurl('/ip') == 'http://www.postbin.org/ip'
+    b.BASEURL = 'http://httpbin.org/aaaaaa'
+    assert b.absurl('/ip') == 'http://httpbin.org/ip'
+    assert b.absurl('ip') == 'http://httpbin.org/ip'
+    assert b.absurl('/ip', False) == 'http://www.postbin.org/ip'
+    b.home()
+    assert b.url == 'http://httpbin.org/'
+    b.BASEURL = 'http://httpbin.org/aaaaaa/'
+    assert b.absurl('/') == 'http://httpbin.org/'
+    assert b.absurl('/bb') == 'http://httpbin.org/bb'
+    assert b.absurl('') == 'http://httpbin.org/aaaaaa/'
+    assert b.absurl('bb') == 'http://httpbin.org/aaaaaa/bb'