browser2: Add support for relative URLs / BASEURL

This commit is contained in:
Laurent Bachelier 2012-03-27 03:28:23 +02:00 committed by Romain Bignon
commit b70d722c03
2 changed files with 89 additions and 7 deletions

View file

@ -19,10 +19,15 @@
from __future__ import absolute_import
import urlparse
import requests
from requests.status_codes import codes
# TODO define __all__
class Profile(object):
"""
A profile represents the way Browser should act.
@ -96,7 +101,12 @@ class Wget(Profile):
session.config['keep_alive'] = True
class Browser(object):
class BaseBrowser(object):
"""
Simple browser class.
Act like a browser, and don't try to do too much.
"""
PROFILE = Firefox()
TIMEOUT = 10.0
@ -107,6 +117,9 @@ class Browser(object):
self.response = None
def _setup_session(self, profile):
"""
Set up a python-requests session for our usage.
"""
session = requests.Session()
# Raise exceptions on HTTP errors
@ -173,10 +186,16 @@ class Browser(object):
def open(self, url, data=None, fix_redirect=True, **kwargs):
"""
Wrapper around request().
Makes a GET request, or a POST if data is provided.
Makes a GET request, or a POST if data is not None.
An empty data *will* make a post.
Call this if you do not want to "visit" the URL (for instance, you
are downloading a file).
:param url: URL
:type url: str
:rtype: :class:`requests.Response`
"""
method = kwargs.pop('method', None)
if method is None:
@ -205,3 +224,44 @@ class Browser(object):
kwargs.setdefault('headers', {}).setdefault('Content-Length', '0')
kwargs.setdefault('timeout', self.TIMEOUT)
return self.session.request(*args, **kwargs)
class DomainBrowser(BaseBrowser):
    """
    Browser variant that accepts relative URLs.

    Every URI handed to open() is first made absolute with absurl(), so
    self.location('/hello') fetches http://weboob.org/hello when BASEURL
    is 'http://weboob.org/'.
    """

    BASEURL = None
    """
    Absolute URL that relative URIs are resolved against,
    e.g. 'http://weboob.org/' or 'https://weboob.org/'.
    See absurl().
    """

    def absurl(self, uri, base=None):
        """
        Resolve a URI into an absolute URL.

        The base is chosen as follows: an explicit `base` is used as given;
        when `base` is None, BASEURL is used; when the result is still None,
        or when `base` is False, the current URL (self.url) is used instead.

        :param uri: URI to make absolute. It can be already absolute.
        :type uri: str
        :param base: Base absolute URL.
        :type base: str or None or False
        :rtype: str
        """
        effective_base = self.BASEURL if base is None else base
        # Both "nothing configured" (None) and "explicitly ignore BASEURL"
        # (False) fall back to the page the browser is currently on.
        if effective_base is None or effective_base is False:
            effective_base = self.url
        return urlparse.urljoin(effective_base, uri)

    def open(self, uri, *args, **kwargs):
        """
        Same as BaseBrowser.open(), except that `uri` may be relative;
        it is resolved with absurl() before the request is made.

        :param uri: absolute or relative URI
        :type uri: str
        """
        absolute_url = self.absurl(uri)
        return BaseBrowser.open(self, absolute_url, *args, **kwargs)

    def home(self):
        """
        Go to the root path ('/'), resolved like any other relative URI.
        """
        root = '/'
        return self.location(root)

View file

@ -19,7 +19,7 @@
from __future__ import absolute_import
from .browser import Browser, Weboob
from .browser import BaseBrowser, DomainBrowser, Weboob
import requests
@ -27,7 +27,7 @@ from nose.plugins.skip import SkipTest
def test_base():
b = Browser()
b = BaseBrowser()
r = b.location('http://httpbin.org/headers')
assert isinstance(r.text, unicode)
assert 'Firefox' in r.text
@ -37,7 +37,7 @@ def test_base():
def test_redirects():
b = Browser()
b = BaseBrowser()
b.location('http://httpbin.org/redirect/1')
assert b.url == 'http://httpbin.org/get'
@ -47,7 +47,7 @@ def test_brokenpost():
Tests _fix_redirect()
"""
try:
b = Browser()
b = BaseBrowser()
# postbin is picky with empty posts. that's good!
r = b.location('http://www.postbin.org/', {})
# ensures empty data (but not None) does a POST
@ -67,10 +67,32 @@ def test_brokenpost():
def test_weboob():
class BooBrowser(Browser):
class BooBrowser(BaseBrowser):
PROFILE = Weboob('0.0')
b = BooBrowser()
r = b.location('http://httpbin.org/headers')
assert 'weboob/0.0' in r.text
assert 'identity' in r.text
def test_relative():
    """
    Check how DomainBrowser resolves relative URIs, first without and
    then with a BASEURL. Performs real HTTP requests.
    """
    browser = DomainBrowser()

    # BASEURL is unset here: resolution follows the page last visited.
    browser.location('http://httpbin.org/')
    browser.location('/ip')
    assert browser.url == 'http://httpbin.org/ip'
    assert browser.absurl('/ip') == 'http://httpbin.org/ip'

    browser.location('http://www.postbin.org/')
    assert browser.absurl('/ip') == 'http://www.postbin.org/ip'

    # BASEURL without a trailing slash: its last path segment is replaced,
    # so both the rooted and the bare form end up at /ip.
    browser.BASEURL = 'http://httpbin.org/aaaaaa'
    for uri in ('/ip', 'ip'):
        assert browser.absurl(uri) == 'http://httpbin.org/ip'
    # base=False bypasses BASEURL in favour of the current page.
    assert browser.absurl('/ip', False) == 'http://www.postbin.org/ip'

    browser.home()
    assert browser.url == 'http://httpbin.org/'

    # BASEURL with a trailing slash: bare URIs are appended under it.
    browser.BASEURL = 'http://httpbin.org/aaaaaa/'
    expectations = (
        ('/', 'http://httpbin.org/'),
        ('/bb', 'http://httpbin.org/bb'),
        ('', 'http://httpbin.org/aaaaaa/'),
        ('bb', 'http://httpbin.org/aaaaaa/bb'),
    )
    for uri, expected in expectations:
        assert browser.absurl(uri) == expected