browser2: Add support for relative URLs / BASEURL
This commit is contained in:
parent
60d1d03c1c
commit
b70d722c03
2 changed files with 89 additions and 7 deletions
|
|
@ -19,10 +19,15 @@
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import urlparse
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from requests.status_codes import codes
|
from requests.status_codes import codes
|
||||||
|
|
||||||
|
|
||||||
|
# TODO define __all__
|
||||||
|
|
||||||
|
|
||||||
class Profile(object):
|
class Profile(object):
|
||||||
"""
|
"""
|
||||||
A profile represents the way Browser should act.
|
A profile represents the way Browser should act.
|
||||||
|
|
@ -96,7 +101,12 @@ class Wget(Profile):
|
||||||
session.config['keep_alive'] = True
|
session.config['keep_alive'] = True
|
||||||
|
|
||||||
|
|
||||||
class Browser(object):
|
class BaseBrowser(object):
|
||||||
|
"""
|
||||||
|
Simple browser class.
|
||||||
|
Act like a browser, and don't try to do too much.
|
||||||
|
"""
|
||||||
|
|
||||||
PROFILE = Firefox()
|
PROFILE = Firefox()
|
||||||
TIMEOUT = 10.0
|
TIMEOUT = 10.0
|
||||||
|
|
||||||
|
|
@ -107,6 +117,9 @@ class Browser(object):
|
||||||
self.response = None
|
self.response = None
|
||||||
|
|
||||||
def _setup_session(self, profile):
|
def _setup_session(self, profile):
|
||||||
|
"""
|
||||||
|
Set up a python-requests session for our usage.
|
||||||
|
"""
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
|
|
||||||
# Raise exceptions on HTTP errors
|
# Raise exceptions on HTTP errors
|
||||||
|
|
@ -173,10 +186,16 @@ class Browser(object):
|
||||||
def open(self, url, data=None, fix_redirect=True, **kwargs):
|
def open(self, url, data=None, fix_redirect=True, **kwargs):
|
||||||
"""
|
"""
|
||||||
Wrapper around request().
|
Wrapper around request().
|
||||||
Makes a GET request, or a POST if data is provided.
|
Makes a GET request, or a POST if data is not None.
|
||||||
|
Empty (but not None) data *will* still make a POST.
|
||||||
|
|
||||||
Call this if you do not want to "visit" the URL (for instance, you
|
Call this if you do not want to "visit" the URL (for instance, you
|
||||||
are downloading a file).
|
are downloading a file).
|
||||||
|
|
||||||
|
:param url: URL
|
||||||
|
:type url: str
|
||||||
|
|
||||||
|
:rtype: :class:`requests.Response`
|
||||||
"""
|
"""
|
||||||
method = kwargs.pop('method', None)
|
method = kwargs.pop('method', None)
|
||||||
if method is None:
|
if method is None:
|
||||||
|
|
@ -205,3 +224,44 @@ class Browser(object):
|
||||||
kwargs.setdefault('headers', {}).setdefault('Content-Length', '0')
|
kwargs.setdefault('headers', {}).setdefault('Content-Length', '0')
|
||||||
kwargs.setdefault('timeout', self.TIMEOUT)
|
kwargs.setdefault('timeout', self.TIMEOUT)
|
||||||
return self.session.request(*args, **kwargs)
|
return self.session.request(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class DomainBrowser(BaseBrowser):
|
||||||
|
"""
|
||||||
|
A browser that handles relative URLs.
|
||||||
|
|
||||||
|
For instance self.location('/hello') will get http://weboob.org/hello
|
||||||
|
if BASEURL is 'http://weboob.org/'.
|
||||||
|
"""
|
||||||
|
|
||||||
|
BASEURL = None
|
||||||
|
"""
|
||||||
|
Base URL, e.g. 'http://weboob.org/' or 'https://weboob.org/'
|
||||||
|
See absurl().
|
||||||
|
"""
|
||||||
|
|
||||||
|
def absurl(self, uri, base=None):
|
||||||
|
"""
|
||||||
|
Get the absolute URL, relative to the base URL.
|
||||||
|
If BASEURL is None, it will try to use the current URL.
|
||||||
|
If base is False, BASEURL is ignored and the current URL is always used.
|
||||||
|
|
||||||
|
:param uri: URI to make absolute. It can be already absolute.
|
||||||
|
:type uri: str
|
||||||
|
|
||||||
|
:param base: Base absolute URL.
|
||||||
|
:type base: str or None or False
|
||||||
|
|
||||||
|
:rtype: str
|
||||||
|
"""
|
||||||
|
if base is None:
|
||||||
|
base = self.BASEURL
|
||||||
|
if base is None or base is False:
|
||||||
|
base = self.url
|
||||||
|
return urlparse.urljoin(base, uri)
|
||||||
|
|
||||||
|
def open(self, uri, *args, **kwargs):
|
||||||
|
return BaseBrowser.open(self, self.absurl(uri), *args, **kwargs)
|
||||||
|
|
||||||
|
def home(self):
|
||||||
|
return self.location('/')
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
from .browser import Browser, Weboob
|
from .browser import BaseBrowser, DomainBrowser, Weboob
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
@ -27,7 +27,7 @@ from nose.plugins.skip import SkipTest
|
||||||
|
|
||||||
|
|
||||||
def test_base():
|
def test_base():
|
||||||
b = Browser()
|
b = BaseBrowser()
|
||||||
r = b.location('http://httpbin.org/headers')
|
r = b.location('http://httpbin.org/headers')
|
||||||
assert isinstance(r.text, unicode)
|
assert isinstance(r.text, unicode)
|
||||||
assert 'Firefox' in r.text
|
assert 'Firefox' in r.text
|
||||||
|
|
@ -37,7 +37,7 @@ def test_base():
|
||||||
|
|
||||||
|
|
||||||
def test_redirects():
|
def test_redirects():
|
||||||
b = Browser()
|
b = BaseBrowser()
|
||||||
b.location('http://httpbin.org/redirect/1')
|
b.location('http://httpbin.org/redirect/1')
|
||||||
assert b.url == 'http://httpbin.org/get'
|
assert b.url == 'http://httpbin.org/get'
|
||||||
|
|
||||||
|
|
@ -47,7 +47,7 @@ def test_brokenpost():
|
||||||
Tests _fix_redirect()
|
Tests _fix_redirect()
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
b = Browser()
|
b = BaseBrowser()
|
||||||
# postbin is picky with empty posts. that's good!
|
# postbin is picky with empty posts. that's good!
|
||||||
r = b.location('http://www.postbin.org/', {})
|
r = b.location('http://www.postbin.org/', {})
|
||||||
# ensures empty data (but not None) does a POST
|
# ensures empty data (but not None) does a POST
|
||||||
|
|
@ -67,10 +67,32 @@ def test_brokenpost():
|
||||||
|
|
||||||
|
|
||||||
def test_weboob():
|
def test_weboob():
|
||||||
class BooBrowser(Browser):
|
class BooBrowser(BaseBrowser):
|
||||||
PROFILE = Weboob('0.0')
|
PROFILE = Weboob('0.0')
|
||||||
|
|
||||||
b = BooBrowser()
|
b = BooBrowser()
|
||||||
r = b.location('http://httpbin.org/headers')
|
r = b.location('http://httpbin.org/headers')
|
||||||
assert 'weboob/0.0' in r.text
|
assert 'weboob/0.0' in r.text
|
||||||
assert 'identity' in r.text
|
assert 'identity' in r.text
|
||||||
|
|
||||||
|
|
||||||
|
def test_relative():
|
||||||
|
b = DomainBrowser()
|
||||||
|
b.location('http://httpbin.org/')
|
||||||
|
b.location('/ip')
|
||||||
|
assert b.url == 'http://httpbin.org/ip'
|
||||||
|
|
||||||
|
assert b.absurl('/ip') == 'http://httpbin.org/ip'
|
||||||
|
b.location('http://www.postbin.org/')
|
||||||
|
assert b.absurl('/ip') == 'http://www.postbin.org/ip'
|
||||||
|
b.BASEURL = 'http://httpbin.org/aaaaaa'
|
||||||
|
assert b.absurl('/ip') == 'http://httpbin.org/ip'
|
||||||
|
assert b.absurl('ip') == 'http://httpbin.org/ip'
|
||||||
|
assert b.absurl('/ip', False) == 'http://www.postbin.org/ip'
|
||||||
|
b.home()
|
||||||
|
assert b.url == 'http://httpbin.org/'
|
||||||
|
b.BASEURL = 'http://httpbin.org/aaaaaa/'
|
||||||
|
assert b.absurl('/') == 'http://httpbin.org/'
|
||||||
|
assert b.absurl('/bb') == 'http://httpbin.org/bb'
|
||||||
|
assert b.absurl('') == 'http://httpbin.org/aaaaaa/'
|
||||||
|
assert b.absurl('bb') == 'http://httpbin.org/aaaaaa/bb'
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue