Add base URL restriction

For security reasons (SSL only, no leakage, etc.)
This commit is contained in:
Laurent Bachelier 2012-12-03 21:03:08 +01:00 committed by Romain Bignon
commit 1ff07273b3
2 changed files with 66 additions and 2 deletions

View file

@ -389,6 +389,10 @@ class BaseBrowser(object):
return oldurl
class UrlNotAllowed(Exception):
    """
    Raised by DomainBrowser.open() when url_allowed() rejects the
    absolute URL; the rejected URL is passed as the exception argument.
    """
class DomainBrowser(BaseBrowser):
"""
A browser that handles relative URLs and can have a base URL (usually a domain).
@ -403,6 +407,33 @@ class DomainBrowser(BaseBrowser):
See absurl().
"""
"""
URLs allowed to load.
This can be used to force SSL (if the BASEURL is SSL) or to prevent any other leakage.
Set to True to allow only URLs starting with the BASEURL.
Set it to a list of allowed URLs if you have multiple allowed URLs.
More complex behavior is possible by overloading url_allowed()
"""
RESTRICT_URL = False
def url_allowed(self, url):
    """
    Checks if we are allowed to visit an URL.
    See RESTRICT_URL.

    When RESTRICT_URL is True the only allowed prefix is BASEURL;
    otherwise RESTRICT_URL is iterated as a collection of allowed prefixes.
    A prefix that stops right after the host (e.g. 'https://example.com')
    only matches when the URL ends there or continues with '/', '?' or '#'.
    Prefixes that already contain a path are matched as plain prefixes.

    :param url: Absolute URL
    :type url: str
    :rtype: bool
    """
    if self.BASEURL is None or self.RESTRICT_URL is False:
        return True
    if self.RESTRICT_URL is True:
        prefixes = (self.BASEURL,)
    else:
        prefixes = self.RESTRICT_URL
    for prefix in prefixes:
        if not url.startswith(prefix):
            continue
        # Everything after the scheme separator; empty for a scheme-only
        # prefix such as 'https://', which stays a plain prefix match.
        authority = prefix.partition('://')[2]
        if authority and '/' not in authority and len(url) > len(prefix):
            # Host-only prefix: without a boundary check,
            # 'https://example.com' would also accept
            # 'https://example.com.evil.net/', 'https://example.com@evil.net/'
            # or another port, which defeats the restriction.
            if url[len(prefix)] not in '/?#':
                continue
        return True
    return False
def absurl(self, uri, base=None):
"""
Get the absolute URL, relative to the base URL.
@ -424,7 +455,10 @@ class DomainBrowser(BaseBrowser):
return urljoin(base, uri)
def open(self, uri, *args, **kwargs):
    """
    Open an URL after resolving it and checking that it is allowed.

    The URI is made absolute with absurl(), then checked with
    url_allowed() before being handed to the parent class open().

    :param uri: URI to open, passed to absurl()
    :raises UrlNotAllowed: if url_allowed() rejects the absolute URL
    """
    # The check must run before the parent open(): an earlier unconditional
    # return here made the restriction unreachable.
    url = self.absurl(uri)
    if not self.url_allowed(url):
        raise UrlNotAllowed(url)
    return super(DomainBrowser, self).open(url, *args, **kwargs)
def home(self):
"""

View file

@ -26,8 +26,9 @@ import string
from requests import HTTPError
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
from .browser import BaseBrowser, DomainBrowser, Weboob
from .browser import BaseBrowser, DomainBrowser, Weboob, UrlNotAllowed
from .cookiejar import CookieJar, CookiePolicy
from .cookies import Cookies
@ -222,6 +223,35 @@ def test_relative():
assert b.absurl('//example.com/aaa/bbb') == 'https://example.com/aaa/bbb'
def test_allow_url():
    """
    Test URL restriction through RESTRICT_URL and url_allowed()
    """
    browser = DomainBrowser()
    browser.RESTRICT_URL = True
    # No BASEURL assigned on the instance yet: every URL is expected to pass.
    for url in ('http://example.com/', 'http://example.net/'):
        assert browser.url_allowed(url)

    # Plain HTTP base: only URLs below it pass; HTTPS and other hosts do not.
    browser.BASEURL = 'http://example.com/'
    for url in ('http://example.com/', 'http://example.com/aaa'):
        assert browser.url_allowed(url)
    for url in ('https://example.com/', 'http://example.net/', 'http://'):
        assert not browser.url_allowed(url)

    # HTTPS base: the plain HTTP variants are now the rejected ones.
    browser.BASEURL = 'https://example.com/'
    for url in ('http://example.com/', 'http://example.com/aaa'):
        assert not browser.url_allowed(url)
    for url in ('https://example.com/', 'https://example.com/aaa/bbb'):
        assert browser.url_allowed(url)

    # Explicit list of allowed prefixes.
    browser.RESTRICT_URL = ['https://example.com/', 'http://example.com/']
    for url in ('http://example.com/aaa/bbb', 'https://example.com/aaa/bbb'):
        assert browser.url_allowed(url)
    for url in ('http://example.net/aaa/bbb', 'https://example.net/aaa/bbb'):
        assert not browser.url_allowed(url)

    # Both entry points must refuse a forbidden URL.
    for fetch in (browser.location, browser.open):
        assert_raises(UrlNotAllowed, fetch, 'http://example.net/')
def test_changereq():
"""
Test overloading request defaults