From 1ff07273b340604f946f04ded344e274afd0a619 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Mon, 3 Dec 2012 21:03:08 +0100 Subject: [PATCH] Add base URL restriction For security reasons (SSL only, no leakage, etc.) --- weboob/tools/browser2/browser.py | 36 +++++++++++++++++++++++++++++++- weboob/tools/browser2/test.py | 32 +++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index 7461691a..4cbcdfca 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -389,6 +389,10 @@ class BaseBrowser(object): return oldurl +class UrlNotAllowed(Exception): + pass + + class DomainBrowser(BaseBrowser): """ A browser that handles relative URLs and can have a base URL (usually a domain). @@ -403,6 +407,33 @@ class DomainBrowser(BaseBrowser): See absurl(). """ + """ + URLs allowed to load. + This can be used to force SSL (if the BASEURL is SSL) or to prevent any other leakage. + Set to True to allow only URLs starting with the BASEURL. + Set it to a list of allowed URLs if you have multiple allowed URLs. + More complex behavior is possible by overriding url_allowed(). + """ + RESTRICT_URL = False + + def url_allowed(self, url): + """ + Checks if we are allowed to visit a URL. + See RESTRICT_URL. + + :param url: Absolute URL + :type url: str + :rtype: bool + """ + if self.BASEURL is None or self.RESTRICT_URL is False: + return True + if self.RESTRICT_URL is True: + return url.startswith(self.BASEURL) + for restrict_url in self.RESTRICT_URL: + if url.startswith(restrict_url): + return True + return False + def absurl(self, uri, base=None): """ Get the absolute URL, relative to the base URL. 
@@ -424,7 +455,10 @@ class DomainBrowser(BaseBrowser): return urljoin(base, uri) def open(self, uri, *args, **kwargs): - return super(DomainBrowser, self).open(self.absurl(uri), *args, **kwargs) + url = self.absurl(uri) + if not self.url_allowed(url): + raise UrlNotAllowed(url) + return super(DomainBrowser, self).open(url, *args, **kwargs) def home(self): """ diff --git a/weboob/tools/browser2/test.py b/weboob/tools/browser2/test.py index 670285b1..1d3867f1 100644 --- a/weboob/tools/browser2/test.py +++ b/weboob/tools/browser2/test.py @@ -26,8 +26,9 @@ import string from requests import HTTPError from nose.plugins.skip import SkipTest +from nose.tools import assert_raises -from .browser import BaseBrowser, DomainBrowser, Weboob +from .browser import BaseBrowser, DomainBrowser, Weboob, UrlNotAllowed from .cookiejar import CookieJar, CookiePolicy from .cookies import Cookies @@ -222,6 +223,35 @@ def test_relative(): assert b.absurl('//example.com/aaa/bbb') == 'https://example.com/aaa/bbb' +def test_allow_url(): + b = DomainBrowser() + b.RESTRICT_URL = True + assert b.url_allowed('http://example.com/') + assert b.url_allowed('http://example.net/') + + b.BASEURL = 'http://example.com/' + assert b.url_allowed('http://example.com/') + assert b.url_allowed('http://example.com/aaa') + assert not b.url_allowed('https://example.com/') + assert not b.url_allowed('http://example.net/') + assert not b.url_allowed('http://') + + b.BASEURL = 'https://example.com/' + assert not b.url_allowed('http://example.com/') + assert not b.url_allowed('http://example.com/aaa') + assert b.url_allowed('https://example.com/') + assert b.url_allowed('https://example.com/aaa/bbb') + + b.RESTRICT_URL = ['https://example.com/', 'http://example.com/'] + assert b.url_allowed('http://example.com/aaa/bbb') + assert b.url_allowed('https://example.com/aaa/bbb') + assert not b.url_allowed('http://example.net/aaa/bbb') + assert not b.url_allowed('https://example.net/aaa/bbb') + + 
assert_raises(UrlNotAllowed, b.location, 'http://example.net/') + assert_raises(UrlNotAllowed, b.open, 'http://example.net/') + + def test_changereq(): """ Test overloading request defaults