From b4a428a2c88cff4e7d958ef7e486d82c36318f5d Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Wed, 11 Mar 2015 14:25:33 +0100 Subject: [PATCH] browser: Allow more control of the referrer --- weboob/browser/browsers.py | 26 ++++++++++++++++++-------- weboob/browser/profiles.py | 2 ++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/weboob/browser/browsers.py b/weboob/browser/browsers.py index cb912518..5a69058b 100644 --- a/weboob/browser/browsers.py +++ b/weboob/browser/browsers.py @@ -96,6 +96,11 @@ class Browser(object): Saved state variables. """ + ALLOW_REFERRER = True + """ + Controls the behavior of get_referrer. + """ + @classmethod def asset(cls, localfile): """ @@ -451,6 +456,11 @@ class Browser(object): Reference: https://en.wikipedia.org/wiki/HTTP_referer + The behavior can be controlled through the ALLOW_REFERRER attribute. + True allows the referrer to be sent whenever the other checks pass, + False never sends it, and None sends it only if the new URL is within + the same domain as the current one. + :param oldurl: Current absolute URL :type oldurl: str or None @@ -459,21 +469,21 @@ class Browser(object): :rtype: str or None """ + if self.ALLOW_REFERRER is False: + return if oldurl is None: - return None + return old = urlparse(oldurl) new = urlparse(newurl) # Do not leak secure URLs to insecure URLs if old.scheme == 'https' and new.scheme != 'https': - return None + return # Reloading the page. Usually no referrer. if oldurl == newurl: - return None - # TODO maybe implement some *optional* privacy features: - # * do not leak referrer to other domains (often breaks websites) - # * send a fake referrer (root of the current domain) - # * never send the referrer - # Inspired by the RefControl Firefox addon.
+ return + # Domain-based privacy + if self.ALLOW_REFERRER is None and old.netloc != new.netloc: + return return oldurl diff --git a/weboob/browser/profiles.py b/weboob/browser/profiles.py index 589121ba..f60abc58 100644 --- a/weboob/browser/profiles.py +++ b/weboob/browser/profiles.py @@ -80,6 +80,8 @@ class GoogleBot(Profile): """ Try to mimic Googlebot. Keep in mind there are ways to authenticate real Googlebot IPs. + + You will most likely want to set ALLOW_REFERRER to False. """ def setup_session(self, session):