browser: Allow more control of the referrer

This commit is contained in:
Laurent Bachelier 2015-03-11 14:25:33 +01:00 committed by Romain Bignon
commit b4a428a2c8
2 changed files with 20 additions and 8 deletions

View file

@ -96,6 +96,11 @@ class Browser(object):
Saved state variables. Saved state variables.
""" """
ALLOW_REFERRER = True
"""
Controls the behavior of get_referrer.
"""
@classmethod @classmethod
def asset(cls, localfile): def asset(cls, localfile):
""" """
@ -451,6 +456,11 @@ class Browser(object):
Reference: https://en.wikipedia.org/wiki/HTTP_referer Reference: https://en.wikipedia.org/wiki/HTTP_referer
The behavior can be controlled through the ALLOW_REFERRER attribute.
True allows the referrer to be sent whenever the other checks pass,
False never sends it, and None sends it only if the new URL is within
the same domain as the old one.
:param oldurl: Current absolute URL :param oldurl: Current absolute URL
:type oldurl: str or None :type oldurl: str or None
@ -459,21 +469,21 @@ class Browser(object):
:rtype: str or None :rtype: str or None
""" """
if self.ALLOW_REFERRER is False:
return
if oldurl is None: if oldurl is None:
return None return
old = urlparse(oldurl) old = urlparse(oldurl)
new = urlparse(newurl) new = urlparse(newurl)
# Do not leak secure URLs to insecure URLs # Do not leak secure URLs to insecure URLs
if old.scheme == 'https' and new.scheme != 'https': if old.scheme == 'https' and new.scheme != 'https':
return None return
# Reloading the page. Usually no referrer. # Reloading the page. Usually no referrer.
if oldurl == newurl: if oldurl == newurl:
return None return
# TODO maybe implement some *optional* privacy features: # Domain-based privacy
# * do not leak referrer to other domains (often breaks websites) if self.ALLOW_REFERRER is None and old.netloc != new.netloc:
# * send a fake referrer (root of the current domain) return
# * never send the referrer
# Inspired by the RefControl Firefox addon.
return oldurl return oldurl

View file

@ -80,6 +80,8 @@ class GoogleBot(Profile):
""" """
Try to mimic Googlebot. Try to mimic Googlebot.
Keep in mind there are ways to authenticate real Googlebot IPs. Keep in mind there are ways to authenticate real Googlebot IPs.
You will most likely want to set ALLOW_REFERRER to False.
""" """
def setup_session(self, session): def setup_session(self, session):