browser2: Separate CookieJar from CookiePolicy

And avoid methods starting with _!
This commit is contained in:
Laurent Bachelier 2012-04-17 05:35:52 +02:00 committed by Romain Bignon
commit 1c404639c1
3 changed files with 73 additions and 61 deletions

View file

@ -25,7 +25,7 @@ import requests
from requests.status_codes import codes from requests.status_codes import codes
from copy import deepcopy from copy import deepcopy
from .cookiejar import CookieJar from .cookiejar import CookieJar, CookiePolicy
# TODO define __all__ # TODO define __all__
@ -114,20 +114,21 @@ class BaseBrowser(object):
PROFILE = Firefox() PROFILE = Firefox()
TIMEOUT = 10.0 TIMEOUT = 10.0
COOKIE_POLICY = CookiePolicy()
def __init__(self): def __init__(self):
self._setup_session(self.PROFILE) self._setup_session(self.PROFILE)
self._setup_cookies() self._setup_cookies(self.COOKIE_POLICY)
self.url = None self.url = None
self.response = None self.response = None
def _setup_cookies(self): def _setup_cookies(self, policy):
""" """
Create and configure a cookie jar. Create and configure a cookie jar.
Overload this method to set custom options, or even change the class. Overload this method to set custom options, or even change the class.
""" """
self.cookies = CookieJar() self.cookies = CookieJar(policy)
def _setup_session(self, profile): def _setup_session(self, profile):
""" """
@ -237,7 +238,7 @@ class BaseBrowser(object):
referrer = orig_referrer referrer = orig_referrer
else: else:
# Guess from last response # Guess from last response
referrer = self._get_referrer(response.url, url) referrer = self.get_referrer(response.url, url)
call_args = deepcopy(orig_args) call_args = deepcopy(orig_args)
response = self.open(url, referrer=referrer, **call_args) response = self.open(url, referrer=referrer, **call_args)
@ -322,7 +323,7 @@ class BaseBrowser(object):
kwargs['allow_redirects'] = False kwargs['allow_redirects'] = False
if referrer is None: if referrer is None:
referrer = self._get_referrer(self.url, url) referrer = self.get_referrer(self.url, url)
if referrer: if referrer:
# Yes, it is a misspelling. # Yes, it is a misspelling.
kwargs.setdefault('headers', {}).setdefault('Referer', referrer) kwargs.setdefault('headers', {}).setdefault('Referer', referrer)
@ -352,7 +353,7 @@ class BaseBrowser(object):
return response return response
def _get_referrer(self, oldurl, newurl): def get_referrer(self, oldurl, newurl):
""" """
Get the referrer to send when doing a request. Get the referrer to send when doing a request.
If we should not send a referrer, it will return None. If we should not send a referrer, it will return None.

View file

@ -39,17 +39,9 @@ Cookie.attribute_parsers['domain'] = parse_domain
Cookie.attribute_validators['domain'] = valid_domain Cookie.attribute_validators['domain'] = valid_domain
class CookieJar(object): class CookiePolicy(object):
""" """
Manage Cookies like a real browser, with security and privacy in mind. Defines how cookies are accepted, and what to do with them.
python-requests accepts cookies blindly,
Expirations are not taken into account,
it can't handle the server asking to delete a cookie,
and sends cookies even when changing domains!
Of course, secure (SSL only) cookies aren't handled either.
This class fixes all that.
""" """
ACCEPT_DOMAINS = [] ACCEPT_DOMAINS = []
@ -89,13 +81,7 @@ class CookieJar(object):
This is only for setting cookies; it should be relatively safe in Weboob. This is only for setting cookies; it should be relatively safe in Weboob.
""" """
def __init__(self): def domain_match(self, pattern, domain):
"""
Cookies are delicious delicacies.
"""
self.cookies = dict()
def _domain_match(self, pattern, domain):
""" """
Checks a domain matches a domain pattern. Checks a domain matches a domain pattern.
Patterns can be either the exact domain, or a wildcard (starting with a dot). Patterns can be either the exact domain, or a wildcard (starting with a dot).
@ -111,7 +97,7 @@ class CookieJar(object):
return domain.endswith(pattern) return domain.endswith(pattern)
return domain == pattern return domain == pattern
def _domain_match_list(self, patterns, domain): def domain_match_list(self, patterns, domain):
""" """
Checks domains match, from a list of patterns. Checks domains match, from a list of patterns.
If the list of patterns is True, it always matches. If the list of patterns is True, it always matches.
@ -123,17 +109,17 @@ class CookieJar(object):
if patterns is True: if patterns is True:
return True return True
for pattern in patterns: for pattern in patterns:
if self._domain_match(pattern, domain): if self.domain_match(pattern, domain):
return True return True
return False return False
def _can_set(self, cookie, url): def can_set(self, cookie, url):
""" """
Checks a URL can set a particular cookie. Checks a URL can set a particular cookie.
See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions. See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions.
The cookie must have a domain already set, you can The cookie must have a domain already set, you can
use _normalize_cookie() for that. use normalize_cookie() for that.
:param cookie: The cookie the server set :param cookie: The cookie the server set
:type cookie: Cookie :type cookie: Cookie
@ -146,9 +132,9 @@ class CookieJar(object):
domain = url.hostname domain = url.hostname
# Accept/reject overrides # Accept/reject overrides
if self._domain_match_list(self.ACCEPT_DOMAINS, domain): if self.domain_match_list(self.ACCEPT_DOMAINS, domain):
return True return True
if self._domain_match_list(self.REJECT_DOMAINS, domain): if self.domain_match_list(self.REJECT_DOMAINS, domain):
return False return False
# check path # check path
@ -173,7 +159,7 @@ class CookieJar(object):
return False return False
def _normalize_cookie(self, cookie, url, now=None): def normalize_cookie(self, cookie, url, now=None):
""" """
Update a cookie we got from the response. Update a cookie we got from the response.
The goal is to have data relevant for use in future requests. The goal is to have data relevant for use in future requests.
@ -196,9 +182,34 @@ class CookieJar(object):
now = datetime.now() now = datetime.now()
cookie.expires = now + timedelta(seconds=cookie.max_age) cookie.expires = now + timedelta(seconds=cookie.max_age)
if url.scheme == 'https' \ if url.scheme == 'https' \
and self._domain_match_list(self.SECURE_DOMAINS, cookie.domain): and self.domain_match_list(self.SECURE_DOMAINS, cookie.domain):
cookie.secure = True cookie.secure = True
class CookieJar(object):
"""
Manage Cookies like a real browser, with security and privacy in mind.
python-requests accepts cookies blindly,
Expirations are not taken into account,
it can't handle the server asking to delete a cookie,
and sends cookies even when changing domains!
Of course, secure (SSL only) cookies aren't handled either.
This behavior depends on a `policy` class.
This class fixes all that.
"""
def __init__(self, policy):
"""
Cookies are delicious delicacies.
:type policy: :class:`CookiePolicy`
"""
self.cookies = dict()
self.policy = policy
def from_response(self, response): def from_response(self, response):
""" """
Import cookies from the response. Import cookies from the response.
@ -208,8 +219,8 @@ class CookieJar(object):
if 'Set-Cookie' in response.headers: if 'Set-Cookie' in response.headers:
cs = Cookies.from_response(response.headers['Set-Cookie'], True) cs = Cookies.from_response(response.headers['Set-Cookie'], True)
for c in cs.itervalues(): for c in cs.itervalues():
self._normalize_cookie(c, response.url) self.policy.normalize_cookie(c, response.url)
if self._can_set(c, response.url): if self.policy.can_set(c, response.url):
self.set(c) self.set(c)
def for_request(self, url, now=None): def for_request(self, url, now=None):
@ -281,7 +292,7 @@ class CookieJar(object):
""" """
for cdomain, cpaths in self.cookies.iteritems(): for cdomain, cpaths in self.cookies.iteritems():
# domain matches (all domains if None) # domain matches (all domains if None)
if domain is None or self._domain_match(cdomain, domain): if domain is None or self.policy.domain_match(cdomain, domain):
for cpath, cnames in cpaths.iteritems(): for cpath, cnames in cpaths.iteritems():
# path matches (all if None) # path matches (all if None)
if path is None or path.startswith(cpath): if path is None or path.startswith(cpath):

View file

@ -28,7 +28,7 @@ from requests import HTTPError
from nose.plugins.skip import SkipTest from nose.plugins.skip import SkipTest
from .browser import BaseBrowser, DomainBrowser, Weboob from .browser import BaseBrowser, DomainBrowser, Weboob
from .cookiejar import CookieJar from .cookiejar import CookieJar, CookiePolicy
from .cookies import Cookies from .cookies import Cookies
from weboob.tools.json import json from weboob.tools.json import json
@ -251,14 +251,14 @@ def test_referrer():
r = b.location(HTTPBIN + 'headers', referrer=False) r = b.location(HTTPBIN + 'headers', referrer=False)
assert 'Referer' not in json.loads(r.text)['headers'] assert 'Referer' not in json.loads(r.text)['headers']
assert b._get_referrer('https://example.com/', 'http://example.com/') is None assert b.get_referrer('https://example.com/', 'http://example.com/') is None
def test_cookieparse(): def test_cookiepolicy():
""" """
Test cookie parsing and processing Test cookie parsing and processing
""" """
cj = CookieJar() policy = CookiePolicy()
def bc(data): def bc(data):
""" """
@ -267,44 +267,44 @@ def test_cookieparse():
cs = Cookies() cs = Cookies()
cs.parse_response(data) cs.parse_response(data)
for c in cs.itervalues(): for c in cs.itervalues():
cj._normalize_cookie(c, 'http://example.com/') policy.normalize_cookie(c, 'http://example.com/')
return c return c
# parse max-age # parse max-age
assert bc('__bwid=58244366; max-age=42; path=/').expires assert bc('__bwid=58244366; max-age=42; path=/').expires
# security for received cookies # security for received cookies
assert cj._can_set(bc('k=v; domain=www.example.com'), assert policy.can_set(bc('k=v; domain=www.example.com'),
'http://www.example.com/') 'http://www.example.com/')
assert cj._can_set(bc('k=v; domain=sub.example.com'), assert policy.can_set(bc('k=v; domain=sub.example.com'),
'http://www.example.com/') 'http://www.example.com/')
assert cj._can_set(bc('k=v; domain=sub.example.com'), assert policy.can_set(bc('k=v; domain=sub.example.com'),
'http://example.com/') 'http://example.com/')
assert cj._can_set(bc('k=v; domain=.example.com'), assert policy.can_set(bc('k=v; domain=.example.com'),
'http://example.com/') 'http://example.com/')
assert cj._can_set(bc('k=v; domain=www.example.com'), assert policy.can_set(bc('k=v; domain=www.example.com'),
'http://example.com/') 'http://example.com/')
assert not cj._can_set(bc('k=v; domain=example.com'), assert not policy.can_set(bc('k=v; domain=example.com'),
'http://example.net/') 'http://example.net/')
assert not cj._can_set(bc('k=v; domain=.net'), assert not policy.can_set(bc('k=v; domain=.net'),
'http://example.net/') 'http://example.net/')
assert not cj._can_set(bc('k=v; domain=www.example.net'), assert not policy.can_set(bc('k=v; domain=www.example.net'),
'http://www.example.com/') 'http://www.example.com/')
assert not cj._can_set(bc('k=v; domain=wwwexample.com'), assert not policy.can_set(bc('k=v; domain=wwwexample.com'),
'http://example.com/') 'http://example.com/')
assert not cj._can_set(bc('k=v; domain=.example.com'), assert not policy.can_set(bc('k=v; domain=.example.com'),
'http://wwwexample.com/') 'http://wwwexample.com/')
# pattern matching domains # pattern matching domains
assert not cj._domain_match('example.com', 's.example.com') assert not policy.domain_match('example.com', 's.example.com')
assert cj._domain_match('.example.com', 's.example.com') assert policy.domain_match('.example.com', 's.example.com')
assert not cj._domain_match('.example.com', 'example.com') # yep. assert not policy.domain_match('.example.com', 'example.com') # yep.
assert cj._domain_match('s.example.com', 's.example.com') assert policy.domain_match('s.example.com', 's.example.com')
assert not cj._domain_match('s.example.com', 's2.example.com') assert not policy.domain_match('s.example.com', 's2.example.com')
assert cj._domain_match_list(True, 'example.com') assert policy.domain_match_list(True, 'example.com')
assert not cj._domain_match_list([], 'example.com') assert not policy.domain_match_list([], 'example.com')
assert cj._domain_match_list(['example.net', 'example.com'], 'example.com') assert policy.domain_match_list(['example.net', 'example.com'], 'example.com')
assert not cj._domain_match_list(['example.net', 'example.org'], 'example.com') assert not policy.domain_match_list(['example.net', 'example.org'], 'example.com')
def test_cookiejar(): def test_cookiejar():
@ -327,7 +327,7 @@ def test_cookiejar():
cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/') cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/')
cookie4 = bc('k=v4; domain=www.example.com; path=/lol/') cookie4 = bc('k=v4; domain=www.example.com; path=/lol/')
cj = CookieJar() cj = CookieJar(CookiePolicy())
cj.set(cookie0) cj.set(cookie0)
cj.set(cookie1) cj.set(cookie1)
cj.set(cookie2) cj.set(cookie2)
@ -400,7 +400,7 @@ def test_buildcookie():
""" """
Test easy cookie building Test easy cookie building
""" """
cj = CookieJar() cj = CookieJar(CookiePolicy())
c = cj.build('kk', 'vv', 'http://example.com/') c = cj.build('kk', 'vv', 'http://example.com/')
assert c.domain == 'example.com' assert c.domain == 'example.com'
assert not c.secure assert not c.secure