diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index 57402be9..e857f719 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -25,7 +25,7 @@ import requests from requests.status_codes import codes from copy import deepcopy -from .cookiejar import CookieJar +from .cookiejar import CookieJar, CookiePolicy # TODO define __all__ @@ -114,20 +114,21 @@ class BaseBrowser(object): PROFILE = Firefox() TIMEOUT = 10.0 + COOKIE_POLICY = CookiePolicy() def __init__(self): self._setup_session(self.PROFILE) - self._setup_cookies() + self._setup_cookies(self.COOKIE_POLICY) self.url = None self.response = None - def _setup_cookies(self): + def _setup_cookies(self, policy): """ Create and configure a cookie jar. Overload this method to set custom options, or even change the class. """ - self.cookies = CookieJar() + self.cookies = CookieJar(policy) def _setup_session(self, profile): """ @@ -237,7 +238,7 @@ class BaseBrowser(object): referrer = orig_referrer else: # Guess from last response - referrer = self._get_referrer(response.url, url) + referrer = self.get_referrer(response.url, url) call_args = deepcopy(orig_args) response = self.open(url, referrer=referrer, **call_args) @@ -322,7 +323,7 @@ class BaseBrowser(object): kwargs['allow_redirects'] = False if referrer is None: - referrer = self._get_referrer(self.url, url) + referrer = self.get_referrer(self.url, url) if referrer: # Yes, it is a misspelling. kwargs.setdefault('headers', {}).setdefault('Referer', referrer) @@ -352,7 +353,7 @@ class BaseBrowser(object): return response - def _get_referrer(self, oldurl, newurl): + def get_referrer(self, oldurl, newurl): """ Get the referrer to send when doing a request. If we should not send a referrer, it will return None. diff --git a/weboob/tools/browser2/cookiejar.py b/weboob/tools/browser2/cookiejar.py index f8303927..9387108e 100644 --- a/weboob/tools/browser2/cookiejar.py +++ b/weboob/tools/browser2/cookiejar.py @@ -39,17 +39,9 @@ Cookie.attribute_parsers['domain'] = parse_domain Cookie.attribute_validators['domain'] = valid_domain -class CookieJar(object): +class CookiePolicy(object): """ - Manage Cookies like a real browser, with security and privacy in mind. - - python-requests accepts cookies blindly, - Expirations are not taken into account, - it can't handle the server asking to delete a cookie, - and sends cookies even when changing domains! - Of course, secure (SSL only) cookies aren't handled either. - - This class fixes all that. + Defines how cookies are accepted, and what to do with them. """ ACCEPT_DOMAINS = [] @@ -89,13 +81,7 @@ class CookieJar(object): This is only for setting cookies; it should be relatively safe in Weboob. """ - def __init__(self): - """ - Cookies are delicious delicacies. - """ - self.cookies = dict() - - def _domain_match(self, pattern, domain): + def domain_match(self, pattern, domain): """ Checks a domain matches a domain pattern. Patterns can be either the exact domain, or a wildcard (starting with a dot). @@ -111,7 +97,7 @@ class CookieJar(object): return domain.endswith(pattern) return domain == pattern - def _domain_match_list(self, patterns, domain): + def domain_match_list(self, patterns, domain): """ Checks domains match, from a list of patters. If the list of patterns is True, it always matches. @@ -123,17 +109,17 @@ class CookieJar(object): if patterns is True: return True for pattern in patterns: - if self._domain_match(pattern, domain): + if self.domain_match(pattern, domain): return True return False - def _can_set(self, cookie, url): + def can_set(self, cookie, url): """ Checks an URL can set a particular cookie. See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions. The cookie must have a domain already set, you can - use _normalize_cookie() for that. + use normalize_cookie() for that. :param cookie: The cookie the server set :type cookie: Cookie @@ -146,9 +132,9 @@ class CookieJar(object): domain = url.hostname # Accept/reject overrides - if self._domain_match_list(self.ACCEPT_DOMAINS, domain): + if self.domain_match_list(self.ACCEPT_DOMAINS, domain): return True - if self._domain_match_list(self.REJECT_DOMAINS, domain): + if self.domain_match_list(self.REJECT_DOMAINS, domain): return False # check path @@ -173,7 +159,7 @@ class CookieJar(object): return False - def _normalize_cookie(self, cookie, url, now=None): + def normalize_cookie(self, cookie, url, now=None): """ Update a cookie we got from the response. The goal is to have data relevant for use in future requests. @@ -196,9 +182,34 @@ class CookieJar(object): now = datetime.now() cookie.expires = now + timedelta(seconds=cookie.max_age) if url.scheme == 'https' \ - and self._domain_match_list(self.SECURE_DOMAINS, cookie.domain): + and self.domain_match_list(self.SECURE_DOMAINS, cookie.domain): cookie.secure = True + +class CookieJar(object): + """ + Manage Cookies like a real browser, with security and privacy in mind. + + python-requests accepts cookies blindly, + Expirations are not taken into account, + it can't handle the server asking to delete a cookie, + and sends cookies even when changing domains! + Of course, secure (SSL only) cookies aren't handled either. + + This behavior depends on a `policy` class. + + This class fixes all that. + """ + + def __init__(self, policy): + """ + Cookies are delicious delicacies. + + :type: :class:`CookiePolicy` + """ + self.cookies = dict() + self.policy = policy + def from_response(self, response): """ Import cookies from the response. @@ -208,8 +219,8 @@ class CookieJar(object): if 'Set-Cookie' in response.headers: cs = Cookies.from_response(response.headers['Set-Cookie'], True) for c in cs.itervalues(): - self._normalize_cookie(c, response.url) - if self._can_set(c, response.url): + self.policy.normalize_cookie(c, response.url) + if self.policy.can_set(c, response.url): self.set(c) def for_request(self, url, now=None): @@ -281,7 +292,7 @@ class CookieJar(object): """ for cdomain, cpaths in self.cookies.iteritems(): # domain matches (all domains if None) - if domain is None or self._domain_match(cdomain, domain): + if domain is None or self.policy.domain_match(cdomain, domain): for cpath, cnames in cpaths.iteritems(): # path matches (all if None) if path is None or path.startswith(cpath): diff --git a/weboob/tools/browser2/test.py b/weboob/tools/browser2/test.py index 9e8a40fb..2c7ba598 100644 --- a/weboob/tools/browser2/test.py +++ b/weboob/tools/browser2/test.py @@ -28,7 +28,7 @@ from requests import HTTPError from nose.plugins.skip import SkipTest from .browser import BaseBrowser, DomainBrowser, Weboob -from .cookiejar import CookieJar +from .cookiejar import CookieJar, CookiePolicy from .cookies import Cookies from weboob.tools.json import json @@ -251,14 +251,14 @@ def test_referrer(): r = b.location(HTTPBIN + 'headers', referrer=False) assert 'Referer' not in json.loads(r.text)['headers'] - assert b._get_referrer('https://example.com/', 'http://example.com/') is None + assert b.get_referrer('https://example.com/', 'http://example.com/') is None -def test_cookieparse(): +def test_cookiepolicy(): """ Test cookie parsing and processing """ - cj = CookieJar() + policy = CookiePolicy() def bc(data): """ @@ -267,44 +267,44 @@ def test_cookieparse(): cs = Cookies() cs.parse_response(data) for c in cs.itervalues(): - cj._normalize_cookie(c, 'http://example.com/') + policy.normalize_cookie(c, 'http://example.com/') return c # parse max-age assert bc('__bwid=58244366; max-age=42; path=/').expires # security for received cookies - assert cj._can_set(bc('k=v; domain=www.example.com'), + assert policy.can_set(bc('k=v; domain=www.example.com'), 'http://www.example.com/') - assert cj._can_set(bc('k=v; domain=sub.example.com'), + assert policy.can_set(bc('k=v; domain=sub.example.com'), 'http://www.example.com/') - assert cj._can_set(bc('k=v; domain=sub.example.com'), + assert policy.can_set(bc('k=v; domain=sub.example.com'), 'http://example.com/') - assert cj._can_set(bc('k=v; domain=.example.com'), + assert policy.can_set(bc('k=v; domain=.example.com'), 'http://example.com/') - assert cj._can_set(bc('k=v; domain=www.example.com'), + assert policy.can_set(bc('k=v; domain=www.example.com'), 'http://example.com/') - assert not cj._can_set(bc('k=v; domain=example.com'), + assert not policy.can_set(bc('k=v; domain=example.com'), 'http://example.net/') - assert not cj._can_set(bc('k=v; domain=.net'), + assert not policy.can_set(bc('k=v; domain=.net'), 'http://example.net/') - assert not cj._can_set(bc('k=v; domain=www.example.net'), + assert not policy.can_set(bc('k=v; domain=www.example.net'), 'http://www.example.com/') - assert not cj._can_set(bc('k=v; domain=wwwexample.com'), + assert not policy.can_set(bc('k=v; domain=wwwexample.com'), 'http://example.com/') - assert not cj._can_set(bc('k=v; domain=.example.com'), + assert not policy.can_set(bc('k=v; domain=.example.com'), 'http://wwwexample.com/') # pattern matching domains - assert not cj._domain_match('example.com', 's.example.com') - assert cj._domain_match('.example.com', 's.example.com') - assert not cj._domain_match('.example.com', 'example.com') # yep. - assert cj._domain_match('s.example.com', 's.example.com') - assert not cj._domain_match('s.example.com', 's2.example.com') - assert cj._domain_match_list(True, 'example.com') - assert not cj._domain_match_list([], 'example.com') - assert cj._domain_match_list(['example.net', 'example.com'], 'example.com') - assert not cj._domain_match_list(['example.net', 'example.org'], 'example.com') + assert not policy.domain_match('example.com', 's.example.com') + assert policy.domain_match('.example.com', 's.example.com') + assert not policy.domain_match('.example.com', 'example.com') # yep. + assert policy.domain_match('s.example.com', 's.example.com') + assert not policy.domain_match('s.example.com', 's2.example.com') + assert policy.domain_match_list(True, 'example.com') + assert not policy.domain_match_list([], 'example.com') + assert policy.domain_match_list(['example.net', 'example.com'], 'example.com') + assert not policy.domain_match_list(['example.net', 'example.org'], 'example.com') def test_cookiejar(): @@ -327,7 +327,7 @@ def test_cookiejar(): cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/') cookie4 = bc('k=v4; domain=www.example.com; path=/lol/') - cj = CookieJar() + cj = CookieJar(CookiePolicy()) cj.set(cookie0) cj.set(cookie1) cj.set(cookie2) @@ -400,7 +400,7 @@ def test_buildcookie(): """ Test easy cookie building """ - cj = CookieJar() + cj = CookieJar(CookiePolicy()) c = cj.build('kk', 'vv', 'http://example.com/') assert c.domain == 'example.com' assert not c.secure