browser2: Separate CookieJar from CookiePolicy
And avoid methods starting with _!
This commit is contained in:
parent
083caf15e5
commit
1c404639c1
3 changed files with 73 additions and 61 deletions
|
|
@ -25,7 +25,7 @@ import requests
|
|||
from requests.status_codes import codes
|
||||
from copy import deepcopy
|
||||
|
||||
from .cookiejar import CookieJar
|
||||
from .cookiejar import CookieJar, CookiePolicy
|
||||
|
||||
|
||||
# TODO define __all__
|
||||
|
|
@ -114,20 +114,21 @@ class BaseBrowser(object):
|
|||
|
||||
PROFILE = Firefox()
|
||||
TIMEOUT = 10.0
|
||||
COOKIE_POLICY = CookiePolicy()
|
||||
|
||||
def __init__(self):
|
||||
self._setup_session(self.PROFILE)
|
||||
self._setup_cookies()
|
||||
self._setup_cookies(self.COOKIE_POLICY)
|
||||
self.url = None
|
||||
self.response = None
|
||||
|
||||
def _setup_cookies(self):
|
||||
def _setup_cookies(self, policy):
|
||||
"""
|
||||
Create and configure a cookie jar.
|
||||
|
||||
Overload this method to set custom options, or even change the class.
|
||||
"""
|
||||
self.cookies = CookieJar()
|
||||
self.cookies = CookieJar(policy)
|
||||
|
||||
def _setup_session(self, profile):
|
||||
"""
|
||||
|
|
@ -237,7 +238,7 @@ class BaseBrowser(object):
|
|||
referrer = orig_referrer
|
||||
else:
|
||||
# Guess from last response
|
||||
referrer = self._get_referrer(response.url, url)
|
||||
referrer = self.get_referrer(response.url, url)
|
||||
|
||||
call_args = deepcopy(orig_args)
|
||||
response = self.open(url, referrer=referrer, **call_args)
|
||||
|
|
@ -322,7 +323,7 @@ class BaseBrowser(object):
|
|||
kwargs['allow_redirects'] = False
|
||||
|
||||
if referrer is None:
|
||||
referrer = self._get_referrer(self.url, url)
|
||||
referrer = self.get_referrer(self.url, url)
|
||||
if referrer:
|
||||
# Yes, it is a misspelling.
|
||||
kwargs.setdefault('headers', {}).setdefault('Referer', referrer)
|
||||
|
|
@ -352,7 +353,7 @@ class BaseBrowser(object):
|
|||
|
||||
return response
|
||||
|
||||
def _get_referrer(self, oldurl, newurl):
|
||||
def get_referrer(self, oldurl, newurl):
|
||||
"""
|
||||
Get the referrer to send when doing a request.
|
||||
If we should not send a referrer, it will return None.
|
||||
|
|
|
|||
|
|
@ -39,17 +39,9 @@ Cookie.attribute_parsers['domain'] = parse_domain
|
|||
Cookie.attribute_validators['domain'] = valid_domain
|
||||
|
||||
|
||||
class CookieJar(object):
|
||||
class CookiePolicy(object):
|
||||
"""
|
||||
Manage Cookies like a real browser, with security and privacy in mind.
|
||||
|
||||
python-requests accepts cookies blindly,
|
||||
Expirations are not taken into account,
|
||||
it can't handle the server asking to delete a cookie,
|
||||
and sends cookies even when changing domains!
|
||||
Of course, secure (SSL only) cookies aren't handled either.
|
||||
|
||||
This class fixes all that.
|
||||
Defines how cookies are accepted, and what to do with them.
|
||||
"""
|
||||
|
||||
ACCEPT_DOMAINS = []
|
||||
|
|
@ -89,13 +81,7 @@ class CookieJar(object):
|
|||
This is only for setting cookies; it should be relatively safe in Weboob.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""
|
||||
Cookies are delicious delicacies.
|
||||
"""
|
||||
self.cookies = dict()
|
||||
|
||||
def _domain_match(self, pattern, domain):
|
||||
def domain_match(self, pattern, domain):
|
||||
"""
|
||||
Checks a domain matches a domain pattern.
|
||||
Patterns can be either the exact domain, or a wildcard (starting with a dot).
|
||||
|
|
@ -111,7 +97,7 @@ class CookieJar(object):
|
|||
return domain.endswith(pattern)
|
||||
return domain == pattern
|
||||
|
||||
def _domain_match_list(self, patterns, domain):
|
||||
def domain_match_list(self, patterns, domain):
|
||||
"""
|
||||
Checks domains match, from a list of patterns.
|
||||
If the list of patterns is True, it always matches.
|
||||
|
|
@ -123,17 +109,17 @@ class CookieJar(object):
|
|||
if patterns is True:
|
||||
return True
|
||||
for pattern in patterns:
|
||||
if self._domain_match(pattern, domain):
|
||||
if self.domain_match(pattern, domain):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _can_set(self, cookie, url):
|
||||
def can_set(self, cookie, url):
|
||||
"""
|
||||
Checks whether a URL can set a particular cookie.
|
||||
See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions.
|
||||
|
||||
The cookie must have a domain already set, you can
|
||||
use _normalize_cookie() for that.
|
||||
use normalize_cookie() for that.
|
||||
|
||||
:param cookie: The cookie the server set
|
||||
:type cookie: Cookie
|
||||
|
|
@ -146,9 +132,9 @@ class CookieJar(object):
|
|||
domain = url.hostname
|
||||
|
||||
# Accept/reject overrides
|
||||
if self._domain_match_list(self.ACCEPT_DOMAINS, domain):
|
||||
if self.domain_match_list(self.ACCEPT_DOMAINS, domain):
|
||||
return True
|
||||
if self._domain_match_list(self.REJECT_DOMAINS, domain):
|
||||
if self.domain_match_list(self.REJECT_DOMAINS, domain):
|
||||
return False
|
||||
|
||||
# check path
|
||||
|
|
@ -173,7 +159,7 @@ class CookieJar(object):
|
|||
|
||||
return False
|
||||
|
||||
def _normalize_cookie(self, cookie, url, now=None):
|
||||
def normalize_cookie(self, cookie, url, now=None):
|
||||
"""
|
||||
Update a cookie we got from the response.
|
||||
The goal is to have data relevant for use in future requests.
|
||||
|
|
@ -196,9 +182,34 @@ class CookieJar(object):
|
|||
now = datetime.now()
|
||||
cookie.expires = now + timedelta(seconds=cookie.max_age)
|
||||
if url.scheme == 'https' \
|
||||
and self._domain_match_list(self.SECURE_DOMAINS, cookie.domain):
|
||||
and self.domain_match_list(self.SECURE_DOMAINS, cookie.domain):
|
||||
cookie.secure = True
|
||||
|
||||
|
||||
class CookieJar(object):
|
||||
"""
|
||||
Manage Cookies like a real browser, with security and privacy in mind.
|
||||
|
||||
python-requests accepts cookies blindly,
|
||||
Expirations are not taken into account,
|
||||
it can't handle the server asking to delete a cookie,
|
||||
and sends cookies even when changing domains!
|
||||
Of course, secure (SSL only) cookies aren't handled either.
|
||||
|
||||
This behavior depends on a `policy` class.
|
||||
|
||||
This class fixes all that.
|
||||
"""
|
||||
|
||||
def __init__(self, policy):
|
||||
"""
|
||||
Cookies are delicious delicacies.
|
||||
|
||||
:type policy: :class:`CookiePolicy`
|
||||
"""
|
||||
self.cookies = dict()
|
||||
self.policy = policy
|
||||
|
||||
def from_response(self, response):
|
||||
"""
|
||||
Import cookies from the response.
|
||||
|
|
@ -208,8 +219,8 @@ class CookieJar(object):
|
|||
if 'Set-Cookie' in response.headers:
|
||||
cs = Cookies.from_response(response.headers['Set-Cookie'], True)
|
||||
for c in cs.itervalues():
|
||||
self._normalize_cookie(c, response.url)
|
||||
if self._can_set(c, response.url):
|
||||
self.policy.normalize_cookie(c, response.url)
|
||||
if self.policy.can_set(c, response.url):
|
||||
self.set(c)
|
||||
|
||||
def for_request(self, url, now=None):
|
||||
|
|
@ -281,7 +292,7 @@ class CookieJar(object):
|
|||
"""
|
||||
for cdomain, cpaths in self.cookies.iteritems():
|
||||
# domain matches (all domains if None)
|
||||
if domain is None or self._domain_match(cdomain, domain):
|
||||
if domain is None or self.policy.domain_match(cdomain, domain):
|
||||
for cpath, cnames in cpaths.iteritems():
|
||||
# path matches (all if None)
|
||||
if path is None or path.startswith(cpath):
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from requests import HTTPError
|
|||
from nose.plugins.skip import SkipTest
|
||||
|
||||
from .browser import BaseBrowser, DomainBrowser, Weboob
|
||||
from .cookiejar import CookieJar
|
||||
from .cookiejar import CookieJar, CookiePolicy
|
||||
from .cookies import Cookies
|
||||
|
||||
from weboob.tools.json import json
|
||||
|
|
@ -251,14 +251,14 @@ def test_referrer():
|
|||
r = b.location(HTTPBIN + 'headers', referrer=False)
|
||||
assert 'Referer' not in json.loads(r.text)['headers']
|
||||
|
||||
assert b._get_referrer('https://example.com/', 'http://example.com/') is None
|
||||
assert b.get_referrer('https://example.com/', 'http://example.com/') is None
|
||||
|
||||
|
||||
def test_cookieparse():
|
||||
def test_cookiepolicy():
|
||||
"""
|
||||
Test cookie parsing and processing
|
||||
"""
|
||||
cj = CookieJar()
|
||||
policy = CookiePolicy()
|
||||
|
||||
def bc(data):
|
||||
"""
|
||||
|
|
@ -267,44 +267,44 @@ def test_cookieparse():
|
|||
cs = Cookies()
|
||||
cs.parse_response(data)
|
||||
for c in cs.itervalues():
|
||||
cj._normalize_cookie(c, 'http://example.com/')
|
||||
policy.normalize_cookie(c, 'http://example.com/')
|
||||
return c
|
||||
|
||||
# parse max-age
|
||||
assert bc('__bwid=58244366; max-age=42; path=/').expires
|
||||
|
||||
# security for received cookies
|
||||
assert cj._can_set(bc('k=v; domain=www.example.com'),
|
||||
assert policy.can_set(bc('k=v; domain=www.example.com'),
|
||||
'http://www.example.com/')
|
||||
assert cj._can_set(bc('k=v; domain=sub.example.com'),
|
||||
assert policy.can_set(bc('k=v; domain=sub.example.com'),
|
||||
'http://www.example.com/')
|
||||
assert cj._can_set(bc('k=v; domain=sub.example.com'),
|
||||
assert policy.can_set(bc('k=v; domain=sub.example.com'),
|
||||
'http://example.com/')
|
||||
assert cj._can_set(bc('k=v; domain=.example.com'),
|
||||
assert policy.can_set(bc('k=v; domain=.example.com'),
|
||||
'http://example.com/')
|
||||
assert cj._can_set(bc('k=v; domain=www.example.com'),
|
||||
assert policy.can_set(bc('k=v; domain=www.example.com'),
|
||||
'http://example.com/')
|
||||
assert not cj._can_set(bc('k=v; domain=example.com'),
|
||||
assert not policy.can_set(bc('k=v; domain=example.com'),
|
||||
'http://example.net/')
|
||||
assert not cj._can_set(bc('k=v; domain=.net'),
|
||||
assert not policy.can_set(bc('k=v; domain=.net'),
|
||||
'http://example.net/')
|
||||
assert not cj._can_set(bc('k=v; domain=www.example.net'),
|
||||
assert not policy.can_set(bc('k=v; domain=www.example.net'),
|
||||
'http://www.example.com/')
|
||||
assert not cj._can_set(bc('k=v; domain=wwwexample.com'),
|
||||
assert not policy.can_set(bc('k=v; domain=wwwexample.com'),
|
||||
'http://example.com/')
|
||||
assert not cj._can_set(bc('k=v; domain=.example.com'),
|
||||
assert not policy.can_set(bc('k=v; domain=.example.com'),
|
||||
'http://wwwexample.com/')
|
||||
|
||||
# pattern matching domains
|
||||
assert not cj._domain_match('example.com', 's.example.com')
|
||||
assert cj._domain_match('.example.com', 's.example.com')
|
||||
assert not cj._domain_match('.example.com', 'example.com') # yep.
|
||||
assert cj._domain_match('s.example.com', 's.example.com')
|
||||
assert not cj._domain_match('s.example.com', 's2.example.com')
|
||||
assert cj._domain_match_list(True, 'example.com')
|
||||
assert not cj._domain_match_list([], 'example.com')
|
||||
assert cj._domain_match_list(['example.net', 'example.com'], 'example.com')
|
||||
assert not cj._domain_match_list(['example.net', 'example.org'], 'example.com')
|
||||
assert not policy.domain_match('example.com', 's.example.com')
|
||||
assert policy.domain_match('.example.com', 's.example.com')
|
||||
assert not policy.domain_match('.example.com', 'example.com') # yep.
|
||||
assert policy.domain_match('s.example.com', 's.example.com')
|
||||
assert not policy.domain_match('s.example.com', 's2.example.com')
|
||||
assert policy.domain_match_list(True, 'example.com')
|
||||
assert not policy.domain_match_list([], 'example.com')
|
||||
assert policy.domain_match_list(['example.net', 'example.com'], 'example.com')
|
||||
assert not policy.domain_match_list(['example.net', 'example.org'], 'example.com')
|
||||
|
||||
|
||||
def test_cookiejar():
|
||||
|
|
@ -327,7 +327,7 @@ def test_cookiejar():
|
|||
cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/')
|
||||
cookie4 = bc('k=v4; domain=www.example.com; path=/lol/')
|
||||
|
||||
cj = CookieJar()
|
||||
cj = CookieJar(CookiePolicy())
|
||||
cj.set(cookie0)
|
||||
cj.set(cookie1)
|
||||
cj.set(cookie2)
|
||||
|
|
@ -400,7 +400,7 @@ def test_buildcookie():
|
|||
"""
|
||||
Test easy cookie building
|
||||
"""
|
||||
cj = CookieJar()
|
||||
cj = CookieJar(CookiePolicy())
|
||||
c = cj.build('kk', 'vv', 'http://example.com/')
|
||||
assert c.domain == 'example.com'
|
||||
assert not c.secure
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue