Cleanups due to the usage of python-requests>=2.0

This commit is contained in:
Laurent Bachelier 2014-03-13 17:18:26 +01:00
commit 82c61f3668
4 changed files with 3 additions and 2170 deletions

View file

@ -46,6 +46,7 @@ class Profile(object):
Warning: Do not enable lzma, bzip or bzip2, sdch encodings
as python-requests does not support it yet.
Supported as of 2.2: gzip, deflate, compress.
If in doubt, do not change the default Accept-Encoding header
of python-requests.
"""
@ -90,7 +91,7 @@ class Firefox(Profile):
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20100101 Firefox/10.0.3',
'DNT': '1'}
# It also has "Connection: Keep-Alive", that should only be added this way:
#session.config['keep_alive'] = True
#FIXME session.config['keep_alive'] = True
class Wget(Profile):
@ -108,7 +109,7 @@ class Wget(Profile):
session.headers.update({
'Accept': '*/*',
'User-Agent': 'Wget/%s' % self.version})
#session.config['keep_alive'] = True
#FIXME session.config['keep_alive'] = True
class BaseBrowser(object):
@ -163,7 +164,6 @@ class BaseBrowser(object):
"""
Make an HTTP request like a browser does:
* follow redirects (unless disabled)
* handle cookies
* provide referrers (unless disabled)
Unless a `method` is explicitly provided, it makes a GET request,

View file

@ -1,431 +0,0 @@
# TODO declare __all__
# TODO support logging
from urlparse import urlparse
from datetime import datetime, timedelta
import posixpath
from .cookies import Cookie, Cookies, strip_spaces_and_quotes, Definitions
def valid_domain(domain):
    """
    Like cookies.valid_domain, but allows leading periods.

    Because it is *very* common and useful for us.
    """
    # non-ASCII domains are rejected by letting encode() raise
    domain.encode('ascii')
    stripped = domain[1:] if domain.startswith('.') else domain
    if stripped.startswith('"'):
        return False
    return bool(Definitions.DOMAIN_RE.match(stripped))
def parse_domain(value):
    """
    Like cookies.parse_domain, but allows leading periods.

    Because it is *very* common and useful for us.
    """
    cleaned = strip_spaces_and_quotes(value)
    if not cleaned:
        return cleaned
    assert valid_domain(cleaned)
    return cleaned
# Monkey-patch the cookies library so that 'domain' attributes accept
# leading periods (see valid_domain/parse_domain above).
# this is ok because we are using our own copy of the lib
# TODO push a better way upstream
Cookie.attribute_parsers['domain'] = parse_domain
Cookie.attribute_validators['domain'] = valid_domain
class CookiePolicy(object):
    """
    Defines how cookies are accepted, and what to do with them.
    """

    # Domains where to accept cookies, even when we should not.
    # Add a "." before a domain to accept subdomains.
    # If True, accept all cookies (a bit insecure).
    # ACCEPT_DOMAINS has higher priority over REJECT_DOMAINS.
    # Disabling third-party cookies on most browsers acts like [], enabling
    # them acts like True. Since it is a very common browser option, we use
    # the most secure and privacy-aware method by default.
    ACCEPT_DOMAINS = []

    # Domains where to reject cookies, even when we should not.
    # Add a "." before a domain to reject subdomains.
    # If True, reject all cookies.
    # REJECT_DOMAINS has lower priority over ACCEPT_DOMAINS.
    REJECT_DOMAINS = []

    # When we get a cookie through a secure connection, mark it as secure
    # (not to be sent on insecure channels) if the server did not tell us to.
    # If True, do it automatically for all domains. Alternatively, you can
    # put a list of domains, like ACCEPT_DOMAINS or REJECT_DOMAINS.
    # If False, never do it (but still accept secure cookies as they are).
    # NoScript for Firefox does this, either by automated guesses or forced
    # from a list.
    SECURE_DOMAINS = True

    # Do sloppy domain matching to mimic what browsers do.
    # This is only for setting cookies; it should be relatively safe in Weboob.
    INSECURE_MATCHING = True

    def domain_match(self, pattern, domain):
        """
        Check whether a domain matches a domain pattern.

        Patterns can be either the exact domain, or a wildcard (starting
        with a dot):
            example.com matches example.com only
            .example.com matches *.example.com (but not example.com)

        :param pattern: str
        :param domain: str
        :rtype: bool
        """
        if not pattern.startswith('.'):
            return domain == pattern
        return domain.endswith(pattern)

    def domain_match_list(self, patterns, domain):
        """
        Check whether a domain matches any pattern from a list.

        If the list of patterns is True, it always matches.

        :param patterns: list or True
        :param domain: str
        :rtype: bool
        """
        if patterns is True:
            return True
        return any(self.domain_match(pattern, domain) for pattern in patterns)

    def can_set(self, cookie, url):
        """
        Check whether an URL is allowed to set a particular cookie.

        See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions.
        The cookie must have a domain already set; you can
        use normalize_cookie() for that.

        :param cookie: The cookie the server set
        :type cookie: Cookie
        :param url: URL of the response
        :type url: str
        :rtype: bool
        """
        parts = urlparse(url)
        host = parts.hostname
        # Accept/reject overrides come first
        if self.domain_match_list(self.ACCEPT_DOMAINS, host):
            return True
        if self.domain_match_list(self.REJECT_DOMAINS, host):
            return False
        # the response path must be under the cookie path
        if not parts.path.startswith(cookie.path):
            return False
        # strict domain check (secure & simple)
        if cookie.domain.startswith('.'):
            if cookie.domain.endswith(host) or cookie.domain == '.%s' % host:
                return True
        elif host == cookie.domain:
            return True
        # whatever.example.com should be able to set .example.com
        # Unbelievably stupid, but widely used.
        #
        # Our method is not ideal, as it isn't very secure for some TLDs.
        # A solution could be to use tldextract.
        if self.INSECURE_MATCHING:
            if host.split('.')[-2:] == cookie.domain.split('.')[-2:]:
                return True
        return False

    def normalize_cookie(self, cookie, url, now=None):
        """
        Update a cookie we got from the response.

        The goal is to have data relevant for use in future requests:
        * Sets domain if there is not one.
        * Sets path if there is not one.
        * Sets Expires from Max-Age; we need an absolute expiration date.
        * Forces the Secure flag if required (see SECURE_DOMAINS).

        :type cookie: :class:`cookies.Cookie`
        :type url: str
        :type now: datetime
        """
        parts = urlparse(url)
        if cookie.domain is None:
            cookie.domain = parts.hostname
        if cookie.path is None:
            cookie.path = '/'
        if cookie.max_age is not None:
            if now is None:
                now = datetime.now()
            cookie.expires = now + timedelta(seconds=cookie.max_age)
        if parts.scheme == 'https' \
                and self.domain_match_list(self.SECURE_DOMAINS, cookie.domain):
            cookie.secure = True
class CookieJar(object):
    """
    Manage Cookies like a real browser, with security and privacy in mind.

    python-requests accepts cookies blindly,
    expirations are not taken into account,
    it can't handle the server asking to delete a cookie,
    and sends cookies even when changing domains!
    Of course, secure (SSL only) cookies aren't handled either.
    This class fixes all that.

    The acceptance behavior depends on a `policy` object (see CookiePolicy).
    """
    def __init__(self, policy):
        """
        Cookies are delicious delicacies.

        :type policy: :class:`CookiePolicy`
        """
        # nested storage: domain -> path -> name -> Cookie
        self.cookies = dict()
        self.policy = policy

    def from_response(self, response):
        """
        Import cookies from the response, subject to the policy.

        :type response: responses.Response
        """
        if 'Set-Cookie' in response.headers:
            cs = Cookies.from_response(response.headers['Set-Cookie'], True)
            # values()/items() behave identically to the Py2-only
            # itervalues()/iteritems() for our usage, and also work on Py3
            for c in cs.values():
                self.policy.normalize_cookie(c, response.url)
                if self.policy.can_set(c, response.url):
                    self.set(c)

    def for_request(self, url, now=None):
        """
        Get a key/value dictionnary of cookies for a given request URL.

        Expired cookies are skipped; secure cookies are only returned
        for https URLs.

        :type url: str
        :type now: datetime
        :rtype: dict
        """
        url = urlparse(url)
        if now is None:
            now = datetime.now()
        # we want insecure cookies in https too!
        secure = None if url.scheme == 'https' else False
        cdict = dict()
        # get sorted cookies, from most precise to least precise
        cookies = self.all(domain=url.hostname, path=url.path, secure=secure)
        for cookie in cookies:
            # only use session cookies and cookies with future expirations
            if cookie.expires is None or cookie.expires > now:
                # update only if not set, since first cookies are "better"
                cdict.setdefault(cookie.name, cookie.value)
        return cdict

    def flush(self, now=None, session=False):
        """
        Remove expired cookies. If session is True, also remove all session cookies.

        :type now: datetime
        :type session: bool
        """
        if now is None:
            # FIX: previously a missing `now` meant `expires < None`,
            # which is always False on Py2, so nothing was ever flushed.
            now = datetime.now()
        # we need a list copy since we remove from the iterable
        for cookie in list(self.iter()):
            # remove session cookies if requested
            if cookie.expires is None and session:
                self.remove(cookie)
            # remove non-session cookies if expired before now
            if cookie.expires is not None and cookie.expires < now:
                self.remove(cookie)

    def set(self, cookie):
        """
        Add or replace a Cookie in the jar.

        This is for normalized and checked cookies, no validation is done.
        Use from_response() to import cookies from a python-requests response.

        :type cookie: cookies.Cookie
        """
        # cookies are unique by domain, path and of course name
        assert len(cookie.domain)
        assert len(cookie.path)
        assert len(cookie.name)
        self.cookies.setdefault(cookie.domain, {}). \
            setdefault(cookie.path, {})[cookie.name] = cookie

    def iter(self, name=None, domain=None, path=None, secure=None):
        """
        Iterate matching cookies, in no particular order.

        You can restrict by name, domain, path or security.

        :type name: str
        :type domain: str
        :type path: str
        :type secure: bool
        :rtype: iter[:class:`cookies.Cookie`]
        """
        for cdomain, cpaths in self.cookies.items():
            # domain matches (all domains if None)
            if domain is None or self.policy.domain_match(cdomain, domain):
                for cpath, cnames in cpaths.items():
                    # path matches (all if None)
                    if path is None or path.startswith(cpath):
                        for cname, cookie in cnames.items():
                            # only wanted name (all if None)
                            if name is None or name == cname:
                                # wanted security (all if None)
                                # cookie.secure can be "None" if not secure!
                                if secure is None \
                                        or (secure is False and not cookie.secure) \
                                        or (secure is True and cookie.secure):
                                    yield cookie

    def all(self, name=None, domain=None, path=None, secure=None):
        """
        Like iter(), but sorts the cookies, from most precise to less precise.

        :rtype: list[:class:`cookies.Cookie`]
        """
        # available since Python 2.7; replaces the Py2-only cmp= argument
        from functools import cmp_to_key
        cookies = list(self.iter(name, domain, path, secure))
        # slowly compare all cookies
        COOKIE1 = 1
        COOKIE2 = -1

        def ccmp(cookie1, cookie2):
            # most precise matching domain wins (exact match beats longest)
            if domain and cookie1.domain != cookie2.domain:
                if cookie1.domain == domain:
                    return COOKIE1
                if cookie2.domain == domain:
                    return COOKIE2
                if len(cookie1.domain) > len(cookie2.domain):
                    return COOKIE1
                if len(cookie2.domain) > len(cookie1.domain):
                    return COOKIE2
            # most precise matching path
            if len(cookie1.path) > len(cookie2.path):
                return COOKIE1
            if len(cookie2.path) > len(cookie1.path):
                return COOKIE2
            # most secure
            if cookie1.secure and not cookie2.secure:
                return COOKIE1
            if cookie2.secure and not cookie1.secure:
                return COOKIE2
            return 0

        return sorted(cookies, key=cmp_to_key(ccmp), reverse=True)

    def get(self, name=None, domain=None, path=None, secure=None):
        """
        Return the best cookie from all(), or None when nothing matches.

        Useful for changing the value or deleting a cookie.
        name, domain, path and secure are the same as iter().

        :rtype: :class:`cookies.Cookie` or None
        """
        cookies = self.all(name, domain, path, secure)
        try:
            return cookies[0]
        except IndexError:
            pass

    def remove(self, cookie):
        """
        Remove a cookie. The cookie argument must have the same domain, path and name.

        Return False if not present, True if just removed.

        :type cookie: :class:`cookies.Cookie`
        :rtype: bool
        """
        # cookies are unique by domain, path and of course name
        assert len(cookie.domain)
        assert len(cookie.path)
        assert len(cookie.name)
        # FIX: default the path lookup to {} so an unknown domain/path
        # returns False instead of raising TypeError on `in None`
        d = self.cookies.get(cookie.domain, {}).get(cookie.path, {})
        if cookie.name in d:
            del d[cookie.name]
            return True
        return False

    def clear(self):
        """
        Remove all cookies.
        """
        self.cookies.clear()

    def build(self, name, value, url, path=None, wildcard=False):
        """
        Build a Cookie object for the current URL.

        The domain and path are guessed. If you want to set for the whole domain,
        take care of what you put in URL!
        build('k', 'v', 'http://example.com/hello/world') will only set the
        cookie for the /hello/ path.

        `name` and `value` are required parameters of Cookie.__init__().
        You can force the `path` if you want.
        The `wildcard` parameter will add a period before the domain.

        Typical usage would be, inside a DomainBrowser:
            cookie = self.cookies.build(k, v, self.url)
            cookie = self.cookies.build(k, v, self.absurl('/'))
            cookie = self.cookies.build(k, v, self.BASEURL)
        And then:
            self.cookies.set(cookie)

        For more advanced usage, create a Cookie object manually, or
        alter the returned Cookie object before set().

        :type name: basestring
        :type value: basestring
        :type url: str
        :type path: str
        :type wildcard: bool
        :rtype cookie: :class:`cookies.Cookie`
        """
        cookie = Cookie(name, value)
        url = urlparse(url)
        if wildcard:
            cookie.domain = '.' + url.hostname
        else:
            cookie.domain = url.hostname
        if path is None:
            # directory of the URL path, with a trailing slash
            cookie.path = posixpath.join(posixpath.dirname(url.path), '')
        else:
            cookie.path = path
        if url.scheme == 'https':
            cookie.secure = True
        return cookie

File diff suppressed because it is too large Load diff

View file

@ -1,597 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
from datetime import datetime
from random import choice
import re
import string
from requests import HTTPError
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
from .browser import BaseBrowser, DomainBrowser, Weboob, UrlNotAllowed
from .cookiejar import CookieJar, CookiePolicy
from .cookies import Cookies
from weboob.tools.json import json
# Those services can be run locally. More or less.
HTTPBIN = 'http://httpbin.org/' # https://github.com/kennethreitz/httpbin
POSTBIN = 'http://www.postbin.org/' # https://github.com/progrium/postbin
REQUESTBIN = 'http://requestb.in/' # https://github.com/progrium/requestbin
# if you change HTTPBIN, you should also change these URLs for some tests:
# URL-shortener aliases; the tests pick one of each pair at random
# redirect to http://httpbin.org/get
REDIRECTS1 = ('http://tinyurl.com/ouiboube-b2', 'http://bit.ly/st4Hcv')
# redirect to http://httpbin.org/cookies
REDIRECTS2 = ('http://tinyurl.com/7zp3jnr', 'http://bit.ly/HZCCX7')
def test_base():
    """
    Basic checks with the default (Firefox) profile.
    """
    browser = BaseBrowser()
    response = browser.location(HTTPBIN + 'headers')
    assert isinstance(response.text, unicode)
    assert 'Firefox' in response.text
    assert 'python' not in response.text
    assert 'identity' not in response.text
    assert browser.url == HTTPBIN + 'headers'
    response = browser.location(HTTPBIN + 'gzip')
    assert 'Firefox' in response.text
def test_redirects():
    """
    Check redirects are followed, and how the Referer header is set
    along a redirect chain.
    """
    b = BaseBrowser()
    b.location(HTTPBIN + 'redirect/1')
    assert b.url == HTTPBIN + 'get'
    r = b.location(HTTPBIN + 'redirect/1')
    # the final request carries the redirecting URL as its Referer
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1'
    assert r.url == HTTPBIN + 'get'
    # Normal redirect chain
    b.url = None
    r = b.location(HTTPBIN + 'redirect/4')
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1'
    assert len(r.history) == 4
    # history[0] is the first request of the chain; each hop's Referer is
    # the URL of the request that triggered it
    assert r.history[3].request.url == HTTPBIN + 'redirect/1'
    assert r.history[3].request.headers.get('Referer') == HTTPBIN + 'redirect/2'
    assert r.history[2].request.url == HTTPBIN + 'redirect/2'
    assert r.history[2].request.headers.get('Referer') == HTTPBIN + 'redirect/3'
    assert r.history[1].request.url == HTTPBIN + 'redirect/3'
    assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/4'
    assert r.history[0].request.url == HTTPBIN + 'redirect/4'
    assert r.history[0].request.headers.get('Referer') is None
    assert r.url == HTTPBIN + 'get'
    # Disable all referers
    r = b.location(HTTPBIN + 'redirect/2', referrer=False)
    assert json.loads(r.text)['headers'].get('Referer') is None
    assert len(r.history) == 2
    assert r.history[1].request.headers.get('Referer') is None
    assert r.history[0].request.headers.get('Referer') is None
    assert r.url == HTTPBIN + 'get'
    # Only overrides first referer
    r = b.location(HTTPBIN + 'redirect/2', referrer='http://example.com/')
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1'
    assert len(r.history) == 2
    assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/2'
    assert r.history[0].request.headers.get('Referer') == 'http://example.com/'
    assert r.url == HTTPBIN + 'get'
    # Don't follow
    r = b.location(HTTPBIN + 'redirect/2', allow_redirects=False)
    assert len(r.history) == 0
    assert r.url == HTTPBIN + 'redirect/2'
    assert r.status_code == 302
def test_redirect2():
    """
    More redirect tests, through a live URL-shortener service.
    """
    short_url = choice(REDIRECTS1)
    browser = BaseBrowser()
    response = browser.location(short_url)
    assert response.url == HTTPBIN + 'get'
    assert json.loads(response.text)['headers'].get('Referer') == short_url
    # TODO referrer privacy settings
def test_brokenpost():
    """
    Test empty POST and redirect after POST
    """
    # the service is gone; everything below is kept for reference
    raise SkipTest('PostBin is disabled')
    try:
        browser = BaseBrowser()
        # postbin is picky with empty posts. that's good!
        response = browser.location(POSTBIN, {})
        # ensures empty data (but not None) does a POST
        assert response.request.method == 'POST'
        # ensure we were redirected after submitting a post
        assert len(response.url) >= len(POSTBIN)
        # send a POST with data
        browser.location(response.url, {'hello': 'world'})
        response = browser.location(response.url + '/feed')
        assert 'hello' in response.text
        assert 'world' in response.text
    except HTTPError as error:
        if str(error).startswith('503 '):
            raise SkipTest('Quota exceeded')
        else:
            raise
def _getrqbin(b):
    """
    Get a RequestBin
    """
    # an empty (but not None) body makes this a POST request
    response = b.location(REQUESTBIN + 'api/v1/bins', '')
    bin_name = json.loads(response.text)['name']
    assert bin_name
    return bin_name
def test_smartpost():
    """
    Checks we use POST or GET depending on the parameters
    """
    browser = BaseBrowser()
    bin_name = _getrqbin(browser)
    # no data: GET
    response = browser.location(REQUESTBIN + bin_name)
    assert 'ok' in response.text
    response = browser.location(REQUESTBIN + bin_name + '?inspect')
    assert 'GET /%s' % bin_name in response.text
    # with data: POST
    response = browser.location(REQUESTBIN + bin_name, {'hello': 'world'})
    assert 'ok' in response.text
    response = browser.location(REQUESTBIN + bin_name + '?inspect')
    assert 'POST /%s' % bin_name in response.text
    assert 'hello' in response.text
    assert 'world' in response.text
def test_weboob():
    """
    Test the Weboob Profile
    """
    class BooBrowser(BaseBrowser):
        PROFILE = Weboob('0.0')

    browser = BooBrowser()
    response = browser.location(HTTPBIN + 'headers')
    assert 'weboob/0.0' in response.text
    assert 'identity' in response.text
def test_relative():
    """
    Check relative URL / domain handling
    """
    b = DomainBrowser()
    b.location(HTTPBIN)
    b.location('/ip')
    assert b.url == HTTPBIN + 'ip'
    assert b.absurl('/ip') == HTTPBIN + 'ip'
    b.location(REQUESTBIN)
    assert b.absurl('/ip') == REQUESTBIN + 'ip'
    # once set, BASEURL takes precedence over the current URL
    b.BASEURL = HTTPBIN + 'aaaaaa'
    assert b.absurl('/ip') == HTTPBIN + 'ip'
    assert b.absurl('ip') == HTTPBIN + 'ip'
    # ...unless the second argument asks for the current URL to be used
    assert b.absurl('/ip', False) == REQUESTBIN + 'ip'
    b.BASEURL = HTTPBIN + 'aaaaaa/'
    assert b.absurl('/') == HTTPBIN
    assert b.absurl('/bb') == HTTPBIN + 'bb'
    assert b.absurl('') == HTTPBIN + 'aaaaaa/'
    assert b.absurl('bb') == HTTPBIN + 'aaaaaa/bb'
    # Give an absolute URL, should get it unaltered
    b.BASEURL = 'http://example.net/'
    assert b.absurl('http://example.com/aaa/bbb') == 'http://example.com/aaa/bbb'
    assert b.absurl('https://example.com/aaa/bbb') == 'https://example.com/aaa/bbb'
    # Schemeless absolute URL: inherits the BASEURL scheme
    assert b.absurl('//example.com/aaa/bbb') == 'http://example.com/aaa/bbb'
    b.BASEURL = 'https://example.net/'
    assert b.absurl('//example.com/aaa/bbb') == 'https://example.com/aaa/bbb'
def test_allow_url():
    """
    Check url_allowed() with RESTRICT_URL set to True (restrict to
    BASEURL) or to an explicit list of URL prefixes.
    """
    b = DomainBrowser()
    b.RESTRICT_URL = True
    # no BASEURL set yet: everything is allowed
    assert b.url_allowed('http://example.com/')
    assert b.url_allowed('http://example.net/')
    b.BASEURL = 'http://example.com/'
    assert b.url_allowed('http://example.com/')
    assert b.url_allowed('http://example.com/aaa')
    # both the scheme and the domain must match
    assert not b.url_allowed('https://example.com/')
    assert not b.url_allowed('http://example.net/')
    assert not b.url_allowed('http://')
    b.BASEURL = 'https://example.com/'
    assert not b.url_allowed('http://example.com/')
    assert not b.url_allowed('http://example.com/aaa')
    assert b.url_allowed('https://example.com/')
    assert b.url_allowed('https://example.com/aaa/bbb')
    # RESTRICT_URL can also be a list of allowed prefixes
    b.RESTRICT_URL = ['https://example.com/', 'http://example.com/']
    assert b.url_allowed('http://example.com/aaa/bbb')
    assert b.url_allowed('https://example.com/aaa/bbb')
    assert not b.url_allowed('http://example.net/aaa/bbb')
    assert not b.url_allowed('https://example.net/aaa/bbb')
    # navigating to a disallowed URL raises
    assert_raises(UrlNotAllowed, b.location, 'http://example.net/')
    assert_raises(UrlNotAllowed, b.open, 'http://example.net/')
def test_changereq():
    """
    Test overloading request defaults
    """
    browser = BaseBrowser()
    # HEAD: no response body at all
    response = browser.location(HTTPBIN + 'headers', method='HEAD')
    assert response.text is None
    # explicit method with a body
    response = browser.location(HTTPBIN + 'put', method='PUT', data={'hello': 'world'})
    assert 'hello' in response.text
    assert 'world' in response.text
    # per-request header override
    response = browser.location(HTTPBIN + 'headers', headers={'User-Agent': 'Web Out of Browsers'})
    assert 'Web Out of Browsers' in response.text
    assert 'Firefox' not in response.text
def test_referrer():
    """
    Test automatic referrer setting
    """
    b = BaseBrowser()
    # first request: no previous page, no Referer
    r = b.location(HTTPBIN + 'get')
    assert 'Referer' not in json.loads(r.text)['headers']
    r = b.location(HTTPBIN + 'headers')
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'get'
    # reloading the same URL does not send a Referer
    r = b.location(HTTPBIN + 'headers')
    assert 'Referer' not in json.loads(r.text)['headers']
    # Force another referrer
    r = b.location(HTTPBIN + 'get')
    r = b.location(HTTPBIN + 'headers', referrer='http://example.com/')
    assert json.loads(r.text)['headers'].get('Referer') == 'http://example.com/'
    # Force no referrer
    r = b.location(HTTPBIN + 'get')
    r = b.location(HTTPBIN + 'headers', referrer=False)
    assert 'Referer' not in json.loads(r.text)['headers']
    # no referrer when going from https to http
    assert b.get_referrer('https://example.com/', 'http://example.com/') is None
def test_cookiepolicy():
    """
    Test cookie parsing and processing
    """
    policy = CookiePolicy()

    def bc(data):
        """
        build one cookie, and normalize it
        """
        cs = Cookies()
        cs.parse_response(data)
        for c in cs.itervalues():
            policy.normalize_cookie(c, 'http://example.com/')
        # the loop variable leaks on purpose: there is only one cookie
        return c
    # parse max-age
    assert bc('__bwid=58244366; max-age=42; path=/').expires
    # security for received cookies
    assert policy.can_set(bc('k=v; domain=www.example.com'),
            'http://www.example.com/')
    assert policy.can_set(bc('k=v; domain=sub.example.com'),
            'http://www.example.com/')
    assert policy.can_set(bc('k=v; domain=sub.example.com'),
            'http://example.com/')
    assert policy.can_set(bc('k=v; domain=.example.com'),
            'http://example.com/')
    assert policy.can_set(bc('k=v; domain=www.example.com'),
            'http://example.com/')
    assert not policy.can_set(bc('k=v; domain=example.com'),
            'http://example.net/')
    assert not policy.can_set(bc('k=v; domain=.net'),
            'http://example.net/')
    assert not policy.can_set(bc('k=v; domain=www.example.net'),
            'http://www.example.com/')
    assert not policy.can_set(bc('k=v; domain=wwwexample.com'),
            'http://example.com/')
    assert not policy.can_set(bc('k=v; domain=.example.com'),
            'http://wwwexample.com/')
    # pattern matching domains
    assert not policy.domain_match('example.com', 's.example.com')
    assert policy.domain_match('.example.com', 's.example.com')
    assert not policy.domain_match('.example.com', 'example.com') # yep.
    assert policy.domain_match('s.example.com', 's.example.com')
    assert not policy.domain_match('s.example.com', 's2.example.com')
    assert policy.domain_match_list(True, 'example.com')
    assert not policy.domain_match_list([], 'example.com')
    assert policy.domain_match_list(['example.net', 'example.com'], 'example.com')
    assert not policy.domain_match_list(['example.net', 'example.org'], 'example.com')
def test_cookiejar():
    """
    Test adding, removing, finding cookies to and from the jar
    """
    def bc(data):
        """
        build one cookie
        """
        cs = Cookies()
        cs.parse_response(data)
        for c in cs.itervalues():
            return c
    # filtering cookies
    cookie0 = bc('j=v; domain=www.example.com; path=/')
    cookie1 = bc('k=v1; domain=www.example.com; path=/; secure')
    cookie2 = bc('k=v2; domain=.example.com; path=/')
    cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/')
    cookie4 = bc('k=v4; domain=www.example.com; path=/lol/')
    cj = CookieJar(CookiePolicy())
    cj.set(cookie0)
    cj.set(cookie1)
    cj.set(cookie2)
    cj.set(cookie3)
    cj.set(cookie4)
    assert len(cj.all()) == 5 # all cookies
    assert len(cj.all(path='/')) == 3 # all cookies except the ones with deep paths
    assert len(cj.all(name='k')) == 4 # this excludes cookie0
    assert len(cj.all(domain='example.com')) == 0 # yep
    assert len(cj.all(domain='s.example.com')) == 1 # cookie2
    assert len(cj.all(domain='.example.com')) == 1 # cookie2 (exact match)
    assert len(cj.all(domain='www.example.com')) == 5 # all cookies
    assert len(cj.all(domain='www.example.com', path="/lol/")) == 4 # all + cookie4
    assert len(cj.all(domain='www.example.com', path="/lol/cat")) == 4 # all + cookie4
    assert len(cj.all(domain='www.example.com', path="/lol/cat/")) == 5 # all + cookie4 + cookie3
    assert len(cj.all(secure=True)) == 1 # cookie1
    assert len(cj.all(secure=False)) == 4 # all except cookie1
    # get() returns the most precise matching cookie
    assert cj.get(domain='www.example.com', path="/lol/") is cookie4
    assert cj.get(domain='www.example.com', path="/lol/cat/") is cookie3
    assert cj.get(domain='www.example.com', path="/") is cookie1
    assert cj.get(name='j', domain='www.example.com', path="/") is cookie0
    assert cj.get(name='k', domain='www.example.com', path="/") is cookie1
    assert cj.get(name='k', domain='s.example.com', path="/") is cookie2
    assert cj.get(name='k', domain='www.example.com', path="/aaa") is cookie1
    assert cj.get(domain='www.example.com', path='/') is cookie1
    assert cj.get(domain='www.example.com', path='/', secure=False) is cookie0
    assert cj.get(domain='www.example.com', path='/', secure=True) is cookie1
    # this is just not API choice, but how browsers act
    assert cj.for_request('http://www.example.com/') == {'k': 'v2', 'j': 'v'}
    assert cj.for_request('https://www.example.com/') == {'k': 'v1', 'j': 'v'}
    assert cj.for_request('http://www.example.com/lol/') == {'k': 'v4', 'j': 'v'}
    assert cj.for_request('http://s.example.com/lol/') == {'k': 'v2'}
    assert cj.for_request('http://example.com/lol/') == {}
    # remove/add/replace
    assert cj.remove(cookie1) is True
    assert cj.get(secure=True) is None
    cj.set(cookie1)
    assert cj.get(secure=True) is cookie1
    cookie5 = bc('k=w; domain=www.example.com; path=/; secure')
    cj.set(cookie5)
    assert cj.get(secure=True) is cookie5
    assert len(cj.all(secure=True)) == 1
    # not the same cookie, but the same identifiers
    assert cj.remove(cookie1) is True
    cj.clear()
    # expiration handling; note: plain decimal literals, the previous
    # zero-padded form (01) is octal syntax and invalid on Python 3
    cookie6 = bc('e1=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;')
    cookie7 = bc('e2=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 2010 00:00:01 GMT;')
    now = datetime(2000, 1, 1)
    cj.set(cookie0)
    cj.set(cookie6)
    cj.set(cookie7)
    assert cj.for_request('http://www.example.com/', now) == {'e2': '1', 'j': 'v'}
    assert cj.for_request('http://www.example.com/', datetime(2020, 1, 1)) == {'j': 'v'}
    assert len(cj.all()) == 3
    cj.flush(now)
    assert len(cj.all()) == 2
    assert cj.remove(cookie6) is False # already removed
    cj.flush(now, session=True)
    assert len(cj.all()) == 1
def test_buildcookie():
    """
    Test easy cookie building
    """
    jar = CookieJar(CookiePolicy())
    cookie = jar.build('kk', 'vv', 'http://example.com/')
    assert cookie.domain == 'example.com'
    assert not cookie.secure
    assert cookie.path == '/'
    cookie = jar.build('kk', 'vv', 'http://example.com/', path='/plop/', wildcard=True)
    assert cookie.domain == '.example.com'
    assert cookie.path == '/plop/'
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/')
    assert cookie.path == '/plop/'
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/plap')
    assert cookie.path == '/plop/'
    # the query string must not leak into the path or domain
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/?http://example.net/plip/')
    assert cookie.path == '/plop/'
    assert cookie.domain == 'example.com'
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/plap', path='/')
    assert cookie.path == '/'
    cookie = jar.build('kk', 'vv', 'https://example.com/')
    assert cookie.domain == 'example.com'
    assert cookie.secure
    # check the cookie works
    cookie.name = 'k'
    cookie.value = 'v'
    jar.set(cookie)
    assert jar.for_request('https://example.com/') == {'k': 'v'}
    assert jar.for_request('http://example.com/') == {}
def test_cookienav():
    """
    Test browsing while getting new cookies
    """
    b = BaseBrowser()
    r = b.location(HTTPBIN + 'cookies')
    assert len(json.loads(r.text)['cookies']) == 0
    r = b.location(HTTPBIN + 'cookies/set/hello/world')
    assert len(json.loads(r.text)['cookies']) == 1
    assert json.loads(r.text)['cookies']['hello'] == 'world'
    r = b.location(HTTPBIN + 'cookies/set/hello2/world2')
    assert len(json.loads(r.text)['cookies']) == 2
    assert json.loads(r.text)['cookies']['hello2'] == 'world2'
    # change domains: httpbin cookies must not be sent to requestbin
    r = b.location(REQUESTBIN)
    assert 'session' in r.cookies # requestbin should give this by default
    assert 'hello' not in r.cookies # we didn't send the wrong cookie
    # return to httpbin, check we didn't give the wrong cookie
    r = b.location(HTTPBIN + 'cookies')
    assert 'session' not in json.loads(r.text)['cookies']
    # override cookies temporarily
    r = b.location(HTTPBIN + 'cookies', cookies={'bla': 'bli'})
    assert len(json.loads(r.text)['cookies']) == 1
    assert json.loads(r.text)['cookies']['bla'] == 'bli'
    # reload, the "fake" cookie should not be there
    r = b.location(HTTPBIN + 'cookies')
    assert len(json.loads(r.text)['cookies']) == 2
    assert 'bla' not in json.loads(r.text)['cookies']
def test_cookieredirect():
    """
    Test cookie redirection security
    """
    rurl = choice(REDIRECTS2)
    b = BaseBrowser()
    r = b.location(HTTPBIN + 'cookies')
    assert len(json.loads(r.text)['cookies']) == 0
    # add a cookie to the redirection service domain (not the target!)
    cookie = b.cookies.build('k', 'v1', rurl)
    b.cookies.set(cookie)
    r = b.location(rurl)
    assert r.url == HTTPBIN + 'cookies'
    # the cookie was not forwarded; it's for another domain
    # this is important for security reasons,
    # and because python-requests tries to do it by default!
    assert len(json.loads(r.text)['cookies']) == 0
    # add a cookie for the target
    cookie = b.cookies.build('k', 'v2', HTTPBIN)
    b.cookies.set(cookie)
    r = b.location(rurl)
    assert r.url == HTTPBIN + 'cookies'
    assert len(json.loads(r.text)['cookies']) == 1
    assert json.loads(r.text)['cookies']['k'] == 'v2'
    # check all cookies sent in the request chain
    assert r.cookies == {'k': 'v2'}
    assert r.history[0].cookies['k'] == 'v1' # some services add other cookies
def test_cookie_srv1():
    """
    Test cookie in real conditions (service 1)
    """
    class TestBrowser(DomainBrowser):
        BASEURL = 'http://www.mria-arim.ca/'

    browser = TestBrowser()
    browser.location('testCookies.asp')
    # TODO this is also a good place to test form parsing/submission
    browser.location('testCookies.asp', {'makeMe': 'Create Cookie'})
    response = browser.location('testCookies.asp', {'testMe': 'Test Browser'})
    assert 'Your Browser accepts cookies' in response.text
def test_cookie_srv2():
    """
    Test cookie in real conditions (service 2)
    """
    def randtext():
        # 32 random alphanumeric characters
        return ''.join(choice(string.digits + string.letters) for _ in xrange(32))

    class TestBrowser(DomainBrowser):
        BASEURL = 'http://www.html-kit.com/tools/cookietester/'

        def cookienum(self):
            # the page reports how many cookies the server received;
            # raw string: \d in a plain literal is an invalid escape sequence
            return int(re.search(r'Number of cookies received: (\d+)',
                self.response.text).groups()[0])

        def mypost(self, **data):
            return self.location('', data)

    b = TestBrowser()
    b.home()
    assert b.cookienum() == 0
    r1 = randtext()
    r1v = randtext()
    # TODO this is also a good place to test form parsing/submission
    # get a new cookie
    r = b.mypost(cn=r1, cv=r1v)
    assert b.cookienum() == 1
    assert r1 in r.text
    assert r1v in r.text
    # cookie deletion
    r = b.mypost(cr=r1)
    assert b.cookienum() == 0
    assert r1 not in r.text
    assert r1v not in r.text
    # om nom nom
    b.mypost(cn=randtext(), cv=randtext())
    b.mypost(cn=randtext(), cv=randtext())
    b.mypost(cn=randtext(), cv=randtext())
    b.mypost(cn=randtext(), cv=randtext())
    assert b.cookienum() == 4