Cleanups due to the usage of python-requests>=2.0
This commit is contained in:
parent
ecd3a9c5ab
commit
82c61f3668
4 changed files with 3 additions and 2170 deletions
|
|
@ -46,6 +46,7 @@ class Profile(object):
|
|||
|
||||
Warning: Do not enable lzma, bzip or bzip2, sdch encodings
|
||||
as python-requests does not support it yet.
|
||||
Supported as of 2.2: gzip, deflate, compress.
|
||||
In doubt, do not change the default Accept-Encoding header
|
||||
of python-requests.
|
||||
"""
|
||||
|
|
@ -90,7 +91,7 @@ class Firefox(Profile):
|
|||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20100101 Firefox/10.0.3',
|
||||
'DNT': '1'}
|
||||
# It also has "Connection: Keep-Alive", that should only be added this way:
|
||||
#session.config['keep_alive'] = True
|
||||
#FIXME session.config['keep_alive'] = True
|
||||
|
||||
|
||||
class Wget(Profile):
|
||||
|
|
@ -108,7 +109,7 @@ class Wget(Profile):
|
|||
session.headers.update({
|
||||
'Accept': '*/*',
|
||||
'User-Agent': 'Wget/%s' % self.version})
|
||||
#session.config['keep_alive'] = True
|
||||
#FIXME session.config['keep_alive'] = True
|
||||
|
||||
|
||||
class BaseBrowser(object):
|
||||
|
|
@ -163,7 +164,6 @@ class BaseBrowser(object):
|
|||
"""
|
||||
Make an HTTP request like a browser does:
|
||||
* follow redirects (unless disabled)
|
||||
* handle cookies
|
||||
* provide referrers (unless disabled)
|
||||
|
||||
Unless a `method` is explicitly provided, it makes a GET request,
|
||||
|
|
|
|||
|
|
@ -1,431 +0,0 @@
|
|||
# TODO declare __all__
|
||||
# TODO support logging
|
||||
|
||||
from urlparse import urlparse
|
||||
from datetime import datetime, timedelta
|
||||
import posixpath
|
||||
|
||||
from .cookies import Cookie, Cookies, strip_spaces_and_quotes, Definitions
|
||||
|
||||
|
||||
def valid_domain(domain):
    """
    Variant of cookies.valid_domain that tolerates one leading period.

    Leading periods are *very* common in real-world cookies and useful for us,
    so a single one is dropped before the regular check.

    Returns True when the remaining text matches Definitions.DOMAIN_RE and
    False otherwise (including when it starts with a double quote).
    """
    # fail early on anything that cannot be stored as ASCII
    domain.encode('ascii')
    bare = domain[1:] if domain[:1] == '.' else domain
    if bare[:1] == '"':
        return False
    return bool(Definitions.DOMAIN_RE.match(bare))
|
||||
|
||||
|
||||
def parse_domain(value):
    """
    Variant of cookies.parse_domain that tolerates leading periods.

    The value is stripped with strip_spaces_and_quotes() and, when anything
    is left, checked with our own valid_domain(). The stripped value is
    returned.
    """
    stripped = strip_spaces_and_quotes(value)
    if not stripped:
        return stripped
    assert valid_domain(stripped)
    return stripped
|
||||
|
||||
# Register the lenient domain parser/validator defined above on Cookie,
# so that domains with a leading period are accepted.
# this is ok because we are using our own copy of the lib
# TODO push a better way upstream
Cookie.attribute_parsers['domain'] = parse_domain
Cookie.attribute_validators['domain'] = valid_domain
|
||||
|
||||
|
||||
class CookiePolicy(object):
    """
    Defines how cookies are accepted, and what to do with them.

    The upper-case class attributes are the policy settings; override them
    in a subclass or on an instance to change the behavior.
    """

    ACCEPT_DOMAINS = []
    """
    Domains where to accept cookies, even when we should not.
    Add a "." before a domain to accept subdomains.
    If True, accept all cookies (a bit insecure).
    ACCEPT_DOMAINS takes priority over REJECT_DOMAINS.

    Disabling third-party cookies on most browsers acts like [], enabling them
    acts like True. Since it is a very common browser option, we use the most
    secure and privacy-aware method by default.
    """

    REJECT_DOMAINS = []
    """
    Domains where to reject cookies, even when we should not.
    Add a "." before a domain to reject subdomains.
    If True, reject all cookies.
    REJECT_DOMAINS has lower priority than ACCEPT_DOMAINS.
    """

    SECURE_DOMAINS = True
    """
    When we get a cookie through a secure connection, mark it as secure
    (not to be sent on insecure channels) if the server did not tell us to.
    If True, do it automatically for all domains. Alternatively, you can put
    a list of domains, like ACCEPT_DOMAINS or REJECT_DOMAINS.
    If False, never do it (but still accept secure cookies as they are).

    NoScript for Firefox does this, either by automated guesses or forced from a list.
    """

    INSECURE_MATCHING = True
    """
    Do sloppy matching to mimic what browsers do.
    This is only for setting cookies; it should be relatively safe in Weboob.
    """

    def domain_match(self, pattern, domain):
        """
        Checks a domain matches a domain pattern.
        Patterns can be either the exact domain, or a wildcard (starting with a dot).

        example.com matches example.com only
        .example.com matches *.example.com (but not example.com)

        :param pattern: str
        :param domain: str
        :rtype: bool
        """
        if pattern.startswith('.'):
            return domain.endswith(pattern)
        return domain == pattern

    def domain_match_list(self, patterns, domain):
        """
        Checks a domain against a list of patterns.
        If the list of patterns is True, it always matches.
        If it is False or empty, it never matches.

        :param patterns: list or bool
        :param domain: str
        :rtype: bool
        """
        if patterns is True:
            return True
        # False is a documented value (see SECURE_DOMAINS) and is not iterable
        if not patterns:
            return False
        return any(self.domain_match(pattern, domain) for pattern in patterns)

    def can_set(self, cookie, url):
        """
        Checks an URL can set a particular cookie.
        See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions.

        The cookie must have a domain and a path already set, you can
        use normalize_cookie() for that.

        :param cookie: The cookie the server set
        :type cookie: Cookie
        :param url: URL of the response
        :type url: str

        :rtype: bool
        """
        url = urlparse(url)
        domain = url.hostname

        # Accept/reject overrides
        if self.domain_match_list(self.ACCEPT_DOMAINS, domain):
            return True
        if self.domain_match_list(self.REJECT_DOMAINS, domain):
            return False

        # check path; an URL without any path is the same as "/"
        if not (url.path or '/').startswith(cookie.path):
            return False

        # check domain (secure & simple)
        if cookie.domain.startswith('.'):
            # Compare on a label boundary only: example.com may set
            # .example.com or .sub.example.com, but ample.com may not
            # set .example.com just because the text happens to end alike.
            if cookie.domain.endswith('.%s' % domain):
                return True
        elif domain == cookie.domain:
            return True

        # whatever.example.com should be able to set .example.com
        # Unbelievably stupid, but widely used.
        #
        # Our method is not ideal, as it isn't very secure for some TLDs.
        # A solution could be to use tldextract.
        if self.INSECURE_MATCHING:
            if domain.split('.')[-2:] == cookie.domain.split('.')[-2:]:
                return True

        return False

    def normalize_cookie(self, cookie, url, now=None):
        """
        Update a cookie we got from the response.
        The goal is to have data relevant for use in future requests.
        * Sets domain if there is not one.
        * Sets path if there is not one.
        * Set Expires from Max-Age. We need the expires to have an absolute expiration date.
        * Force the Secure flag if required. (see SECURE_DOMAINS)

        The cookie is modified in place; nothing is returned.

        :type cookie: :class:`cookies.Cookie`
        :type url: str
        :param now: reference time for Max-Age, defaults to datetime.now()
        :type now: datetime
        """
        url = urlparse(url)
        if cookie.domain is None:
            cookie.domain = url.hostname
        if cookie.path is None:
            cookie.path = '/'
        if cookie.max_age is not None:
            if now is None:
                now = datetime.now()
            cookie.expires = now + timedelta(seconds=cookie.max_age)
        if (url.scheme == 'https'
                and self.domain_match_list(self.SECURE_DOMAINS, cookie.domain)):
            cookie.secure = True
|
||||
|
||||
|
||||
class CookieJar(object):
    """
    Manage Cookies like a real browser, with security and privacy in mind.

    python-requests accepts cookies blindly,
    Expirations are not taken into account,
    it can't handle the server asking to delete a cookie,
    and sends cookies even when changing domains!
    Of course, secure (SSL only) cookies aren't handled either.

    This behavior depends on a `policy` class.

    This class fixes all that.

    Cookies are stored in nested dictionaries: domain -> path -> name.
    """

    def __init__(self, policy):
        """
        Cookies are delicious delicacies.

        :param policy: decides which cookies are accepted and how domains match
        :type policy: :class:`CookiePolicy`
        """
        self.cookies = {}
        self.policy = policy

    def from_response(self, response):
        """
        Import cookies from the response.
        Every cookie is normalized, then stored only if the policy allows it.

        :type response: responses.Response
        """
        if 'Set-Cookie' in response.headers:
            cs = Cookies.from_response(response.headers['Set-Cookie'], True)
            for c in cs.values():
                self.policy.normalize_cookie(c, response.url)
                if self.policy.can_set(c, response.url):
                    self.set(c)

    def for_request(self, url, now=None):
        """
        Get a key/value dictionary of cookies for a given request URL.

        :type url: str
        :param now: reference time for expirations, defaults to datetime.now()
        :type now: datetime
        :rtype: dict
        """
        url = urlparse(url)
        if now is None:
            now = datetime.now()
        # we want insecure cookies in https too!
        secure = None if url.scheme == 'https' else False

        cdict = {}
        # get sorted cookies
        cookies = self.all(domain=url.hostname, path=url.path, secure=secure)
        for cookie in cookies:
            # only use session cookies and cookies with future expirations
            if cookie.expires is None or cookie.expires > now:
                # update only if not set, since first cookies are "better"
                cdict.setdefault(cookie.name, cookie.value)
        return cdict

    def flush(self, now=None, session=False):
        """
        Remove expired cookies. If session is True, also remove all session cookies.

        :param now: reference time for expirations, defaults to datetime.now()
        :type now: datetime
        :type session: bool
        """
        # same default as for_request(); comparing a date with None fails
        if now is None:
            now = datetime.now()
        # we need a list copy since we remove from the iterable
        for cookie in list(self.iter()):
            if cookie.expires is None:
                # remove session cookies if requested
                if session:
                    self.remove(cookie)
            elif cookie.expires < now:
                # remove non-session cookies if expired before now
                self.remove(cookie)

    def set(self, cookie):
        """
        Add or replace a Cookie in the jar.
        This is for normalized and checked cookies, no validation is done.
        Use from_response() to import cookies from a python-requests response.

        :type cookie: cookies.Cookie
        """
        # cookies are unique by domain, path and of course name
        assert len(cookie.domain)
        assert len(cookie.path)
        assert len(cookie.name)
        by_path = self.cookies.setdefault(cookie.domain, {})
        by_path.setdefault(cookie.path, {})[cookie.name] = cookie

    def iter(self, name=None, domain=None, path=None, secure=None):
        """
        Iterate matching cookies.
        You can restrict by name, domain, path or security.

        :type name: str
        :type domain: str
        :type path: str
        :type secure: bool

        :rtype: iter[:class:`cookies.Cookie`]
        """
        for cdomain, cpaths in self.cookies.items():
            # domain matches (all domains if None)
            if domain is not None and not self.policy.domain_match(cdomain, domain):
                continue
            for cpath, cnames in cpaths.items():
                # path matches (all if None)
                if path is not None and not path.startswith(cpath):
                    continue
                for cname, cookie in cnames.items():
                    # only wanted name (all if None)
                    if name is not None and name != cname:
                        continue
                    # wanted security (all if None)
                    # cookie.secure can be "None" if not secure!
                    if secure is None \
                            or (secure is False and not cookie.secure) \
                            or (secure is True and cookie.secure):
                        yield cookie

    def all(self, name=None, domain=None, path=None, secure=None):
        """
        Like iter(), but sorts the cookies, from most precise to less precise.

        When `domain` is given, a cookie whose domain is exactly `domain`
        comes first, then longer domains; after that longer paths, and
        finally secure cookies before non-secure ones.

        :rtype: list[:class:`cookies.Cookie`]
        """
        def precision(cookie):
            # the domain only takes part in the ordering when one was asked for
            if domain:
                domain_rank = (cookie.domain == domain, len(cookie.domain))
            else:
                domain_rank = (False, 0)
            return (domain_rank, len(cookie.path), bool(cookie.secure))

        # the sort is stable, equally precise cookies keep their iteration order
        return sorted(self.iter(name, domain, path, secure),
                      key=precision, reverse=True)

    def get(self, name=None, domain=None, path=None, secure=None):
        """
        Return the best cookie from all().
        Useful for changing the value or deleting a cookie.

        name, domain, path and secure are the same as iter().

        :rtype: :class:`cookies.Cookie` or None
        """
        cookies = self.all(name, domain, path, secure)
        return cookies[0] if cookies else None

    def remove(self, cookie):
        """
        Remove a cookie. The cookie argument must have the same domain, path and name.
        Return False if not present, True if just removed.

        :type cookie: :class:`cookies.Cookie`
        :rtype: bool
        """
        # cookies are unique by domain, path and of course name
        assert len(cookie.domain)
        assert len(cookie.path)
        assert len(cookie.name)
        # default to an empty dict at both levels, an unknown domain or path
        # simply means the cookie is not there
        d = self.cookies.get(cookie.domain, {}).get(cookie.path, {})
        if cookie.name in d:
            del d[cookie.name]
            return True
        return False

    def clear(self):
        """
        Remove all cookies.
        """
        self.cookies.clear()

    def build(self, name, value, url, path=None, wildcard=False):
        """
        Build a Cookie object for the current URL.

        The domain and path are guessed. If you want to set for the whole domain,
        take care of what you put in URL!
        build(k, v, 'http://example.com/hello/world') will only set cookie for the
        /hello/ path.

        `name` and `value` are required parameters of Cookie.__init__()

        You can force the `path` if you want.

        The `wildcard` parameter will add a period before the domain.

        Typical usage would be, inside a DomainBrowser:
        cookie = self.cookies.build(k, v, self.url)
        cookie = self.cookies.build(k, v, self.absurl('/'))
        cookie = self.cookies.build(k, v, self.BASEURL)

        And then:
        self.cookies.set(cookie)

        For more advanced usage, create a Cookie object manually, or
        alter the returned Cookie object before set().

        :type name: basestring
        :type value: basestring
        :type url: str
        :type path: str
        :type wildcard: bool
        :rtype: :class:`cookies.Cookie`
        """
        cookie = Cookie(name, value)
        url = urlparse(url)
        if wildcard:
            cookie.domain = '.' + url.hostname
        else:
            cookie.domain = url.hostname
        if path is None:
            # an URL without any path (http://example.com) would give an empty
            # cookie path, which set() refuses; treat it like "/"
            cookie.path = posixpath.join(posixpath.dirname(url.path), '') or '/'
        else:
            cookie.path = path
        if url.scheme == 'https':
            cookie.secure = True
        return cookie
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,597 +0,0 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2012 Laurent Bachelier
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from datetime import datetime
|
||||
from random import choice
|
||||
import re
|
||||
import string
|
||||
|
||||
from requests import HTTPError
|
||||
from nose.plugins.skip import SkipTest
|
||||
from nose.tools import assert_raises
|
||||
|
||||
from .browser import BaseBrowser, DomainBrowser, Weboob, UrlNotAllowed
|
||||
from .cookiejar import CookieJar, CookiePolicy
|
||||
from .cookies import Cookies
|
||||
|
||||
from weboob.tools.json import json
|
||||
|
||||
# Those services can be run locally. More or less.
|
||||
HTTPBIN = 'http://httpbin.org/' # https://github.com/kennethreitz/httpbin
|
||||
POSTBIN = 'http://www.postbin.org/' # https://github.com/progrium/postbin
|
||||
REQUESTBIN = 'http://requestb.in/' # https://github.com/progrium/requestbin
|
||||
|
||||
# if you change HTTPBIN, you should also change these URLs for some tests:
|
||||
# redirect to http://httpbin.org/get
|
||||
REDIRECTS1 = ('http://tinyurl.com/ouiboube-b2', 'http://bit.ly/st4Hcv')
|
||||
# redirect to http://httpbin.org/cookies
|
||||
REDIRECTS2 = ('http://tinyurl.com/7zp3jnr', 'http://bit.ly/HZCCX7')
|
||||
|
||||
|
||||
def test_base():
    """
    Check the headers sent by default and that a gzip answer is readable
    """
    browser = BaseBrowser()
    headers_url = HTTPBIN + 'headers'
    echoed = browser.location(headers_url).text
    assert isinstance(echoed, unicode)
    assert 'Firefox' in echoed
    for unwanted in ('python', 'identity'):
        assert unwanted not in echoed
    assert browser.url == headers_url

    gzipped = browser.location(HTTPBIN + 'gzip').text
    assert 'Firefox' in gzipped
|
||||
|
||||
|
||||
def test_redirects():
    """
    Check redirects are followed, and which referrers are sent along the way
    """
    landing = HTTPBIN + 'get'
    hop1 = HTTPBIN + 'redirect/1'
    hop2 = HTTPBIN + 'redirect/2'
    hop3 = HTTPBIN + 'redirect/3'
    hop4 = HTTPBIN + 'redirect/4'

    def echoed_referer(response):
        # httpbin sends back the headers it received as JSON
        return json.loads(response.text)['headers'].get('Referer')

    def sent_referer(step):
        return step.request.headers.get('Referer')

    browser = BaseBrowser()
    browser.location(hop1)
    assert browser.url == landing

    response = browser.location(hop1)
    assert echoed_referer(response) == hop1
    assert response.url == landing

    # Normal redirect chain
    browser.url = None
    response = browser.location(hop4)
    assert echoed_referer(response) == hop1
    assert len(response.history) == 4
    assert response.history[3].request.url == hop1
    assert sent_referer(response.history[3]) == hop2
    assert response.history[2].request.url == hop2
    assert sent_referer(response.history[2]) == hop3
    assert response.history[1].request.url == hop3
    assert sent_referer(response.history[1]) == hop4
    assert response.history[0].request.url == hop4
    assert sent_referer(response.history[0]) is None
    assert response.url == landing

    # Disable all referers
    response = browser.location(hop2, referrer=False)
    assert echoed_referer(response) is None
    assert len(response.history) == 2
    assert sent_referer(response.history[1]) is None
    assert sent_referer(response.history[0]) is None
    assert response.url == landing

    # Only overrides first referer
    response = browser.location(hop2, referrer='http://example.com/')
    assert echoed_referer(response) == hop1
    assert len(response.history) == 2
    assert sent_referer(response.history[1]) == hop2
    assert sent_referer(response.history[0]) == 'http://example.com/'
    assert response.url == landing

    # Don't follow
    response = browser.location(hop2, allow_redirects=False)
    assert len(response.history) == 0
    assert response.url == hop2
    assert response.status_code == 302
|
||||
|
||||
|
||||
def test_redirect2():
    """
    More redirect tests, going through an external URL shortener
    """
    shortener = choice(REDIRECTS1)
    response = BaseBrowser().location(shortener)
    assert response.url == HTTPBIN + 'get'
    echoed_headers = json.loads(response.text)['headers']
    assert echoed_headers.get('Referer') == shortener
    # TODO referrer privacy settings
|
||||
|
||||
|
||||
def test_brokenpost():
    """
    Test empty POST and redirect after POST
    """
    # the test is switched off: nothing below this line is executed
    raise SkipTest('PostBin is disabled')
    try:
        browser = BaseBrowser()
        # postbin is picky with empty posts. that's good!
        response = browser.location(POSTBIN, {})
        # ensures empty data (but not None) does a POST
        assert response.request.method == 'POST'
        # ensure we were redirected after submitting a post
        assert len(response.url) >= len(POSTBIN)
        # send a POST with data
        bin_url = response.url
        browser.location(bin_url, {'hello': 'world'})
        response = browser.location(bin_url + '/feed')
        assert 'hello' in response.text
        assert 'world' in response.text
    except HTTPError as error:
        if not str(error).startswith('503 '):
            raise
        raise SkipTest('Quota exceeded')
|
||||
|
||||
|
||||
def _getrqbin(b):
    """
    Create a RequestBin with the given browser and return its name
    """
    # empty POST
    created = b.location(REQUESTBIN + 'api/v1/bins', '')
    bin_name = json.loads(created.text)['name']
    assert bin_name
    return bin_name
|
||||
|
||||
|
||||
def test_smartpost():
    """
    Checks we use POST or GET depending on the parameters
    """
    browser = BaseBrowser()
    bin_name = _getrqbin(browser)
    bin_url = REQUESTBIN + bin_name
    inspect_url = bin_url + '?inspect'

    # no data: this must be a GET
    answer = browser.location(bin_url)
    assert 'ok' in answer.text
    report = browser.location(inspect_url)
    assert 'GET /%s' % bin_name in report.text

    # with data: this must be a POST
    answer = browser.location(bin_url, {'hello': 'world'})
    assert 'ok' in answer.text
    report = browser.location(inspect_url)
    assert 'POST /%s' % bin_name in report.text
    assert 'hello' in report.text
    assert 'world' in report.text
|
||||
|
||||
|
||||
def test_weboob():
    """
    Test the Weboob Profile
    """
    class BooBrowser(BaseBrowser):
        PROFILE = Weboob('0.0')

    echoed = BooBrowser().location(HTTPBIN + 'headers').text
    for expected in ('weboob/0.0', 'identity'):
        assert expected in echoed
|
||||
|
||||
|
||||
def test_relative():
    """
    Check relative URL / domain handling
    """
    browser = DomainBrowser()
    browser.location(HTTPBIN)
    browser.location('/ip')
    httpbin_ip = HTTPBIN + 'ip'
    assert browser.url == httpbin_ip

    assert browser.absurl('/ip') == httpbin_ip
    browser.location(REQUESTBIN)
    requestbin_ip = REQUESTBIN + 'ip'
    assert browser.absurl('/ip') == requestbin_ip
    browser.BASEURL = HTTPBIN + 'aaaaaa'
    assert browser.absurl('/ip') == httpbin_ip
    assert browser.absurl('ip') == httpbin_ip
    assert browser.absurl('/ip', False) == requestbin_ip
    browser.BASEURL = HTTPBIN + 'aaaaaa/'
    assert browser.absurl('/') == HTTPBIN
    assert browser.absurl('/bb') == HTTPBIN + 'bb'
    assert browser.absurl('') == HTTPBIN + 'aaaaaa/'
    assert browser.absurl('bb') == HTTPBIN + 'aaaaaa/bb'

    # Give an absolute URL, should get it unaltered
    browser.BASEURL = 'http://example.net/'
    for absolute in ('http://example.com/aaa/bbb', 'https://example.com/aaa/bbb'):
        assert browser.absurl(absolute) == absolute

    # Schemeless absolute URL
    schemeless = '//example.com/aaa/bbb'
    assert browser.absurl(schemeless) == 'http://example.com/aaa/bbb'
    browser.BASEURL = 'https://example.net/'
    assert browser.absurl(schemeless) == 'https://example.com/aaa/bbb'
|
||||
|
||||
|
||||
def test_allow_url():
    """
    Check which URLs a DomainBrowser accepts depending on RESTRICT_URL and BASEURL
    """
    browser = DomainBrowser()
    browser.RESTRICT_URL = True
    # no BASEURL yet
    for url in ('http://example.com/', 'http://example.net/'):
        assert browser.url_allowed(url)

    browser.BASEURL = 'http://example.com/'
    for url in ('http://example.com/', 'http://example.com/aaa'):
        assert browser.url_allowed(url)
    for url in ('https://example.com/', 'http://example.net/', 'http://'):
        assert not browser.url_allowed(url)

    browser.BASEURL = 'https://example.com/'
    for url in ('http://example.com/', 'http://example.com/aaa'):
        assert not browser.url_allowed(url)
    for url in ('https://example.com/', 'https://example.com/aaa/bbb'):
        assert browser.url_allowed(url)

    # an explicit list of allowed prefixes
    browser.RESTRICT_URL = ['https://example.com/', 'http://example.com/']
    for url in ('http://example.com/aaa/bbb', 'https://example.com/aaa/bbb'):
        assert browser.url_allowed(url)
    for url in ('http://example.net/aaa/bbb', 'https://example.net/aaa/bbb'):
        assert not browser.url_allowed(url)

    assert_raises(UrlNotAllowed, browser.location, 'http://example.net/')
    assert_raises(UrlNotAllowed, browser.open, 'http://example.net/')
|
||||
|
||||
|
||||
def test_changereq():
    """
    Test overloading request defaults
    """
    browser = BaseBrowser()
    headers_url = HTTPBIN + 'headers'

    # explicit method without body
    head = browser.location(headers_url, method='HEAD')
    assert head.text is None

    # explicit method with data
    put = browser.location(HTTPBIN + 'put', method='PUT', data={'hello': 'world'})
    assert 'hello' in put.text
    assert 'world' in put.text

    # explicit header replacing the one from the profile
    custom = browser.location(headers_url, headers={'User-Agent': 'Web Out of Browsers'})
    assert 'Web Out of Browsers' in custom.text
    assert 'Firefox' not in custom.text
|
||||
assert 'Firefox' not in r.text
|
||||
|
||||
|
||||
def test_referrer():
    """
    Test automatic referrer setting
    """
    get_url = HTTPBIN + 'get'
    headers_url = HTTPBIN + 'headers'

    def echoed_headers(response):
        # httpbin sends back the headers it received as JSON
        return json.loads(response.text)['headers']

    browser = BaseBrowser()
    response = browser.location(get_url)
    assert 'Referer' not in echoed_headers(response)
    response = browser.location(headers_url)
    assert echoed_headers(response).get('Referer') == get_url
    response = browser.location(headers_url)
    assert 'Referer' not in echoed_headers(response)

    # Force another referrer
    response = browser.location(get_url)
    response = browser.location(headers_url, referrer='http://example.com/')
    assert echoed_headers(response).get('Referer') == 'http://example.com/'

    # Force no referrer
    response = browser.location(get_url)
    response = browser.location(headers_url, referrer=False)
    assert 'Referer' not in echoed_headers(response)

    assert browser.get_referrer('https://example.com/', 'http://example.com/') is None
|
||||
|
||||
|
||||
def test_cookiepolicy():
    """
    Test cookie parsing and processing
    """
    policy = CookiePolicy()

    def bc(data):
        """
        build one cookie, and normalize it
        """
        parsed = Cookies()
        parsed.parse_response(data)
        for cookie in parsed.itervalues():
            policy.normalize_cookie(cookie, 'http://example.com/')
            return cookie

    # parse max-age
    assert bc('__bwid=58244366; max-age=42; path=/').expires

    # security for received cookies
    accepted = (
        ('k=v; domain=www.example.com', 'http://www.example.com/'),
        ('k=v; domain=sub.example.com', 'http://www.example.com/'),
        ('k=v; domain=sub.example.com', 'http://example.com/'),
        ('k=v; domain=.example.com', 'http://example.com/'),
        ('k=v; domain=www.example.com', 'http://example.com/'),
    )
    for header, url in accepted:
        assert policy.can_set(bc(header), url)
    refused = (
        ('k=v; domain=example.com', 'http://example.net/'),
        ('k=v; domain=.net', 'http://example.net/'),
        ('k=v; domain=www.example.net', 'http://www.example.com/'),
        ('k=v; domain=wwwexample.com', 'http://example.com/'),
        ('k=v; domain=.example.com', 'http://wwwexample.com/'),
    )
    for header, url in refused:
        assert not policy.can_set(bc(header), url)

    # pattern matching domains
    match = policy.domain_match
    assert not match('example.com', 's.example.com')
    assert match('.example.com', 's.example.com')
    assert not match('.example.com', 'example.com')  # yep.
    assert match('s.example.com', 's.example.com')
    assert not match('s.example.com', 's2.example.com')
    match_list = policy.domain_match_list
    assert match_list(True, 'example.com')
    assert not match_list([], 'example.com')
    assert match_list(['example.net', 'example.com'], 'example.com')
    assert not match_list(['example.net', 'example.org'], 'example.com')
|
||||
|
||||
|
||||
def test_cookiejar():
    """
    Test adding, removing, finding cookies to and from the jar
    """
    def bc(data):
        """
        build one cookie
        """
        cs = Cookies()
        cs.parse_response(data)
        for c in cs.itervalues():
            return c

    # filtering cookies
    cookie0 = bc('j=v; domain=www.example.com; path=/')
    cookie1 = bc('k=v1; domain=www.example.com; path=/; secure')
    cookie2 = bc('k=v2; domain=.example.com; path=/')
    cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/')
    cookie4 = bc('k=v4; domain=www.example.com; path=/lol/')

    cj = CookieJar(CookiePolicy())
    cj.set(cookie0)
    cj.set(cookie1)
    cj.set(cookie2)
    cj.set(cookie3)
    cj.set(cookie4)

    assert len(cj.all()) == 5  # all cookies
    assert len(cj.all(path='/')) == 3  # all cookies except the ones with deep paths
    assert len(cj.all(name='k')) == 4  # this excludes cookie0
    assert len(cj.all(domain='example.com')) == 0  # yep
    assert len(cj.all(domain='s.example.com')) == 1  # cookie2
    assert len(cj.all(domain='.example.com')) == 1  # cookie2 (exact match)
    assert len(cj.all(domain='www.example.com')) == 5  # all cookies
    assert len(cj.all(domain='www.example.com', path="/lol/")) == 4  # all + cookie4
    assert len(cj.all(domain='www.example.com', path="/lol/cat")) == 4  # all + cookie4
    assert len(cj.all(domain='www.example.com', path="/lol/cat/")) == 5  # all + cookie4 + cookie3
    assert len(cj.all(secure=True)) == 1  # cookie1
    assert len(cj.all(secure=False)) == 4  # all except cookie1

    assert cj.get(domain='www.example.com', path="/lol/") is cookie4
    assert cj.get(domain='www.example.com', path="/lol/cat/") is cookie3
    assert cj.get(domain='www.example.com', path="/") is cookie1
    assert cj.get(name='j', domain='www.example.com', path="/") is cookie0
    assert cj.get(name='k', domain='www.example.com', path="/") is cookie1
    assert cj.get(name='k', domain='s.example.com', path="/") is cookie2
    assert cj.get(name='k', domain='www.example.com', path="/aaa") is cookie1
    assert cj.get(domain='www.example.com', path='/') is cookie1
    assert cj.get(domain='www.example.com', path='/', secure=False) is cookie0
    assert cj.get(domain='www.example.com', path='/', secure=True) is cookie1

    # this is just not API choice, but how browsers act
    assert cj.for_request('http://www.example.com/') == {'k': 'v2', 'j': 'v'}
    assert cj.for_request('https://www.example.com/') == {'k': 'v1', 'j': 'v'}
    assert cj.for_request('http://www.example.com/lol/') == {'k': 'v4', 'j': 'v'}
    assert cj.for_request('http://s.example.com/lol/') == {'k': 'v2'}
    assert cj.for_request('http://example.com/lol/') == {}

    # remove/add/replace
    assert cj.remove(cookie1) is True
    assert cj.get(secure=True) is None
    cj.set(cookie1)
    assert cj.get(secure=True) is cookie1
    cookie5 = bc('k=w; domain=www.example.com; path=/; secure')
    cj.set(cookie5)
    assert cj.get(secure=True) is cookie5
    assert len(cj.all(secure=True)) == 1
    # not the same cookie, but the same identifiers
    assert cj.remove(cookie1) is True

    # expirations: cookie6 is long gone, cookie7 expires between the two dates used below
    cj.clear()
    cookie6 = bc('e1=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;')
    cookie7 = bc('e2=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 2010 00:00:01 GMT;')
    # plain decimal literals: "01" is an octal literal in Python 2
    # and a syntax error in Python 3
    now = datetime(2000, 1, 1)
    cj.set(cookie0)
    cj.set(cookie6)
    cj.set(cookie7)

    assert cj.for_request('http://www.example.com/', now) == {'e2': '1', 'j': 'v'}
    assert cj.for_request('http://www.example.com/', datetime(2020, 1, 1)) == {'j': 'v'}

    assert len(cj.all()) == 3
    cj.flush(now)
    assert len(cj.all()) == 2
    assert cj.remove(cookie6) is False  # already removed
    cj.flush(now, session=True)
    assert len(cj.all()) == 1
|
||||
|
||||
|
||||
def test_buildcookie():
    """
    Check that CookieJar.build() derives domain, path and secure flag
    from the URL it is given, and that explicit arguments override them.
    """
    jar = CookieJar(CookiePolicy())

    # plain http URL at the root
    plain = jar.build('kk', 'vv', 'http://example.com/')
    assert plain.domain == 'example.com'
    assert not plain.secure
    assert plain.path == '/'

    # explicit path + wildcard: the domain gets a leading dot
    wild = jar.build('kk', 'vv', 'http://example.com/', path='/plop/',
                     wildcard=True)
    assert wild.domain == '.example.com'
    assert wild.path == '/plop/'

    # the path is taken from the URL when it is not given explicitly
    for url in ('http://example.com/plop/',
                'http://example.com/plop/plap'):
        assert jar.build('kk', 'vv', url).path == '/plop/'

    # a URL embedded in the query string must not confuse path or domain
    tricky = jar.build('kk', 'vv',
                       'http://example.com/plop/?http://example.net/plip/')
    assert tricky.path == '/plop/'
    assert tricky.domain == 'example.com'

    # an explicit path wins over the one in the URL
    rooted = jar.build('kk', 'vv', 'http://example.com/plop/plap', path='/')
    assert rooted.path == '/'

    # https URL: the cookie is flagged secure
    secured = jar.build('kk', 'vv', 'https://example.com/')
    assert secured.domain == 'example.com'
    assert secured.secure

    # check the built cookie actually works once stored in the jar:
    # it is offered to the https URL and withheld from the http one
    secured.name = 'k'
    secured.value = 'v'
    jar.set(secured)
    assert jar.for_request('https://example.com/') == {'k': 'v'}
    assert jar.for_request('http://example.com/') == {}
|
||||
|
||||
|
||||
def test_cookienav():
    """
    Browse across several pages and check which cookies the remote
    service reports having received at each step.
    """
    def reported(response):
        # the 'cookies' entry of the JSON document in the response body
        return json.loads(response.text)['cookies']

    browser = BaseBrowser()
    cookies_url = HTTPBIN + 'cookies'

    # fresh browser: nothing reported
    seen = reported(browser.location(cookies_url))
    assert len(seen) == 0

    # first cookie
    seen = reported(browser.location(HTTPBIN + 'cookies/set/hello/world'))
    assert len(seen) == 1
    assert seen['hello'] == 'world'

    # second cookie, the first one is still there
    seen = reported(browser.location(HTTPBIN + 'cookies/set/hello2/world2'))
    assert len(seen) == 2
    assert seen['hello2'] == 'world2'

    # visit another service
    other = browser.location(REQUESTBIN)
    assert 'session' in other.cookies  # requestbin should give this by default
    assert 'hello' not in other.cookies  # we didn't send the wrong cookie

    # return to httpbin, check we didn't give the wrong cookie
    seen = reported(browser.location(cookies_url))
    assert 'session' not in seen

    # override cookies temporarily
    seen = reported(browser.location(cookies_url, cookies={'bla': 'bli'}))
    assert len(seen) == 1
    assert seen['bla'] == 'bli'

    # reload, the "fake" cookie should not be there
    seen = reported(browser.location(cookies_url))
    assert len(seen) == 2
    assert 'bla' not in seen
|
||||
|
||||
|
||||
def test_cookieredirect():
    """
    Check that cookies are not carried over to another domain
    when following a redirection.
    """
    def reported(response):
        # the 'cookies' entry of the JSON document in the response body
        return json.loads(response.text)['cookies']

    redirect_url = choice(REDIRECTS2)
    target_url = HTTPBIN + 'cookies'

    browser = BaseBrowser()
    first = browser.location(target_url)
    assert len(reported(first)) == 0

    # add a cookie to the redirection service domain (not the target!)
    browser.cookies.set(browser.cookies.build('k', 'v1', redirect_url))
    landed = browser.location(redirect_url)
    assert landed.url == target_url
    # the cookie was not forwarded; it's for another domain
    # this is important for security reasons,
    # and because python-requests tries to do it by default!
    assert len(reported(landed)) == 0

    # add a cookie for the target
    browser.cookies.set(browser.cookies.build('k', 'v2', HTTPBIN))
    landed = browser.location(redirect_url)
    assert landed.url == target_url
    seen = reported(landed)
    assert len(seen) == 1
    assert seen['k'] == 'v2'

    # check all cookies sent in the request chain
    assert landed.cookies == {'k': 'v2'}
    # some services add other cookies, so only look at ours
    assert landed.history[0].cookies['k'] == 'v1'
|
||||
|
||||
|
||||
def test_cookie_srv1():
    """
    Test cookies against a real-world cookie testing page (service 1)
    """
    class TestBrowser(DomainBrowser):
        BASEURL = 'http://www.mria-arim.ca/'

    page = 'testCookies.asp'
    browser = TestBrowser()

    # load the page once, then go through its two steps
    browser.location(page)
    # TODO this is also a good place to test form parsing/submission
    browser.location(page, {'makeMe': 'Create Cookie'})
    verdict = browser.location(page, {'testMe': 'Test Browser'})

    assert 'Your Browser accepts cookies' in verdict.text
|
||||
|
||||
|
||||
def test_cookie_srv2():
    """
    Test cookies against a real-world cookie testing page (service 2)

    Creates a cookie with a random name and value, deletes it, then
    creates four more, checking the count reported by the page each time.
    """
    def randtext():
        # 32 random ASCII alphanumerics. string.ascii_letters is used
        # rather than string.letters, which is locale-dependent and may
        # yield non-ASCII bytes in cookie names and values.
        alphabet = string.digits + string.ascii_letters
        return ''.join(choice(alphabet) for _ in range(32))

    class TestBrowser(DomainBrowser):
        BASEURL = 'http://www.html-kit.com/tools/cookietester/'

        def cookienum(self):
            """
            Return the cookie count shown in the last response.
            """
            found = re.search(r'Number of cookies received: (\d+)',
                              self.response.text)
            # fail with a readable message instead of an AttributeError
            # on None if the page layout changed
            assert found is not None, 'cookie counter not found in page'
            return int(found.group(1))

        def mypost(self, **data):
            """
            Call location() on the base URL with the keyword arguments
            passed as a dict.
            """
            return self.location('', data)

    b = TestBrowser()
    b.home()
    assert b.cookienum() == 0

    r1 = randtext()
    r1v = randtext()

    # TODO this is also a good place to test form parsing/submission
    # get a new cookie
    r = b.mypost(cn=r1, cv=r1v)
    assert b.cookienum() == 1
    assert r1 in r.text
    assert r1v in r.text

    # cookie deletion
    r = b.mypost(cr=r1)
    assert b.cookienum() == 0
    assert r1 not in r.text
    assert r1v not in r.text

    # om nom nom: four more cookies, each with a fresh name and value
    for _ in range(4):
        b.mypost(cn=randtext(), cv=randtext())
    assert b.cookienum() == 4
|
||||
Loading…
Add table
Add a link
Reference in a new issue