weboob-devel/weboob/tools/browser2/test.py
Laurent Bachelier e793fd14da browser2: Use cookie testing services
Both are aimed at real browsers.
This also adds a "real-life" cookie deletion/expiration test.
2014-03-10 13:11:07 +01:00

557 lines
19 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2012 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
from datetime import datetime
from random import choice
import re
import string
from requests import HTTPError
from nose.plugins.skip import SkipTest
from .browser import BaseBrowser, DomainBrowser, Weboob
from .cookiejar import CookieJar
from .cookies import Cookies
from weboob.tools.json import json
# Base URLs of the public HTTP services used by the tests in this module.
# Those services can be run locally. More or less.
HTTPBIN = 'http://httpbin.org/' # https://github.com/kennethreitz/httpbin
POSTBIN = 'http://www.postbin.org/' # https://github.com/progrium/postbin
REQUESTBIN = 'http://requestb.in/' # https://github.com/progrium/requestbin
# If you change HTTPBIN, you should also change the shortened URLs below,
# which some tests rely on:
# each of these redirects to http://httpbin.org/get
REDIRECTS1 = ('http://tinyurl.com/ouiboube-b2', 'http://bit.ly/st4Hcv')
# each of these redirects to http://httpbin.org/cookies
REDIRECTS2 = ('http://tinyurl.com/7zp3jnr', 'http://bit.ly/HZCCX7')
def test_base():
    """
    Check the headers sent by a plain BaseBrowser and its URL tracking
    """
    browser = BaseBrowser()
    headers_url = HTTPBIN + 'headers'

    body = browser.location(headers_url).text
    assert isinstance(body, unicode)
    # the response must mention Firefox, and neither python nor identity
    assert 'Firefox' in body
    assert 'python' not in body
    assert 'identity' not in body
    # the browser remembers where it is
    assert browser.url == headers_url

    # the gzip endpoint must still give readable text
    gzipped = browser.location(HTTPBIN + 'gzip')
    assert 'Firefox' in gzipped.text
def test_redirects():
    """
    Check redirects are followed

    Covers the final URL, the Referer sent at each hop of a redirect
    chain, the ``referrer`` override and ``allow_redirects=False``.
    """
    b = BaseBrowser()
    b.location(HTTPBIN + 'redirect/1')
    assert b.url == HTTPBIN + 'get'

    r = b.location(HTTPBIN + 'redirect/1')
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1'
    assert r.url == HTTPBIN + 'get'

    # Normal redirect chain
    # reset the current URL; the first hop is then expected to carry no Referer
    b.url = None
    r = b.location(HTTPBIN + 'redirect/4')
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1'
    assert len(r.history) == 4
    # every hop must send the URL of the previous hop as its Referer
    assert r.history[3].request.url == HTTPBIN + 'redirect/1'
    assert r.history[3].request.headers.get('Referer') == HTTPBIN + 'redirect/2'
    assert r.history[2].request.url == HTTPBIN + 'redirect/2'
    assert r.history[2].request.headers.get('Referer') == HTTPBIN + 'redirect/3'
    assert r.history[1].request.url == HTTPBIN + 'redirect/3'
    assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/4'
    assert r.history[0].request.url == HTTPBIN + 'redirect/4'
    assert r.history[0].request.headers.get('Referer') is None
    assert r.url == HTTPBIN + 'get'

    # Disable all referers
    r = b.location(HTTPBIN + 'redirect/2', referrer=False)
    assert json.loads(r.text)['headers'].get('Referer') is None
    assert len(r.history) == 2
    assert r.history[1].request.headers.get('Referer') is None
    assert r.history[0].request.headers.get('Referer') is None
    assert r.url == HTTPBIN + 'get'

    # Only overrides first referer
    r = b.location(HTTPBIN + 'redirect/2', referrer='http://example.com/')
    assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1'
    assert len(r.history) == 2
    assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/2'
    assert r.history[0].request.headers.get('Referer') == 'http://example.com/'
    assert r.url == HTTPBIN + 'get'

    # Don't follow
    r = b.location(HTTPBIN + 'redirect/2', allow_redirects=False)
    assert len(r.history) == 0
    assert r.url == HTTPBIN + 'redirect/2'
    assert r.status_code == 302
def test_redirect2():
    """
    More redirect tests

    Goes through one randomly chosen URL shortener that redirects
    to the "get" endpoint.
    """
    shortened = choice(REDIRECTS1)
    browser = BaseBrowser()
    response = browser.location(shortened)
    assert response.url == HTTPBIN + 'get'
    # the shortener URL must be the Referer of the final request
    sent_headers = json.loads(response.text)['headers']
    assert sent_headers.get('Referer') == shortened
    # TODO referrer privacy settings
def test_brokenpost():
    """
    Test empty POST and redirect after POST

    Always raises SkipTest for now; everything after the first
    statement is currently unreachable.
    """
    raise SkipTest('PostBin is disabled')
    try:
        b = BaseBrowser()
        # postbin is picky with empty posts. that's good!
        r = b.location(POSTBIN, {})
        # ensures empty data (but not None) does a POST
        assert r.request.method == 'POST'
        # ensure we were redirected after submitting a post
        assert len(r.url) >= len(POSTBIN)
        # send a POST with data
        b.location(r.url, {'hello': 'world'})
        r = b.location(r.url + '/feed')
        assert 'hello' in r.text
        assert 'world' in r.text
    except HTTPError as e:
        # a 503 is reported as a skipped test, not as a failure
        if str(e).startswith('503 '):
            raise SkipTest('Quota exceeded')
        else:
            raise
def _getrqbin(b):
    """
    Get a RequestBin

    Creates a new bin using the browser `b` and returns its name.
    """
    # empty POST
    creation = b.location(REQUESTBIN + 'api/v1/bins', '')
    bin_name = json.loads(creation.text)['name']
    assert bin_name
    return bin_name
def test_smartpost():
    """
    Checks we use POST or GET depending on the parameters
    """
    browser = BaseBrowser()
    bin_name = _getrqbin(browser)
    bin_url = REQUESTBIN + bin_name
    inspect_url = bin_url + '?inspect'

    # without data, the bin must have recorded a GET
    reply = browser.location(bin_url)
    assert 'ok' in reply.text
    inspection = browser.location(inspect_url).text
    assert 'GET /%s' % bin_name in inspection

    # with data, the bin must have recorded a POST carrying that data
    reply = browser.location(bin_url, {'hello': 'world'})
    assert 'ok' in reply.text
    inspection = browser.location(inspect_url).text
    assert 'POST /%s' % bin_name in inspection
    assert 'hello' in inspection
    assert 'world' in inspection
def test_weboob():
    """
    Test the Weboob Profile
    """
    class BooBrowser(BaseBrowser):
        # replace the default profile with the Weboob one, version '0.0'
        PROFILE = Weboob('0.0')

    browser = BooBrowser()
    body = browser.location(HTTPBIN + 'headers').text
    # with this profile, both strings must appear in the response
    assert 'weboob/0.0' in body
    assert 'identity' in body
def test_relative():
    """
    Check relative URL / domain handling
    """
    browser = DomainBrowser()
    httpbin_ip = HTTPBIN + 'ip'
    requestbin_ip = REQUESTBIN + 'ip'

    # no BASEURL set here: URLs are resolved against the current location
    browser.location(HTTPBIN)
    browser.location('/ip')
    assert browser.url == httpbin_ip
    assert browser.absurl('/ip') == httpbin_ip

    browser.location(REQUESTBIN)
    assert browser.absurl('/ip') == requestbin_ip

    # BASEURL set: it is used, unless the second argument of absurl is False
    browser.BASEURL = HTTPBIN + 'aaaaaa'
    assert browser.absurl('/ip') == httpbin_ip
    assert browser.absurl('ip') == httpbin_ip
    assert browser.absurl('/ip', False) == requestbin_ip

    # a trailing slash in BASEURL changes how path-relative URLs resolve
    base_dir = HTTPBIN + 'aaaaaa/'
    browser.BASEURL = base_dir
    assert browser.absurl('/') == HTTPBIN
    assert browser.absurl('/bb') == HTTPBIN + 'bb'
    assert browser.absurl('') == base_dir
    assert browser.absurl('bb') == base_dir + 'bb'
def test_changereq():
    """
    Test overloading request defaults
    """
    browser = BaseBrowser()
    headers_url = HTTPBIN + 'headers'

    # explicit HEAD method: the response has no text
    head_reply = browser.location(headers_url, method='HEAD')
    assert head_reply.text is None

    # explicit PUT method with data
    put_reply = browser.location(HTTPBIN + 'put', method='PUT', data={'hello': 'world'})
    put_body = put_reply.text
    assert 'hello' in put_body
    assert 'world' in put_body

    # a per-request User-Agent replaces the profile one
    custom_agent = {'User-Agent': 'Web Out of Browsers'}
    agent_body = browser.location(headers_url, headers=custom_agent).text
    assert 'Web Out of Browsers' in agent_body
    assert 'Firefox' not in agent_body
def test_referrer():
    """
    Test automatic referrer setting
    """
    def sent_headers(response):
        # read the 'headers' entry of the JSON body
        return json.loads(response.text)['headers']

    browser = BaseBrowser()
    get_url = HTTPBIN + 'get'
    headers_url = HTTPBIN + 'headers'

    # first request: nothing to refer to
    assert 'Referer' not in sent_headers(browser.location(get_url))
    # second request: the previous URL is the referrer
    assert sent_headers(browser.location(headers_url)).get('Referer') == get_url
    # requesting the same URL again: no referrer
    assert 'Referer' not in sent_headers(browser.location(headers_url))

    # Force another referrer
    browser.location(get_url)
    forced = browser.location(headers_url, referrer='http://example.com/')
    assert sent_headers(forced).get('Referer') == 'http://example.com/'

    # Force no referrer
    browser.location(get_url)
    bare = browser.location(headers_url, referrer=False)
    assert 'Referer' not in sent_headers(bare)

    # from an https page to an http page, no referrer is computed
    assert browser._get_referrer('https://example.com/', 'http://example.com/') is None
def test_cookieparse():
    """
    Test cookie parsing and processing
    """
    jar = CookieJar()

    def bc(data):
        """
        build one cookie, and normalize it
        """
        parsed = Cookies()
        parsed.parse_response(data)
        for cookie in parsed.itervalues():
            jar._normalize_cookie(cookie, 'http://example.com/')
            return cookie

    # parse max-age
    assert bc('__bwid=58244366; max-age=42; path=/').expires

    # security for received cookies
    # (cookie header, URL it was received from) pairs that must be accepted
    accepted = (
        ('k=v; domain=www.example.com', 'http://www.example.com/'),
        ('k=v; domain=sub.example.com', 'http://www.example.com/'),
        ('k=v; domain=sub.example.com', 'http://example.com/'),
        ('k=v; domain=.example.com', 'http://example.com/'),
        ('k=v; domain=www.example.com', 'http://example.com/'),
    )
    for header, url in accepted:
        assert jar._can_set(bc(header), url)

    # pairs that must be refused
    refused = (
        ('k=v; domain=example.com', 'http://example.net/'),
        ('k=v; domain=.net', 'http://example.net/'),
        ('k=v; domain=www.example.net', 'http://www.example.com/'),
        ('k=v; domain=wwwexample.com', 'http://example.com/'),
        ('k=v; domain=.example.com', 'http://wwwexample.com/'),
    )
    for header, url in refused:
        assert not jar._can_set(bc(header), url)

    # pattern matching domains
    assert not jar._domain_match('example.com', 's.example.com')
    assert jar._domain_match('.example.com', 's.example.com')
    assert not jar._domain_match('.example.com', 'example.com')  # yep.
    assert jar._domain_match('s.example.com', 's.example.com')
    assert not jar._domain_match('s.example.com', 's2.example.com')

    # domain lists: True matches, an empty list does not
    assert jar._domain_match_list(True, 'example.com')
    assert not jar._domain_match_list([], 'example.com')
    assert jar._domain_match_list(['example.net', 'example.com'], 'example.com')
    assert not jar._domain_match_list(['example.net', 'example.org'], 'example.com')
def test_cookiejar():
    """
    Test adding, removing, finding cookies to and from the jar
    """
    def bc(data):
        """
        build one cookie
        """
        cs = Cookies()
        cs.parse_response(data)
        # return the first parsed cookie
        for c in cs.itervalues():
            return c

    # filtering cookies
    cookie0 = bc('j=v; domain=www.example.com; path=/')
    cookie1 = bc('k=v1; domain=www.example.com; path=/; secure')
    cookie2 = bc('k=v2; domain=.example.com; path=/')
    cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/')
    cookie4 = bc('k=v4; domain=www.example.com; path=/lol/')

    cj = CookieJar()
    cj.set(cookie0)
    cj.set(cookie1)
    cj.set(cookie2)
    cj.set(cookie3)
    cj.set(cookie4)

    assert len(cj.all()) == 5  # all cookies
    assert len(cj.all(path='/')) == 3  # all cookies except the ones with deep paths
    assert len(cj.all(name='k')) == 4  # this excludes cookie0
    assert len(cj.all(domain='example.com')) == 0  # yep
    assert len(cj.all(domain='s.example.com')) == 1  # cookie2
    assert len(cj.all(domain='.example.com')) == 1  # cookie2 (exact match)
    assert len(cj.all(domain='www.example.com')) == 5  # all cookies
    assert len(cj.all(domain='www.example.com', path="/lol/")) == 4  # root-path cookies + cookie4
    assert len(cj.all(domain='www.example.com', path="/lol/cat")) == 4  # root-path cookies + cookie4
    assert len(cj.all(domain='www.example.com', path="/lol/cat/")) == 5  # root-path cookies + cookie4 + cookie3
    assert len(cj.all(secure=True)) == 1  # cookie1
    assert len(cj.all(secure=False)) == 4  # all except cookie1

    assert cj.get(domain='www.example.com', path="/lol/") is cookie4
    assert cj.get(domain='www.example.com', path="/lol/cat/") is cookie3
    assert cj.get(domain='www.example.com', path="/") is cookie1
    assert cj.get(name='j', domain='www.example.com', path="/") is cookie0
    assert cj.get(name='k', domain='www.example.com', path="/") is cookie1
    assert cj.get(name='k', domain='s.example.com', path="/") is cookie2
    assert cj.get(name='k', domain='www.example.com', path="/aaa") is cookie1

    assert cj.get(domain='www.example.com', path='/') is cookie1
    assert cj.get(domain='www.example.com', path='/', secure=False) is cookie0
    assert cj.get(domain='www.example.com', path='/', secure=True) is cookie1

    # this is not just an API choice, but how browsers act
    assert cj.for_request('http://www.example.com/') == {'k': 'v2', 'j': 'v'}
    assert cj.for_request('https://www.example.com/') == {'k': 'v1', 'j': 'v'}
    assert cj.for_request('http://www.example.com/lol/') == {'k': 'v4', 'j': 'v'}
    assert cj.for_request('http://s.example.com/lol/') == {'k': 'v2'}
    assert cj.for_request('http://example.com/lol/') == {}

    # remove/add/replace
    assert cj.remove(cookie1) is True
    assert cj.get(secure=True) is None
    cj.set(cookie1)
    assert cj.get(secure=True) is cookie1
    # cookie5 has the same name, domain and path as cookie1, so it replaces it
    cookie5 = bc('k=w; domain=www.example.com; path=/; secure')
    cj.set(cookie5)
    assert cj.get(secure=True) is cookie5
    assert len(cj.all(secure=True)) == 1
    # not the same cookie, but the same identifiers
    assert cj.remove(cookie1) is True

    # expiration handling
    cj.clear()
    cookie6 = bc('e1=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;')
    cookie7 = bc('e2=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 2010 00:00:01 GMT;')
    # plain decimal literals: a leading zero means octal in Python 2
    # and is a syntax error in Python 3
    now = datetime(2000, 1, 1)
    cj.set(cookie0)
    cj.set(cookie6)
    cj.set(cookie7)

    # expired cookies are not sent, but are still stored until flushed
    assert cj.for_request('http://www.example.com/', now) == {'e2': '1', 'j': 'v'}
    assert cj.for_request('http://www.example.com/', datetime(2020, 1, 1)) == {'j': 'v'}
    assert len(cj.all()) == 3

    cj.flush(now)
    assert len(cj.all()) == 2
    assert cj.remove(cookie6) is False  # already removed

    # with session=True one more cookie goes away
    # (presumably cookie0, the only one without an expiry date)
    cj.flush(now, session=True)
    assert len(cj.all()) == 1
def test_buildcookie():
    """
    Test easy cookie building
    """
    jar = CookieJar()
    site = 'http://example.com/'

    # defaults come from the URL: domain, root path, not secure
    cookie = jar.build('kk', 'vv', site)
    assert cookie.domain == 'example.com'
    assert not cookie.secure
    assert cookie.path == '/'

    # wildcard=True gives a dotted domain, and path is taken as given
    cookie = jar.build('kk', 'vv', site, path='/plop/', wildcard=True)
    assert cookie.domain == '.example.com'
    assert cookie.path == '/plop/'

    # the path comes from the URL, up to the last slash
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/')
    assert cookie.path == '/plop/'
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/plap')
    assert cookie.path == '/plop/'

    # a URL inside the query string changes neither path nor domain
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/?http://example.net/plip/')
    assert cookie.path == '/plop/'
    assert cookie.domain == 'example.com'

    # an explicit path wins over the one in the URL
    cookie = jar.build('kk', 'vv', 'http://example.com/plop/plap', path='/')
    assert cookie.path == '/'

    # an https URL gives a secure cookie
    cookie = jar.build('kk', 'vv', 'https://example.com/')
    assert cookie.domain == 'example.com'
    assert cookie.secure

    # check the cookie works
    cookie.name = 'k'
    cookie.value = 'v'
    jar.set(cookie)
    assert jar.for_request('https://example.com/') == {'k': 'v'}
    assert jar.for_request('http://example.com/') == {}
def test_cookienav():
    """
    Test browsing while getting new cookies
    """
    def seen_cookies(response):
        # read the 'cookies' entry of the JSON body
        return json.loads(response.text)['cookies']

    browser = BaseBrowser()
    cookies_url = HTTPBIN + 'cookies'

    # a fresh browser has no cookie
    assert len(seen_cookies(browser.location(cookies_url))) == 0

    # cookies pile up as they are set
    received = seen_cookies(browser.location(HTTPBIN + 'cookies/set/hello/world'))
    assert len(received) == 1
    assert received['hello'] == 'world'
    received = seen_cookies(browser.location(HTTPBIN + 'cookies/set/hello2/world2'))
    assert len(received) == 2
    assert received['hello2'] == 'world2'

    other_site = browser.location(REQUESTBIN)
    assert 'session' in other_site.cookies  # requestbin should give this by default
    assert 'hello' not in other_site.cookies  # we didn't send the wrong cookie

    # return to httpbin, check we didn't give the wrong cookie
    assert 'session' not in seen_cookies(browser.location(cookies_url))

    # override cookies temporarily
    received = seen_cookies(browser.location(cookies_url, cookies={'bla': 'bli'}))
    assert len(received) == 1
    assert received['bla'] == 'bli'

    # reload, the "fake" cookie should not be there
    received = seen_cookies(browser.location(cookies_url))
    assert len(received) == 2
    assert 'bla' not in received
def test_cookieredirect():
    """
    Test cookie redirection security
    """
    def seen_cookies(response):
        # read the 'cookies' entry of the JSON body
        return json.loads(response.text)['cookies']

    shortened = choice(REDIRECTS2)
    target = HTTPBIN + 'cookies'
    browser = BaseBrowser()
    assert len(seen_cookies(browser.location(target))) == 0

    # add a cookie to the redirection service domain (not the target!)
    browser.cookies.set(browser.cookies.build('k', 'v1', shortened))
    landed = browser.location(shortened)
    assert landed.url == target
    # the cookie was not forwarded; it's for another domain
    # this is important for security reasons,
    # and because python-requests tries to do it by default!
    assert len(seen_cookies(landed)) == 0

    # add a cookie for the target
    browser.cookies.set(browser.cookies.build('k', 'v2', HTTPBIN))
    landed = browser.location(shortened)
    assert landed.url == target
    received = seen_cookies(landed)
    assert len(received) == 1
    assert received['k'] == 'v2'

    # check all cookies sent in the request chain
    assert landed.cookies == {'k': 'v2'}
    assert landed.history[0].cookies['k'] == 'v1'  # some services add other cookies
def test_cookie_srv1():
    """
    Test cookie in real conditions (service 1)
    """
    class TestBrowser(DomainBrowser):
        BASEURL = 'http://www.mria-arim.ca/'

    page = 'testCookies.asp'
    browser = TestBrowser()
    browser.location(page)
    # TODO this is also a good place to test form parsing/submission
    # ask the service to create a cookie, then ask it to check it
    browser.location(page, {'makeMe': 'Create Cookie'})
    verdict = browser.location(page, {'testMe': 'Test Browser'})
    assert 'Your Browser accepts cookies' in verdict.text
def test_cookie_srv2():
    """
    Test cookie in real conditions (service 2)

    Creates and deletes cookies through a cookie testing page, and checks
    the number of cookies the page reports having received.
    """
    def randtext():
        """
        Random string of 32 ASCII letters and digits
        """
        # ascii_letters, not string.letters: the latter depends on
        # the locale and does not exist in Python 3
        return ''.join(choice(string.digits + string.ascii_letters) for _ in range(32))

    class TestBrowser(DomainBrowser):
        BASEURL = 'http://www.html-kit.com/tools/cookietester/'

        def cookienum(self):
            """
            Number of cookies received, as shown on the current page
            """
            # raw string, so that \d reaches the regex engine untouched
            return int(re.search(r'Number of cookies received: (\d+)',
                                 self.response.text).group(1))

        def mypost(self, **data):
            """
            Send the keyword arguments as data to the base URL
            """
            return self.location('', data)

    b = TestBrowser()
    b.home()
    assert b.cookienum() == 0

    r1 = randtext()
    r1v = randtext()
    # TODO this is also a good place to test form parsing/submission
    # get a new cookie
    r = b.mypost(cn=r1, cv=r1v)
    assert b.cookienum() == 1
    assert r1 in r.text
    assert r1v in r.text

    # cookie deletion
    r = b.mypost(cr=r1)
    assert b.cookienum() == 0
    assert r1 not in r.text
    assert r1v not in r.text

    # om nom nom
    for _ in range(4):
        b.mypost(cn=randtext(), cv=randtext())
    assert b.cookienum() == 4