# -*- coding: utf-8 -*- # Copyright(C) 2012 Laurent Bachelier # # This file is part of weboob. # # weboob is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # weboob is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . from __future__ import absolute_import from datetime import datetime from requests import HTTPError from nose.plugins.skip import SkipTest from .browser import BaseBrowser, DomainBrowser, Weboob from .cookiejar import CookieJar from .cookies import Cookies from weboob.tools.json import json # Those services can be run locally. More or less. HTTPBIN = 'http://httpbin.org/' # https://github.com/kennethreitz/httpbin POSTBIN = 'http://www.postbin.org/' # https://github.com/progrium/postbin REQUESTBIN = 'http://requestb.in/' # https://github.com/progrium/requestbin def test_base(): b = BaseBrowser() r = b.location(HTTPBIN + 'headers') assert isinstance(r.text, unicode) assert 'Firefox' in r.text assert 'python' not in r.text assert 'identity' not in r.text assert b.url == HTTPBIN + 'headers' r = b.location(HTTPBIN + 'gzip') assert 'Firefox' in r.text def test_redirects(): """ Check redirects are followed """ b = BaseBrowser() b.location(HTTPBIN + 'redirect/1') assert b.url == HTTPBIN + 'get' r = b.location(HTTPBIN + 'redirect/1') assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1' assert r.url == HTTPBIN + 'get' # Normal redirect chain b.url = None r = b.location(HTTPBIN + 'redirect/4') assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1' assert len(r.history) == 4 assert r.history[3].request.url == HTTPBIN + 'redirect/1' assert r.history[3].request.headers.get('Referer') == HTTPBIN + 'redirect/2' assert r.history[2].request.url == HTTPBIN + 'redirect/2' assert r.history[2].request.headers.get('Referer') == HTTPBIN + 'redirect/3' assert r.history[1].request.url == HTTPBIN + 'redirect/3' assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/4' assert r.history[0].request.url == HTTPBIN + 'redirect/4' assert r.history[0].request.headers.get('Referer') == None assert r.url == HTTPBIN + 'get' # Disable all referers r = b.location(HTTPBIN + 'redirect/2', referrer=False) assert json.loads(r.text)['headers'].get('Referer') == None assert len(r.history) == 2 assert r.history[1].request.headers.get('Referer') == None assert r.history[0].request.headers.get('Referer') == None assert r.url == HTTPBIN + 'get' # Only overrides first referer r = b.location(HTTPBIN + 'redirect/2', referrer='http://example.com/') assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1' assert len(r.history) == 2 assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/2' assert r.history[0].request.headers.get('Referer') == 'http://example.com/' assert r.url == HTTPBIN + 'get' # Don't follow r = b.location(HTTPBIN + 'redirect/2', allow_redirects=False) assert len(r.history) == 0 assert r.url == HTTPBIN + 'redirect/2' assert r.status_code == 302 def test_brokenpost(): """ Tests _fix_redirect() """ raise SkipTest('PostBin is disabled') try: b = BaseBrowser() # postbin is picky with empty posts. that's good! r = b.location(POSTBIN, {}) # ensures empty data (but not None) does a POST assert r.request.method == 'POST' # ensure we were redirected after submitting a post assert len(r.url) >= len(POSTBIN) # send a POST with data b.location(r.url, {'hello': 'world'}) r = b.location(r.url + '/feed') assert 'hello' in r.text assert 'world' in r.text except HTTPError, e: if str(e).startswith('503 '): raise SkipTest('Quota exceeded') else: raise def _getrqbin(b): """ Get a RequestBin """ # empty POST r = b.location(REQUESTBIN + 'api/v1/bins', '') name = json.loads(r.text)['name'] assert name return name def test_smartpost(): """ Checks we use POST or GET depending on the parameters """ b = BaseBrowser() n = _getrqbin(b) r = b.location(REQUESTBIN + n) assert 'ok' in r.text r = b.location(REQUESTBIN + n + '?inspect') assert 'GET /%s' % n in r.text r = b.location(REQUESTBIN + n, {'hello': 'world'}) assert 'ok' in r.text r = b.location(REQUESTBIN + n + '?inspect') assert 'POST /%s' % n in r.text assert 'hello' in r.text assert 'world' in r.text def test_weboob(): """ Test the Weboob Profile """ class BooBrowser(BaseBrowser): PROFILE = Weboob('0.0') b = BooBrowser() r = b.location(HTTPBIN + 'headers') assert 'weboob/0.0' in r.text assert 'identity' in r.text def test_relative(): """ Check relative URL / domain handling """ b = DomainBrowser() b.location(HTTPBIN) b.location('/ip') assert b.url == HTTPBIN + 'ip' assert b.absurl('/ip') == HTTPBIN + 'ip' b.location(REQUESTBIN) assert b.absurl('/ip') == REQUESTBIN + 'ip' b.BASEURL = HTTPBIN + 'aaaaaa' assert b.absurl('/ip') == HTTPBIN + 'ip' assert b.absurl('ip') == HTTPBIN + 'ip' assert b.absurl('/ip', False) == REQUESTBIN + 'ip' b.BASEURL = HTTPBIN + 'aaaaaa/' assert b.absurl('/') == HTTPBIN assert b.absurl('/bb') == HTTPBIN + 'bb' assert b.absurl('') == HTTPBIN + 'aaaaaa/' assert b.absurl('bb') == HTTPBIN + 'aaaaaa/bb' def test_changereq(): """ Test overloading request defaults """ b = BaseBrowser() r = b.location(HTTPBIN + 'headers', method='HEAD') assert r.text is None r = b.location(HTTPBIN + 'put', method='PUT', data={'hello': 'world'}) assert 'hello' in r.text assert 'world' in r.text r = b.location(HTTPBIN + 'headers', headers={'User-Agent': 'Web Out of Browsers'}) assert 'Web Out of Browsers' in r.text assert 'Firefox' not in r.text def test_referrer(): """ Test automatic referrer setting """ b = BaseBrowser() r = b.location(HTTPBIN + 'get') assert 'Referer' not in json.loads(r.text)['headers'] r = b.location(HTTPBIN + 'headers') assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'get' r = b.location(HTTPBIN + 'headers') assert 'Referer' not in json.loads(r.text)['headers'] # Force another referrer r = b.location(HTTPBIN + 'get') r = b.location(HTTPBIN + 'headers', referrer='http://example.com/') assert json.loads(r.text)['headers'].get('Referer') == 'http://example.com/' # Force no referrer r = b.location(HTTPBIN + 'get') r = b.location(HTTPBIN + 'headers', referrer=False) assert 'Referer' not in json.loads(r.text)['headers'] assert b._get_referrer('https://example.com/', 'http://example.com/') is None def test_cookieparse(): cj = CookieJar() def bc(data): """ build one cookie, and normalize it """ cs = Cookies() cs.parse_response(data) for c in cs.itervalues(): cj._normalize_cookie(c, 'http://example.com/') return c # parse max-age assert bc('__bwid=58244366; max-age=42; path=/').expires # security for received cookies assert cj._can_set(bc('k=v; domain=www.example.com'), 'http://www.example.com/') assert cj._can_set(bc('k=v; domain=sub.example.com'), 'http://www.example.com/') assert cj._can_set(bc('k=v; domain=sub.example.com'), 'http://example.com/') assert cj._can_set(bc('k=v; domain=.example.com'), 'http://example.com/') assert cj._can_set(bc('k=v; domain=www.example.com'), 'http://example.com/') assert not cj._can_set(bc('k=v; domain=example.com'), 'http://example.net/') assert not cj._can_set(bc('k=v; domain=.net'), 'http://example.net/') assert not cj._can_set(bc('k=v; domain=www.example.net'), 'http://www.example.com/') assert not cj._can_set(bc('k=v; domain=wwwexample.com'), 'http://example.com/') assert not cj._can_set(bc('k=v; domain=.example.com'), 'http://wwwexample.com/') # pattern matching domains assert not cj._domain_match('example.com', 's.example.com') assert cj._domain_match('.example.com', 's.example.com') assert not cj._domain_match('.example.com', 'example.com') # yep. assert cj._domain_match('s.example.com', 's.example.com') assert not cj._domain_match('s.example.com', 's2.example.com') assert cj._domain_match_list(True, 'example.com') assert not cj._domain_match_list([], 'example.com') assert cj._domain_match_list(['example.net', 'example.com'], 'example.com') assert not cj._domain_match_list(['example.net', 'example.org'], 'example.com') def test_cookiejar(): def bc(data): """ build one cookie """ cs = Cookies() cs.parse_response(data) for c in cs.itervalues(): return c # filtering cookies cookie0 = bc('j=v; domain=www.example.com; path=/') cookie1 = bc('k=v1; domain=www.example.com; path=/; secure') cookie2 = bc('k=v2; domain=.example.com; path=/') cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/') cookie4 = bc('k=v4; domain=www.example.com; path=/lol/') cj = CookieJar() cj.set(cookie0) cj.set(cookie1) cj.set(cookie2) cj.set(cookie3) cj.set(cookie4) assert len(cj.all()) == 5 # all cookies assert len(cj.all(path='/')) == 3 # all cookies except the ones with deep paths assert len(cj.all(name='k')) == 4 # this excludes cookie0 assert len(cj.all(domain='example.com')) == 0 # yep assert len(cj.all(domain='s.example.com')) == 1 # cookie2 assert len(cj.all(domain='.example.com')) == 1 # cookie2 (exact match) assert len(cj.all(domain='www.example.com')) == 5 # all cookies assert len(cj.all(domain='www.example.com', path="/lol/")) == 4 # all + cookie4 assert len(cj.all(domain='www.example.com', path="/lol/cat")) == 4 # all + cookie4 assert len(cj.all(domain='www.example.com', path="/lol/cat/")) == 5 # all + cookie4 + cookie3 assert len(cj.all(secure=True)) == 1 # cookie1 assert len(cj.all(secure=False)) == 4 # all except cookie1 assert cj.get(domain='www.example.com', path="/lol/") is cookie4 assert cj.get(domain='www.example.com', path="/lol/cat/") is cookie3 assert cj.get(domain='www.example.com', path="/") is cookie1 assert cj.get(name='j', domain='www.example.com', path="/") is cookie0 assert cj.get(name='k', domain='www.example.com', path="/") is cookie1 assert cj.get(name='k', domain='s.example.com', path="/") is cookie2 assert cj.get(name='k', domain='www.example.com', path="/aaa") is cookie1 assert cj.get(domain='www.example.com', path='/') is cookie1 assert cj.get(domain='www.example.com', path='/', secure=False) is cookie0 assert cj.get(domain='www.example.com', path='/', secure=True) is cookie1 # this is just not API choice, but how browsers act assert cj.for_request('http://www.example.com/') == {'k': 'v2', 'j': 'v'} assert cj.for_request('https://www.example.com/') == {'k': 'v1', 'j': 'v'} assert cj.for_request('http://www.example.com/lol/') == {'k': 'v4', 'j': 'v'} assert cj.for_request('http://s.example.com/lol/') == {'k': 'v2'} assert cj.for_request('http://example.com/lol/') == {} # remove/add/replace assert cj.remove(cookie1) is True assert cj.get(secure=True) is None cj.set(cookie1) assert cj.get(secure=True) is cookie1 cookie5 = bc('k=w; domain=www.example.com; path=/; secure') cj.set(cookie5) assert cj.get(secure=True) is cookie5 assert len(cj.all(secure=True)) == 1 # not the same cookie, but the same identifiers assert cj.remove(cookie1) is True cj.clear() cookie6 = bc('e1=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;') cookie7 = bc('e2=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 2010 00:00:01 GMT;') now = datetime(2000, 01, 01) cj.set(cookie0) cj.set(cookie6) cj.set(cookie7) assert cj.for_request('http://www.example.com/', now) == {'e2': '1', 'j': 'v'} assert cj.for_request('http://www.example.com/', datetime(2020, 01, 01)) == {'j': 'v'} assert len(cj.all()) == 3 cj.flush(now) assert len(cj.all()) == 2 assert cj.remove(cookie6) is False # already removed cj.flush(now, session=True) assert len(cj.all()) == 1 def test_buildcookie(): cj = CookieJar() """ Test cookie building """ c = cj.build('kk', 'vv', 'http://example.com/') assert c.domain == 'example.com' assert not c.secure assert c.path == '/' c = cj.build('kk', 'vv', 'http://example.com/', path='/plop/', wildcard=True) assert c.domain == '.example.com' assert c.path == '/plop/' c = cj.build('kk', 'vv', 'http://example.com/plop/') assert c.path == '/plop/' c = cj.build('kk', 'vv', 'http://example.com/plop/plap') assert c.path == '/plop/' c = cj.build('kk', 'vv', 'http://example.com/plop/?http://example.net/plip/') assert c.path == '/plop/' assert c.domain == 'example.com' c = cj.build('kk', 'vv', 'http://example.com/plop/plap', path='/') assert c.path == '/' c = cj.build('kk', 'vv', 'https://example.com/') assert c.domain == 'example.com' assert c.secure # check the cookie works c.name = 'k' c.value = 'v' cj.set(c) assert cj.for_request('https://example.com/') == {'k': 'v'} assert cj.for_request('http://example.com/') == {} def test_cookienav(): """ Test browsing while getting new cookies """ b = BaseBrowser() r = b.location(HTTPBIN + 'cookies') assert len(json.loads(r.text)['cookies']) == 0 r = b.location(HTTPBIN + 'cookies/set/hello/world') assert len(json.loads(r.text)['cookies']) == 1 assert json.loads(r.text)['cookies']['hello'] == 'world' r = b.location(HTTPBIN + 'cookies/set/hello2/world2') assert len(json.loads(r.text)['cookies']) == 2 assert json.loads(r.text)['cookies']['hello2'] == 'world2' r = b.location(REQUESTBIN) assert 'session' in r.cookies # requestbin should give this by default assert 'hello' not in r.cookies # we didn't send the wrong cookie # return to httpbin, check we didn't give the wrong cookie r = b.location(HTTPBIN + 'cookies') assert 'session' not in json.loads(r.text)['cookies'] # override cookies temporarily r = b.location(HTTPBIN + 'cookies', cookies={'bla': 'bli'}) assert len(json.loads(r.text)['cookies']) == 1 assert json.loads(r.text)['cookies']['bla'] == 'bli' # reload, the "fake" cookie should not be there r = b.location(HTTPBIN + 'cookies') assert len(json.loads(r.text)['cookies']) == 2 assert 'bla' not in json.loads(r.text)['cookies']