From 2d70d11822ca8b4fe01291003fc0a0bf8346211a Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Mon, 16 Apr 2012 03:40:37 +0200 Subject: [PATCH] browser2: Handle cookies in the request/response cycle Unlike the one by python-requests, it is secure, and generally behaves like a real browser. Basic test added. --- weboob/tools/browser2/browser.py | 37 ++++++++++++++++++++++++++------ weboob/tools/browser2/test.py | 23 ++++++++++++++++++++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index 6a7c2e9b..b01ee83e 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -25,6 +25,8 @@ import requests from requests.status_codes import codes from copy import deepcopy +from .cookiejar import CookieJar + # TODO define __all__ @@ -116,9 +118,13 @@ class BaseBrowser(object): def __init__(self): profile = self.PROFILE self._setup_session(profile) + self._setup_cookies() self.url = None self.response = None + def _setup_cookies(self): + self.cookies = CookieJar() + def _setup_session(self, profile): """ Set up a python-requests session for our usage. @@ -139,7 +145,10 @@ class BaseBrowser(object): """ Follow redirects *properly*. * Mimic what browsers do on 302 - * TODO Handle cookies securely + * Handle cookies securely + + Returns a new Response object with the history of previous + responses in it. :type response: :class:`requests.Response` :type orig_args: dict @@ -252,9 +261,14 @@ class BaseBrowser(object): allow_redirects=True, referrer=None, **kwargs): """ - Makes a GET request, or a POST if data is not None, unless a `method` - is explicitly provided. - An empty `data` (not None) *will* make a post. 
+ Make an HTTP request like a browser does: + * follow redirects (unless disabled) + * handle cookies + * provide referrers (unless disabled) + + Unless a `method` is explicitly provided, it makes a GET request, + or a POST if data is not None. + An empty `data` (not None, like '' or {}) *will* make a POST. It is a wrapper around session.request(). All session.request() options are available. @@ -295,7 +309,7 @@ class BaseBrowser(object): kwargs.setdefault('headers', {}).setdefault('Content-Length', '0') # Use our own redirection handling - # python-requests's sucks to much to be allowed. + # python-requests's one sucks too much to be allowed. kwargs.setdefault('config', {}).setdefault('strict_mode', False) kwargs['allow_redirects'] = False @@ -308,13 +322,24 @@ class BaseBrowser(object): if self.TIMEOUT: kwargs.setdefault('timeout', self.TIMEOUT) + cookies = kwargs.pop('cookies', None) + # get the relevant cookies for the URL + # from the jar (unless they are overridden) + if cookies is None: + cookies = self.cookies.for_request(url) + kwargs['cookies'] = cookies + # call python-requests response = self.session.request(method, url, **kwargs) + + # read cookies + self.cookies.from_response(response) + if allow_redirects: response = self.follow_redirects(response, orig_args) # erase all cookies, python-requests does not handle them securely - self.session.cookies = {} + self.session.cookies.clear() return response diff --git a/weboob/tools/browser2/test.py b/weboob/tools/browser2/test.py index 7169486b..1deadc7c 100644 --- a/weboob/tools/browser2/test.py +++ b/weboob/tools/browser2/test.py @@ -367,3 +367,26 @@ def test_cookiejar(): assert cj.remove(cookie6) is False # already removed cj.flush(now, session=True) assert len(cj.all()) == 1 + + +def test_cookienav(): + """ + Test browsing while getting new cookies + """ + b = BaseBrowser() + r = b.location(HTTPBIN + 'cookies') + assert len(json.loads(r.text)['cookies']) == 0 + + r = b.location(HTTPBIN + 
'cookies/set/hello/world') + assert len(json.loads(r.text)['cookies']) == 1 + assert json.loads(r.text)['cookies']['hello'] == 'world' + r = b.location(HTTPBIN + 'cookies/set/hello2/world2') + assert len(json.loads(r.text)['cookies']) == 2 + assert json.loads(r.text)['cookies']['hello2'] == 'world2' + + r = b.location(REQUESTBIN) + assert 'session' in r.cookies # requestbin should give this by default + assert 'hello' not in r.cookies # we didn't send the wrong cookie + # return to httpbin, check we didn't give the wrong cookie + r = b.location(HTTPBIN + 'cookies') + assert 'session' not in json.loads(r.text)['cookies']