From db304b955cd1a56efb886f8db3fbce0a059666ae Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Wed, 11 Apr 2012 08:50:07 +0200 Subject: [PATCH] browser2: Handle cookie expiration, session cookies Every related method accepts a "now" parameter. If provided, it will be used instead of the system time. --- weboob/tools/browser2/cookiejar.py | 36 ++++++++++++++++++++++++++---- weboob/tools/browser2/test.py | 24 ++++++++++++++++++-- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/weboob/tools/browser2/cookiejar.py b/weboob/tools/browser2/cookiejar.py index 3a8cbf9e..97a3db26 100644 --- a/weboob/tools/browser2/cookiejar.py +++ b/weboob/tools/browser2/cookiejar.py @@ -161,7 +161,7 @@ class CookieJar(object): return False - def _normalize_cookie(self, cookie, url): + def _normalize_cookie(self, cookie, url, now=None): """ Update a cookie we got from the response. The goal is to have data relevant for use in future requests. @@ -169,6 +169,10 @@ class CookieJar(object): * Sets path if there is not one. * Set Expires from Max-Age. We need the expires to have an absolute expiration date. * Force the Secure flag if required. (see SECURE_DOMAINS) + + :type cookie: :class:`cookies.Cookie` + :type url: str + :type now: datetime """ url = urlparse.urlparse(url) if cookie.domain is None: @@ -176,7 +180,9 @@ class CookieJar(object): if cookie.path is None: cookie.path = '/' if cookie.max_age is not None: - cookie.expires = datetime.now() + timedelta(seconds=cookie.max_age) + if now is None: + now = datetime.now() + cookie.expires = now + timedelta(seconds=cookie.max_age) if url.scheme == 'https' \ and self._match_domain_list(self.SECURE_DOMAINS, cookie.domain): cookie.secure = True @@ -194,24 +200,46 @@ class CookieJar(object): if self._can_set(c, response.url): self.set(c) - def for_request(self, url): + def for_request(self, url, now=None): """ Get a key/value dictionnary of cookies for a given request URL. 
def flush(self, now=None, session=False):
    """
    Remove expired cookies. If session is True, also remove all session cookies.

    A session cookie is a cookie without an expiration date
    (its ``expires`` attribute is None).

    :param now: reference time used to decide whether a cookie has expired.
                If not provided, the system time is used.
    :type now: datetime
    :param session: also remove session cookies
    :type session: bool
    """
    # Resolve the default before comparing: ordering a datetime against None
    # raises TypeError, so calling flush() without "now" would fail as soon
    # as the jar holds a cookie with an expiration date.
    if now is None:
        now = datetime.now()
    # we need a list copy since we remove from the iterable
    for cookie in list(self.iter()):
        if cookie.expires is None:
            # session cookie: only removed if requested
            if session:
                self.remove(cookie)
        elif cookie.expires < now:
            # non-session cookie which expired before now
            self.remove(cookie)
diff --git a/weboob/tools/browser2/test.py b/weboob/tools/browser2/test.py index 1d5c1ebb..415352ec 100644 --- a/weboob/tools/browser2/test.py +++ b/weboob/tools/browser2/test.py @@ -19,7 +19,9 @@ from __future__ import absolute_import -import requests +from datetime import datetime + +from requests import HTTPError from nose.plugins.skip import SkipTest from .browser import BaseBrowser, DomainBrowser, Weboob @@ -74,7 +76,7 @@ def test_brokenpost(): r = b.location(r.url + '/feed') assert 'hello' in r.text assert 'world' in r.text - except requests.HTTPError, e: + except HTTPError, e: if str(e).startswith('503 '): raise SkipTest('Quota exceeded') else: @@ -296,3 +298,21 @@ def test_cookiejar(): assert len(cj.all(secure=True)) == 1 # not the same cookie, but the same identifiers assert cj.remove(cookie1) is True + + cj.clear() + cookie6 = bc('e1=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;') + cookie7 = bc('e2=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 2010 00:00:01 GMT;') + now = datetime(2000, 01, 01) + cj.set(cookie0) + cj.set(cookie6) + cj.set(cookie7) + + assert cj.for_request('http://www.example.com/', now) == {'e2': '1', 'j': 'v'} + assert cj.for_request('http://www.example.com/', datetime(2020, 01, 01)) == {'j': 'v'} + + assert len(cj.all()) == 3 + cj.flush(now) + assert len(cj.all()) == 2 + assert cj.remove(cookie6) is False # already removed + cj.flush(now, session=True) + assert len(cj.all()) == 1