diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index 4b626538..d3585235 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -46,6 +46,7 @@ class Profile(object): Warning: Do not enable lzma, bzip or bzip2, sdch encodings as python-requests does not support it yet. + Supported as of 2.2: gzip, deflate, compress. In doubt, do not change the default Accept-Encoding header of python-requests. """ @@ -90,7 +91,7 @@ class Firefox(Profile): 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20100101 Firefox/10.0.3', 'DNT': '1'} # It also has "Connection: Keep-Alive", that should only be added this way: - #session.config['keep_alive'] = True + #FIXME session.config['keep_alive'] = True class Wget(Profile): @@ -108,7 +109,7 @@ class Wget(Profile): session.headers.update({ 'Accept': '*/*', 'User-Agent': 'Wget/%s' % self.version}) - #session.config['keep_alive'] = True + #FIXME session.config['keep_alive'] = True class BaseBrowser(object): @@ -163,7 +164,6 @@ class BaseBrowser(object): """ Make an HTTP request like a browser does: * follow redirects (unless disabled) - * handle cookies * provide referrers (unless disabled) Unless a `method` is explicitly provided, it makes a GET request, diff --git a/weboob/tools/browser2/cookiejar.py b/weboob/tools/browser2/cookiejar.py deleted file mode 100644 index 9387108e..00000000 --- a/weboob/tools/browser2/cookiejar.py +++ /dev/null @@ -1,431 +0,0 @@ -# TODO declare __all__ -# TODO support logging - -from urlparse import urlparse -from datetime import datetime, timedelta -import posixpath - -from .cookies import Cookie, Cookies, strip_spaces_and_quotes, Definitions - - -def valid_domain(domain): - """ - Like cookies.valid_domain, but allows leading periods. - Because it is *very* common and useful for us. - """ - domain.encode('ascii') - if domain and domain[0] == '.': - domain = domain[1:] - if domain and domain[0] in '"': - return False - if Definitions.DOMAIN_RE.match(domain): - return True - return False - - -def parse_domain(value): - """ - Like cookies.parse_domain, but allows leading periods. - Because it is *very* common and useful for us. - """ - value = strip_spaces_and_quotes(value) - if value: - assert valid_domain(value) - return value - -# this is ok because we are using our own copy of the lib -# TODO push a better way upstream -Cookie.attribute_parsers['domain'] = parse_domain -Cookie.attribute_validators['domain'] = valid_domain - - -class CookiePolicy(object): - """ - Defines how cookies are accepted, and what to do with them. - """ - - ACCEPT_DOMAINS = [] - """ - Domains where to accept cookies, even when we should not. - Add a "." before a domain to accept subdomains. - If True, accept all cookies (a bit insecure). - ACCEPT_DOMAINS has higher priority over REJECT_DOMAINS. - - Disabling third-party cookies on most browsers acts like [], enabling them - acts like True. Since it is a very common browser option, we use the most - secure and privacy-aware method by default. - """ - - REJECT_DOMAINS = [] - """ - Domains where to reject cookies, even when we should not. - Add a "." before a domain to reject subdomains. - If True, reject all cookies. - REJECT_DOMAINS has lower priority over ACCEPT_DOMAINS. - """ - - SECURE_DOMAINS = True - """ - When we get a cookie through an secure connection, mark it as secure - (not to be sent on insecure channels) if the server did not tell us to. - If True, do it automatically for all domains. Alternatively, you can put - a list of domains, like ACCEPT_DOMAINS or REJECT_DOMAINS. - If False, never do it (but still accept secure cookies as they are). - - NoScript for Firefox does this, either by automated guesses or forced from a list. - """ - - INSECURE_MATCHING = True - """ - Do sloppy matching to mimic what browsers do. - This is only for setting cookies; it should be relatively safe in Weboob. - """ - - def domain_match(self, pattern, domain): - """ - Checks a domain matches a domain pattern. - Patterns can be either the exact domain, or a wildcard (starting with a dot). - - example.com matches example.com only - .example.com matches *.example.com (but not example.com) - - :param pattern: str - :param domain: str - :rytpe: bool - """ - if pattern.startswith('.'): - return domain.endswith(pattern) - return domain == pattern - - def domain_match_list(self, patterns, domain): - """ - Checks domains match, from a list of patters. - If the list of patterns is True, it always matches. - - :param pattern: list or True - :param domain: str - :rytpe: bool - """ - if patterns is True: - return True - for pattern in patterns: - if self.domain_match(pattern, domain): - return True - return False - - def can_set(self, cookie, url): - """ - Checks an URL can set a particular cookie. - See ACCEPT_DOMAINS, REJECT_DOMAINS to set exceptions. - - The cookie must have a domain already set, you can - use normalize_cookie() for that. - - :param cookie: The cookie the server set - :type cookie: Cookie - :param url: URL of the response - :type url: str - - :rtype: bool - """ - url = urlparse(url) - domain = url.hostname - - # Accept/reject overrides - if self.domain_match_list(self.ACCEPT_DOMAINS, domain): - return True - if self.domain_match_list(self.REJECT_DOMAINS, domain): - return False - - # check path - if not url.path.startswith(cookie.path): - return False - - # check domain (secure & simple) - if cookie.domain.startswith('.'): - if cookie.domain.endswith(domain) or '.%s' % domain == cookie.domain: - return True - elif domain == cookie.domain: - return True - - # whatever.example.com should be able to set .example.com - # Unbelievably stupid, but widely used. - # - # Our method is not ideal, as it isn't very secure for some TLDs. - # A solution could be to use tldextract. - if self.INSECURE_MATCHING: - if domain.split('.')[-2:] == cookie.domain.split('.')[-2:]: - return True - - return False - - def normalize_cookie(self, cookie, url, now=None): - """ - Update a cookie we got from the response. - The goal is to have data relevant for use in future requests. - * Sets domain if there is not one. - * Sets path if there is not one. - * Set Expires from Max-Age. We need the expires to have an absolute expiration date. - * Force the Secure flag if required. (see SECURE_DOMAINS) - - :type cookie: :class:`cookies.Cookie` - :type url: str - :type now: datetime - """ - url = urlparse(url) - if cookie.domain is None: - cookie.domain = url.hostname - if cookie.path is None: - cookie.path = '/' - if cookie.max_age is not None: - if now is None: - now = datetime.now() - cookie.expires = now + timedelta(seconds=cookie.max_age) - if url.scheme == 'https' \ - and self.domain_match_list(self.SECURE_DOMAINS, cookie.domain): - cookie.secure = True - - -class CookieJar(object): - """ - Manage Cookies like a real browser, with security and privacy in mind. - - python-requests accepts cookies blindly, - Expirations are not taken into account, - it can't handle the server asking to delete a cookie, - and sends cookies even when changing domains! - Of course, secure (SSL only) cookies aren't handled either. - - This behavior depends on a `policy` class. - - This class fixes all that. - """ - - def __init__(self, policy): - """ - Cookies are delicious delicacies. - - :type: :class:`CookiePolicy` - """ - self.cookies = dict() - self.policy = policy - - def from_response(self, response): - """ - Import cookies from the response. - - :type response: responses.Response - """ - if 'Set-Cookie' in response.headers: - cs = Cookies.from_response(response.headers['Set-Cookie'], True) - for c in cs.itervalues(): - self.policy.normalize_cookie(c, response.url) - if self.policy.can_set(c, response.url): - self.set(c) - - def for_request(self, url, now=None): - """ - Get a key/value dictionnary of cookies for a given request URL. - - :type url: str - :type now: datetime - :rtype: dict - """ - url = urlparse(url) - if now is None: - now = datetime.now() - # we want insecure cookies in https too! - secure = None if url.scheme == 'https' else False - - cdict = dict() - # get sorted cookies - cookies = self.all(domain=url.hostname, path=url.path, secure=secure) - for cookie in cookies: - # only use session cookies and cookies with future expirations - if cookie.expires is None or cookie.expires > now: - # update only if not set, since first cookies are "better" - cdict.setdefault(cookie.name, cookie.value) - return cdict - - def flush(self, now=None, session=False): - """ - Remove expired cookies. If session is True, also remove all session cookies. - - :type now: datetime - :type session: bool - """ - # we need a list copy since we remove from the iterable - for cookie in list(self.iter()): - # remove session cookies if requested - if cookie.expires is None and session: - self.remove(cookie) - # remove non-session cookies if expired before now - if cookie.expires is not None and cookie.expires < now: - self.remove(cookie) - - def set(self, cookie): - """ - Add or replace a Cookie in the jar. - This is for normalized and checked cookies, no validation is done. - Use from_response() to import cookies from a python-requests response. - - :type cookie: cookies.Cookie - """ - # cookies are unique by domain, path and of course name - assert len(cookie.domain) - assert len(cookie.path) - assert len(cookie.name) - self.cookies.setdefault(cookie.domain, {}). \ - setdefault(cookie.path, {})[cookie.name] = cookie - - def iter(self, name=None, domain=None, path=None, secure=None): - """ - Iterate matching cookies. - You can restrict by name, domain, path or security. - - :type name: str - :type domain: str - :type path: str - :type secure: bool - - :rtype: iter[:class:`cookies.Cookie`] - """ - for cdomain, cpaths in self.cookies.iteritems(): - # domain matches (all domains if None) - if domain is None or self.policy.domain_match(cdomain, domain): - for cpath, cnames in cpaths.iteritems(): - # path matches (all if None) - if path is None or path.startswith(cpath): - for cname, cookie in cnames.iteritems(): - # only wanted name (all if None) - if name is None or name == cname: - # wanted security (all if None) - # cookie.secure can be "None" if not secure! - if secure is None \ - or (secure is False and not cookie.secure) \ - or (secure is True and cookie.secure): - yield cookie - - def all(self, name=None, domain=None, path=None, secure=None): - """ - Like iter(), but sorts the cookies, from most precise to less precise. - - :rtype: list[:class:`cookies.Cookie`] - """ - cookies = list(self.iter(name, domain, path, secure)) - - # slowly compare all cookies - # XXX one of the worst things I've ever written - COOKIE1 = 1 - COOKIE2 = -1 - - def ccmp(cookie1, cookie2): - # most precise matching domain - if domain and cookie1.domain != cookie2.domain: - if cookie1.domain == domain: - return COOKIE1 - if cookie2.domain == domain: - return COOKIE2 - if len(cookie1.domain) > len(cookie2.domain): - return COOKIE1 - if len(cookie2.domain) > len(cookie1.domain): - return COOKIE2 - # most precise matching path - if len(cookie1.path) > len(cookie2.path): - return COOKIE1 - if len(cookie2.path) > len(cookie1.path): - return COOKIE2 - # most secure - if cookie1.secure and not cookie2.secure: - return COOKIE1 - if cookie2.secure and not cookie1.secure: - return COOKIE2 - return 0 - - return sorted(cookies, cmp=ccmp, reverse=True) - - def get(self, name=None, domain=None, path=None, secure=None): - """ - Return the best cookie from all(). - Useful for changing the value or deleting a cookie. - - name, domain, path and secure are the same as iter(). - - :rtype: :class:`cookies.Cookie` or None - """ - cookies = self.all(name, domain, path, secure) - try: - return cookies[0] - except IndexError: - pass - - def remove(self, cookie): - """ - Remove a cookie. The cookie argument must have the same domain, path and name. - Return False if not present, True if just removed. - - :type cookie: :class:`cookies.Cookie` - :rtype: bool - """ - # cookies are unique by domain, path and of course name - assert len(cookie.domain) - assert len(cookie.path) - assert len(cookie.name) - d = self.cookies.get(cookie.domain, {}).get(cookie.path) - if cookie.name in d: - del d[cookie.name] - return True - return False - - def clear(self): - """ - Remove all cookies. - """ - self.cookies.clear() - - def build(self, name, value, url, path=None, wildcard=False): - """ - Build a Cookie object for the current URL. - - The domain and path are guessed. If you want to set for the whole domain, - take care of what you put in URL! - for_url('http://example.com/hello/world') will only set cookie for the - /hello/ path. - - `name` and `value` are required parameters of Cookie.__init__() - - You can force the `path` if you want. - - The `wildcard` parameter will add a period before the domain. - - Typical usage would be, inside a DomainBrowser: - cookie = self.cookies.for_url(k, v, self.url) - cookie = self.cookies.for_url(k, v, self.absurl('/')) - cookie = self.cookies.for_url(k, v, self.BASEURL) - - And then: - self.cookies.set(cookie) - - For more advanced usage, create a Cookie object manually, or - alter the returned Cookie object before set(). - - :type name: basestring - :type value: basestring - :type url: str - :type path: str - :type wildcard: bool - :rtype cookie: :class:`cookies.Cookie` - """ - cookie = Cookie(name, value) - url = urlparse(url) - if wildcard: - cookie.domain = '.' + url.hostname - else: - cookie.domain = url.hostname - if path is None: - cookie.path = posixpath.join(posixpath.dirname(url.path), '') - else: - cookie.path = path - if url.scheme == 'https': - cookie.secure = True - return cookie diff --git a/weboob/tools/browser2/cookies.py b/weboob/tools/browser2/cookies.py deleted file mode 100644 index 9d5b73cc..00000000 --- a/weboob/tools/browser2/cookies.py +++ /dev/null @@ -1,1139 +0,0 @@ -"""Parse, manipulate and render cookies in a convenient way. - -Copyright (c) 2011 Sasha Hart. - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" -import re -import datetime -import logging -import sys -from unicodedata import normalize -if sys.version_info >= (3, 0, 0): # pragma: no cover - from urllib.parse import quote as _default_quote, \ - unquote as _default_unquote - basestring = str - long = int -else: # pragma: no cover - from urllib import quote as _default_quote, \ - unquote as _default_unquote - - -def _total_seconds(td): - """Wrapper to work around lack of .total_seconds() method in Python 3.1. - """ - if hasattr(td, "total_seconds"): - return td.total_seconds() - return td.days * 3600 * 24 + td.seconds + td.microseconds / 100000.0 - -# see test_encoding_assumptions for how these magical safe= parms were figured -# out. the differences are because of what cookie-octet may contain -# vs the more liberal spec for extension-av -default_cookie_quote = lambda item: _default_quote( - item, safe='!#$%&\'()*+/:<=>?@[]^`{|}~') - -default_extension_quote = lambda item: _default_quote( - item, safe=' !"#$%&\'()*+,/:<=>?@[\\]^`{|}~') - -default_unquote = _default_unquote - - -def _report_invalid_cookie(data): - "How this module logs a bad cookie when exception suppressed" - logging.error("invalid Cookie: %s", repr(data)) - - -def _report_unknown_attribute(name): - "How this module logs an unknown attribute when exception suppressed" - logging.error("unknown Cookie attribute: %s", repr(name)) - - -def _report_invalid_attribute(name, value, reason): - "How this module logs a bad attribute when exception suppressed" - logging.error("invalid Cookie attribute (%s): %s=%s", reason, repr(name), - repr(value)) - - -class CookieError(Exception): - """Base class for this module's exceptions, so you can catch them all if - you want to. - """ - def __init__(self): - Exception.__init__(self) - - -class InvalidCookieError(CookieError): - """Raised when attempting to parse or construct a cookie which is - syntactically invalid (in any way that has possibly serious implications). - """ - def __init__(self, data=None, message=""): - CookieError.__init__(self) - self.data = data - self.message = message - - def __str__(self): - return '%s %s' % (repr(self.message), repr(self.data)) - - -class InvalidCookieAttributeError(CookieError): - """Raised when setting an invalid attribute on a Cookie. - """ - def __init__(self, name, value, reason=None): - CookieError.__init__(self) - self.name = name - self.value = value - self.reason = reason - - def __str__(self): - prefix = ("%s: " % self.reason) if self.reason else "" - if self.name is None: - return '%s%s' % (prefix, repr(self.value)) - return '%s%s = %s' % (prefix, repr(self.name), repr(self.value)) - - -class Definitions(object): - """Namespace to hold definitions used in cookie parsing (mostly pieces of - regex). - - These are separated out for individual testing against examples and RFC - grammar, and kept here to avoid cluttering other namespaces. - """ - # Most of the following are set down or cited in RFC 6265 4.1.1 - - # This is the grammar's 'cookie-name' defined as 'token' per RFC 2616 2.2. - COOKIE_NAME = r"!#$%&'*+\-.0-9A-Z^_`a-z|~" - - # 'cookie-octet' - as used twice in definition of 'cookie-value' - COOKIE_OCTET = r"\x21\x23-\x2B\--\x3A\x3C-\x5B\]-\x7E" - - # extension-av - also happens to be a superset of cookie-av and path-value - EXTENSION_AV = """ !"#$%&\\\\'()*+,\-./0-9:<=>?@A-Z[\\]^_`a-z{|}~""" - - # This is for the first pass parse on a Set-Cookie: response header. It - # includes cookie-value, cookie-pair, set-cookie-string, cookie-av. - # extension-av is used to extract the chunk containing variable-length, - # unordered attributes. The second pass then uses ATTR to break out each - # attribute and extract it appropriately. - # As compared with the RFC production grammar, it is must more liberal with - # space characters, in order not to break on data made by barbarians. - SET_COOKIE_HEADER = """(?x) # Verbose mode - ^(?:Set-Cookie:[ ]*)? - (?P[{name}:]+) - [ ]*=[ ]* - - # Accept anything in quotes - this is not RFC 6265, but might ease - # working with older code that half-heartedly works with 2965. Accept - # spaces inside tokens up front, so we can deal with that error one - # cookie at a time, after this first pass. - (?P(?:"{value}*")|(?:[{cookie_octet} ]*)) - [ ]* - - # Extract everything up to the end in one chunk, which will be broken - # down in the second pass. Don't match if there's any unexpected - # garbage at the end (hence the \Z; $ matches before newline). - (?P(?:;[ ]*[{cookie_av}]+)*) - """.format(name=COOKIE_NAME, cookie_av=EXTENSION_AV + ";", - cookie_octet=COOKIE_OCTET, value="[^;]") - - # Now we specify the individual patterns for the attribute extraction pass - # of Set-Cookie parsing (mapping to *-av in the RFC grammar). Things which - # don't match any of these but are in extension-av are simply ignored; - # anything else should be rejected in the first pass (SET_COOKIE_HEADER). - - # Max-Age attribute. These are digits, they are expressed this way - # because that is how they are expressed in the RFC. - MAX_AGE_AV = "Max-Age=(?P[\x31-\x39][\x30-\x39]*)" - - # Domain attribute; a label is one part of the domain - LABEL = '{let_dig}(?:(?:{let_dig_hyp}+)?{let_dig})?'.format( - let_dig="[A-Za-z0-9]", let_dig_hyp="[0-9A-Za-z\-]") - DOMAIN = "(?:{label}\.)*(?:{label})".format(label=LABEL) - # Parse initial period though it's wrong, as RFC 6265 4.1.2.3 - DOMAIN_AV = "Domain=(?P\.?{domain})".format(domain=DOMAIN) - - # Path attribute. We don't take special care with quotes because - # they are hardly used, they don't allow invalid characters per RFC 6265, - # and " is a valid character to occur in a path value anyway. - PATH_AV = 'Path=(?P[%s]+)' % EXTENSION_AV - - # Expires attribute. This gets big because of date parsing, which needs to - # support a large range of formats, so it's broken down into pieces. - - # Generate a mapping of months to use in render/parse, to avoid - # localizations which might be produced by strftime (e.g. %a -> Mayo) - month_list = ["January", "February", "March", "April", "May", "June", - "July", "August", "September", "October", "November", "December"] - month_abbr_list = [item[:3] for item in month_list] - month_numbers = {} - for index, name in enumerate(month_list): - name = name.lower() - month_numbers[name[:3]] = index + 1 - month_numbers[name] = index + 1 - # Use the same list to create regexps for months. - MONTH_SHORT = "(?:" + "|".join(item[:3] for item in month_list) + ")" - MONTH_LONG = "(?:" + "|".join(item for item in month_list) + ")" - - # Same drill with weekdays, for the same reason. - weekday_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", - "Saturday", "Sunday"] - weekday_abbr_list = [item[:3] for item in weekday_list] - WEEKDAY_SHORT = "(?:" + "|".join(item[:3] for item in weekday_list) + ")" - WEEKDAY_LONG = "(?:" + "|".join(item for item in weekday_list) + ")" - - # This regexp tries to exclude obvious nonsense in the first pass. - DAY_OF_MONTH = "(?:[0 ]?[1-9]|[12][0-9]|[3][01])(?!\d)" - - # Here is the overall date format; ~99% of cases fold into one generalized - # syntax like RFC 1123, and many of the rest use asctime-like formats. - # (see test_date_formats for a full exegesis) - DATE = """(?ix) # Case-insensitive mode, verbose mode - (?: - (?P(?:{wdy}|{weekday}),[ ])? - (?P{day}) - [ \-] - (?P{mon}|{month}) - [ \-] - # This does not support 3-digit years, which are rare and don't - # seem to have one canonical interpretation. - (?P(?:\d{{2}}|\d{{4}})) - [ ] - # HH:MM[:SS] GMT - (?P(?:[ 0][0-9]|[01][0-9]|2[0-3])) - :(?P(?:0[0-9]|[1-5][0-9])) - (?::(?P\d{{2}}))? - [ ]GMT - | - # Support asctime format, e.g. 'Sun Nov 6 08:49:37 1994' - (?P{wdy})[ ] - (?P{mon})[ ] - (?P[ ]\d|\d\d)[ ] - (?P\d\d): - (?P\d\d) - (?::(?P\d\d)?)[ ] - (?P\d\d\d\d) - (?:[ ]GMT)? # GMT (Amazon) - ) - """ - DATE = DATE.format(wdy=WEEKDAY_SHORT, weekday=WEEKDAY_LONG, - day=DAY_OF_MONTH, mon=MONTH_SHORT, month=MONTH_LONG) - - EXPIRES_AV = "Expires=(?P%s)" % DATE - - # Now we're ready to define a regexp which can match any number of attrs - # in the variable portion of the Set-Cookie header (like the unnamed latter - # part of set-cookie-string in the grammar). Each regexp of any complexity - # is split out for testing by itself. - ATTR = """(?ix) # Case-insensitive mode, verbose mode - # Always start with start or semicolon and any number of spaces - (?:^|;)[ ]*(?: - # Big disjunction of attribute patterns (*_AV), with named capture - # groups to extract everything in one pass. Anything unrecognized - # goes in the 'unrecognized' capture group for reporting. - {expires} - |{max_age} - |{domain} - |{path} - |(?PSecure=?) - |(?PHttpOnly=?) - |Version=(?P[{stuff}]+) - |Comment=(?P[{stuff}]+) - |(?P[{stuff}]+) - ) - # End with any number of spaces not matched by the preceding (up to the - # next semicolon) - but do not capture these. - [ ]* - """.format(expires=EXPIRES_AV, max_age=MAX_AGE_AV, domain=DOMAIN_AV, - path=PATH_AV, stuff=EXTENSION_AV) - - # For request data ("Cookie: ") parsing, with finditer cf. RFC 6265 4.2.1 - COOKIE = """(?x) # Verbose mode - (?: # Either something close to valid... - - # Match starts at start of string, or at separator. - # Split on comma for the sake of legacy code (RFC 2109/2965), - # and since it only breaks when invalid commas are put in values. - # see http://bugs.python.org/issue1210326 - (?:^Cookie:|^|;|,) - - # 1 or more valid token characters making up the name (captured) - # with colon added to accommodate users of some old Java apps, etc. - [ ]* - (?P[{name}:]+) - [ ]* - = - [ ]* - - # While 6265 provides only for cookie-octet, this allows just about - # anything in quotes (like in RFC 2616); people stuck on RFC - # 2109/2965 will expect it to work this way. The non-quoted token - # allows interior spaces ('\x20'), which is not valid. In both - # cases, the decision of whether to allow these is downstream. - (?P - ["][^\00-\31"]*["] - | - [{value}] - | - [{value}][{value} ]*[{value}]+) - - # ... Or something way off-spec - extract to report and move on - | - (?P[^;]+) - ) - # Trailing spaces after value - [ ]* - # Must end with ; or be at end of string (don't consume this though, - # so use the lookahead assertion ?= - (?=;|\Z) - """.format(name=COOKIE_NAME, value=COOKIE_OCTET) - - # Precompile externally useful definitions into re objects. - COOKIE_NAME_RE = re.compile("^([%s:]+)\Z" % COOKIE_NAME) - COOKIE_RE = re.compile(COOKIE) - SET_COOKIE_HEADER_RE = re.compile(SET_COOKIE_HEADER) - ATTR_RE = re.compile(ATTR) - DATE_RE = re.compile(DATE) - DOMAIN_RE = re.compile(DOMAIN) - PATH_RE = re.compile('^([%s]+)\Z' % EXTENSION_AV) - EOL = re.compile("(?:\r\n|\n)") - - -def strip_spaces_and_quotes(value): - """Remove invalid whitespace and/or single pair of dquotes and return None - for empty strings. - - Used to prepare cookie values, path, and domain attributes in a way which - tolerates simple formatting mistakes and standards variations. - """ - value = value.strip() if value else "" - if value and len(value) > 1 and (value[0] == value[-1] == '"'): - value = value[1:-1] - if not value: - value = "" - return value - - -def parse_string(data, unquote=default_unquote): - """Decode URL-encoded strings to UTF-8 containing the escaped chars. - """ - if data is None: - return None - - # We'll soon need to unquote to recover our UTF-8 data. - # In Python 2, unquote crashes on chars beyond ASCII. So encode functions - # had better not include anything beyond ASCII in data. - # In Python 3, unquote crashes on bytes objects, requiring conversion to - # str objects (unicode) using decode(). - # But in Python 2, the same decode causes unquote to butcher the data. - # So in that case, just leave the bytes. - if isinstance(data, bytes): - if sys.version_info > (3, 0, 0): # pragma: no cover - data = data.decode('ascii') - # Recover URL encoded data - unquoted = unquote(data) - # Without this step, Python 2 may have good URL decoded *bytes*, - # which will therefore not normalize as unicode and not compare to - # the original. - if isinstance(unquoted, bytes): - unquoted = unquoted.decode('utf-8') - return unquoted - - -def parse_date(value): - """Parse an RFC 1123 or asctime-like format date string to produce - a Python datetime object (without a timezone). - """ - # Do the regex magic; also enforces 2 or 4 digit years - match = Definitions.DATE_RE.match(value) if value else None - if not match: - return None - # We're going to extract and prepare captured data in 'data'. - data = {} - captured = match.groupdict() - fields = ['year', 'month', 'day', 'hour', 'minute', 'second'] - # If we matched on the RFC 1123 family format - if captured['year']: - for field in fields: - data[field] = captured[field] - # If we matched on the asctime format, use year2 etc. - else: - for field in fields: - data[field] = captured[field + "2"] - year = data['year'] - # Interpret lame 2-digit years - base the cutoff on UNIX epoch, in case - # someone sets a '70' cookie meaning 'distant past'. This won't break for - # 58 years and people who use 2-digit years are asking for it anyway. - if len(year) == 2: - if int(year) < 70: - year = "20" + year - else: - year = "19" + year - year = int(year) - # Clamp to [1900, 9999]: strftime has min 1900, datetime has max 9999 - data['year'] = max(1900, min(year, 9999)) - # Other things which are numbers should convert to integer - for field in ['day', 'hour', 'minute', 'second']: - if data[field] == None: - data[field] = 0 - data[field] = int(data[field]) - # Look up the number datetime needs for the named month - data['month'] = Definitions.month_numbers[data['month'].lower()] - return datetime.datetime(**data) - - -def parse_domain(value): - """Parse and validate an incoming Domain attribute value. - """ - value = strip_spaces_and_quotes(value) - # Strip/ignore invalid leading period as in RFC 5.2.3 - if value and value[0] == '.': - value = value[1:] - if value: - assert valid_domain(value) - return value - - -def parse_path(value): - """Parse and validate an incoming Path attribute value. - """ - value = strip_spaces_and_quotes(value) - assert valid_path(value) - return value - - -def parse_value(value, allow_spaces=True, unquote=default_unquote): - "Process a cookie value" - if value is None: - return None - value = strip_spaces_and_quotes(value) - value = parse_string(value, unquote=unquote) - if not allow_spaces: - assert ' ' not in value - return value - - -def valid_name(name): - "Validate a cookie name string" - if isinstance(name, bytes): - name = name.decode('ascii') - if not Definitions.COOKIE_NAME_RE.match(name): - return False - # This module doesn't support $identifiers, which are part of an obsolete - # and highly complex standard which is never used. - if name[0] == "$": - return False - return True - - -def valid_value(value, quote=default_cookie_quote, unquote=default_unquote): - """Validate a cookie value string. - - This is generic across quote/unquote functions because it directly verifies - the encoding round-trip using the specified quote/unquote functions. - So if you use different quote/unquote functions, use something like this - as a replacement for valid_value:: - - my_valid_value = lambda value: valid_value(value, quote=my_quote, - unquote=my_unquote) - """ - if value is None: - return False - - # Put the value through a round trip with the given quote and unquote - # functions, so we will know whether data will get lost or not in the event - # that we don't complain. - encoded = encode_cookie_value(value, quote=quote) - decoded = parse_string(encoded, unquote=unquote) - - # If the original string made the round trip, this is a valid value for the - # given quote and unquote functions. Since the round trip can generate - # different unicode forms, normalize before comparing, so we can ignore - # trivial inequalities. - decoded_normalized = normalize("NFKD", decoded) \ - if not isinstance(decoded, bytes) \ - else decoded - value_normalized = normalize("NFKD", value) \ - if not isinstance(value, bytes) \ - else value - if decoded_normalized == value_normalized: - return True - - return False - - -def valid_date(date): - "Validate an expires datetime object" - # We want something that acts like a datetime. In particular, - # strings indicate a failure to parse down to an object and ints are - # nonstandard and ambiguous at best. - if not hasattr(date, 'tzinfo'): - return False - # Relevant RFCs define UTC as 'close enough' to GMT, and the maximum - # difference between UTC and GMT is often stated to be less than a second. - if date.tzinfo is None or _total_seconds(date.utcoffset()) < 1.1: - return True - return False - - -def valid_domain(domain): - "Validate a cookie domain ASCII string" - # Using encoding on domain would confuse browsers into not sending cookies. - # Generate UnicodeDecodeError up front if it can't store as ASCII. - domain.encode('ascii') - if domain and domain[0] in '."': - return False - # Domains starting with periods are not RFC-valid, but this is very common - # in existing cookies, so they should still parse with DOMAIN_AV. - if Definitions.DOMAIN_RE.match(domain): - return True - return False - - -def valid_path(value): - "Validate a cookie path ASCII string" - # Generate UnicodeDecodeError if path can't store as ASCII. - value.encode("ascii") - # Cookies without leading slash will likely be ignored, raise ASAP. - if not (value and value[0] == "/"): - return False - if not Definitions.PATH_RE.match(value): - return False - return True - - -def valid_max_age(number): - "Validate a cookie Max-Age" - if isinstance(number, basestring): - try: - number = long(number) - except (ValueError, TypeError): - return False - if number >= 0 and number % 1 == 0: - return True - return False - - -def encode_cookie_value(data, quote=default_cookie_quote): - """URL-encode strings to make them safe for a cookie value. - - By default this uses urllib quoting, as used in many other cookie - implementations and in other Python code, instead of an ad hoc escaping - mechanism which includes backslashes (these also being illegal chars in RFC - 6265). - """ - if data is None: - return None - - # encode() to ASCII bytes so quote won't crash on non-ASCII. - # but doing that to bytes objects is nonsense. - # On Python 2 encode crashes if s is bytes containing non-ASCII. - # On Python 3 encode crashes on all byte objects. - if not isinstance(data, bytes): - data = data.encode("utf-8") - - # URL encode data so it is safe for cookie value - quoted = quote(data) - - # Don't force to bytes, so that downstream can use proper string API rather - # than crippled bytes, and to encourage encoding to be done just once. - return quoted - - -def encode_extension_av(data, quote=default_extension_quote): - """URL-encode strings to make them safe for an extension-av - (extension attribute value): - """ - if not data: - return '' - return quote(data) - - -def render_date(date): - """Render a date (e.g. an Expires value) per RFCs 6265/2616/1123. - - Don't give this localized (timezone-aware) datetimes. If you use them, - convert them to GMT before passing them to this. There are too many - conversion corner cases to handle this universally. - """ - if not date: - return None - assert valid_date(date) - # Avoid %a and %b, which can change with locale, breaking compliance - weekday = Definitions.weekday_abbr_list[date.weekday()] - month = Definitions.month_abbr_list[date.month - 1] - return date.strftime("{day}, %d {month} %Y %H:%M:%S GMT")\ - .format(day=weekday, month=month) - - -def _parse_request(header_data, ignore_bad_cookies=False): - """Turn one or more lines of 'Cookie:' header data into a dict mapping - cookie names to cookie values (raw strings). - """ - cookies_dict = {} - for line in Definitions.EOL.split(header_data.strip()): - matches = Definitions.COOKIE_RE.finditer(line) - matches = [item for item in matches] - for match in matches: - invalid = match.group('invalid') - if invalid: - if not ignore_bad_cookies: - raise InvalidCookieError(data=invalid) - _report_invalid_cookie(invalid) - continue - name = match.group('name') - if name in cookies_dict: - continue - cookies_dict[name] = match.group('value').strip('"') - if not matches: - if not ignore_bad_cookies: - raise InvalidCookieError(data=line) - _report_invalid_cookie(line) - return cookies_dict - - -def parse_one_response(line, - ignore_bad_cookies=False, - ignore_bad_attributes=True): - """Turn one 'Set-Cookie:' line into a dict mapping attribute names to - attribute values (raw strings). - """ - cookie_dict = {} - # Basic validation, extract name/value/attrs-chunk - match = Definitions.SET_COOKIE_HEADER_RE.match(line) - if not match: - if not ignore_bad_cookies: - raise InvalidCookieError(data=line) - _report_invalid_cookie(line) - return None - cookie_dict.update({ - 'name': match.group('name'), - 'value': match.group('value')}) - # Extract individual attrs from the attrs chunk - for match in Definitions.ATTR_RE.finditer(match.group('attrs')): - captured = dict((k, v) for (k, v) in match.groupdict().items() if v) - unrecognized = captured.get('unrecognized', None) - if unrecognized: - if not ignore_bad_attributes: - raise InvalidCookieAttributeError(None, unrecognized, - "unrecognized") - _report_unknown_attribute(unrecognized) - continue - # for unary flags - for key in ('secure', 'httponly'): - if captured.get(key): - captured[key] = True - # ignore subcomponents of expires - they're still there to avoid doing - # two passes - timekeys = ('weekday', 'month', 'day', 'hour', 'minute', 'second', - 'year') - if 'year' in captured: - for key in timekeys: - del captured[key] - elif 'year2' in captured: - for key in timekeys: - del captured[key + "2"] - cookie_dict.update(captured) - return cookie_dict - - -def _parse_response(header_data, - ignore_bad_cookies=False, - ignore_bad_attributes=True): - """Turn one or more lines of 'Set-Cookie:' header data into a list of dicts - mapping attribute names to attribute values (as plain strings). - """ - cookie_dicts = [] - for line in Definitions.EOL.split(header_data.strip()): - if not line: - break - cookie_dict = parse_one_response(line, - ignore_bad_cookies=ignore_bad_cookies, - ignore_bad_attributes=ignore_bad_attributes) - if not cookie_dict: - continue - cookie_dicts.append(cookie_dict) - if not cookie_dicts: - if not ignore_bad_cookies: - raise InvalidCookieError(data=header_data) - _report_invalid_cookie(header_data) - return cookie_dicts - - -class Cookie(object): - """Provide a simple interface for creating, modifying, and rendering - individual HTTP cookies. - - Cookie attributes are represented as normal Python object attributes. - Parsing, rendering and validation are reconfigurable per-attribute. The - default behavior is intended to comply with RFC 6265, URL-encoding illegal - characters where necessary. For example: the default behavior for the - Expires attribute is to parse strings as datetimes using parse_date, - validate that any set value is a datetime, and render the attribute per the - preferred date format in RFC 1123. - """ - def __init__(self, name, value, **kwargs): - # If we don't have or can't set a name value, we don't want to return - # junk, so we must break control flow. And we don't want to use - # InvalidCookieAttributeError, because users may want to catch that to - # suppress all complaining about funky attributes. - try: - self.name = name - except InvalidCookieAttributeError: - raise InvalidCookieError(message="invalid name for new Cookie") - self.value = value or '' - if kwargs: - self._set_attributes(kwargs, ignore_bad_attributes=False) - - def _set_attributes(self, attrs, ignore_bad_attributes=False): - for attr_name, attr_value in attrs.items(): - if not attr_name in self.attribute_names: - if not ignore_bad_attributes: - raise InvalidCookieAttributeError(attr_name, attr_value, - "unknown cookie attribute '%s'" % attr_name) - _report_unknown_attribute(attr_name) - - try: - setattr(self, attr_name, attr_value) - except InvalidCookieAttributeError as error: - if not ignore_bad_attributes: - raise - _report_invalid_attribute(attr_name, attr_value, error.reason) - continue - - @classmethod - def from_dict(cls, cookie_dict, ignore_bad_attributes=True): - """Construct a Cookie object from a dict of strings to parse. - - The main difference between this and Cookie(name, value, **kwargs) is - that the values in the argument to this method are parsed. - - If ignore_bad_attributes=True (default), values which did not parse - are set to '' in order to avoid passing bad data. - """ - name = cookie_dict.get('name', None) - if not name: - raise InvalidCookieError("Cookie must have name") - raw_value = cookie_dict.get('value', '') - # Absence or failure of parser here is fatal; errors in present name - # and value should be found by Cookie.__init__. - value = cls.attribute_parsers['value'](raw_value) - cookie = Cookie(name, value) - - # Parse values from serialized formats into objects - parsed = {} - for key, value in cookie_dict.items(): - # Don't want to pass name/value to _set_attributes - if key in ('name', 'value'): continue - parser = cls.attribute_parsers.get(key) - if not parser: - # Don't let totally unknown attributes pass silently - if not ignore_bad_attributes: - raise InvalidCookieAttributeError(key, value, - "unknown cookie attribute '%s'" % key) - _report_unknown_attribute(key) - continue - try: - parsed_value = parser(value) - except Exception as e: - reason = "did not parse with %s: %s" % (repr(parser), repr(e)) - if not ignore_bad_attributes: - raise InvalidCookieAttributeError( - key, value, reason) - _report_invalid_attribute(key, value, reason) - parsed_value = '' - parsed[key] = parsed_value - - # Set the parsed objects (does object validation automatically) - cookie._set_attributes(parsed, ignore_bad_attributes) - return cookie - - @classmethod - def from_string(cls, line, ignore_bad_cookies=False, - ignore_bad_attributes=True): - "Construct a Cookie object from a line of Set-Cookie header data." - cookie_dict = parse_one_response(line, - ignore_bad_cookies=ignore_bad_cookies, - ignore_bad_attributes=ignore_bad_attributes) - if not cookie_dict: - return None - return cls.from_dict(cookie_dict, - ignore_bad_attributes=ignore_bad_attributes) - - def to_dict(self): - this_dict = {'name': self.name, 'value': self.value} - this_dict.update(self.attributes()) - return this_dict - - def validate(self, name, value): - """Validate a cookie attribute with an appropriate validator. - - The value comes in already parsed (for example, an expires value - should be a datetime). Called automatically when an attribute - value is set. - """ - validator = self.attribute_validators.get(name, None) - if validator: - return True if validator(value) else False - return True - - def __setattr__(self, name, value): - """Attributes mentioned in attribute_names get validated using - functions in attribute_validators, raising an exception on failure. - Others get left alone. - """ - if name in self.attribute_names or name in ("name", "value"): - if name == 'name' and not value: - raise InvalidCookieError(message="Cookies must have names") - # Ignore None values indicating unset attr. Other invalids should - # raise error so users of __setattr__ can learn. - if value is not None: - if not self.validate(name, value): - raise InvalidCookieAttributeError(name, value, - "did not validate with " + - repr(self.attribute_validators.get(name))) - object.__setattr__(self, name, value) - - def __getattr__(self, name): - """Provide for acting like everything in attribute_names is - automatically set to None, rather than having to do so explicitly and - only at import time. - """ - if name in self.attribute_names: - return None - raise AttributeError(name) - - def attributes(self): - """Export this cookie's attributes as a dict of encoded values. - - This is an important part of the code for rendering attributes, e.g. - render_response(). - """ - dictionary = {} - # Only look for attributes registered in attribute_names. - for python_attr_name, cookie_attr_name in self.attribute_names.items(): - value = getattr(self, python_attr_name) - renderer = self.attribute_renderers.get(python_attr_name, None) - if renderer: - value = renderer(value) - # If renderer returns None, or it's just natively none, then the - # value is suppressed entirely - does not appear in any rendering. - if not value: - continue - dictionary[cookie_attr_name] = value - return dictionary - - def render_request(self, prefix="Cookie: "): - """Render as a string formatted for HTTP request headers - (simple 'Cookie: ' style). - """ - # Use whatever renderers are defined for name and value. - name, value = self.name, self.value - renderer = self.attribute_renderers.get('name', None) - if renderer: - name = renderer(name) - renderer = self.attribute_renderers.get('value', None) - if renderer: - value = renderer(value) - return ''.join((prefix, name, "=", value)) - - def render_response(self, prefix="Set-Cookie: "): - """Render as a string formatted for HTTP response headers - (detailed 'Set-Cookie: ' style). - """ - # Use whatever renderers are defined for name and value. - # (.attributes() is responsible for all other rendering.) - name, value = self.name, self.value - renderer = self.attribute_renderers.get('name', None) - if renderer: - name = renderer(name) - renderer = self.attribute_renderers.get('value', None) - if renderer: - value = renderer(value) - return '; '.join( - [''.join((prefix, name, '=', value))] + - [k if isinstance(v, bool) else '='.join((k, v)) - for k, v in self.attributes().items()]) - - def __eq__(self, other): - attrs = ['name', 'value'] + list(self.attribute_names.keys()) - for attr in attrs: - mine = getattr(self, attr, None) - his = getattr(other, attr, None) - if isinstance(mine, bytes): - mine = mine.decode('utf-8') - if isinstance(his, bytes): - his = his.decode('utf-8') - if mine != his: - return False - return True - - def __ne__(self, other): - return not self.__eq__(other) - - # Add a name and its proper rendering to this dict to register an attribute - # as exportable. The key is the name of the Cookie object attribute in - # Python, and it is mapped to the name you want in the output. - # 'name' and 'value' should not be here. - attribute_names = { - 'expires': 'Expires', - 'max_age': 'Max-Age', - 'domain': 'Domain', - 'path': 'Path', - 'comment': 'Comment', - 'version': 'Version', - 'secure': 'Secure', - 'httponly': 'HttpOnly', - } - - # Register single-parameter functions in this dictionary to have them - # used for encoding outgoing values (e.g. as RFC compliant strings, - # as base64, encrypted stuff, etc.) - # These are called by the property generated by cookie_attribute(). - # Usually it would be wise not to define a renderer for name, but it is - # supported in case there is ever a real need. - attribute_renderers = { - 'value': encode_cookie_value, - 'expires': render_date, - 'max_age': lambda item: str(item) if item else None, - 'secure': lambda item: True if item else False, - 'httponly': lambda item: True if item else False, - 'comment': encode_extension_av, - 'version': lambda item: str(item) if isinstance(item, int) - else encode_extension_av(item), - } - - # Register single-parameter functions in this dictionary to have them used - # for decoding incoming values for use in the Python API (e.g. into nice - # objects, numbers, unicode strings, etc.) - # These are called by the property generated by cookie_attribute(). - attribute_parsers = { - 'value': parse_value, - 'expires': parse_date, - 'domain': parse_domain, - 'path': parse_path, - 'max_age': lambda item: long(strip_spaces_and_quotes(item)), - 'comment': parse_string, - 'version': lambda item: int(strip_spaces_and_quotes(item)), - 'secure': lambda item: True if item else False, - 'httponly': lambda item: True if item else False, - } - - # Register single-parameter functions which return a true value for - # acceptable values, and a false value for unacceptable ones. An - # attribute's validator is run after it is parsed or when it is directly - # set, and InvalidCookieAttribute is raised if validation fails (and the - # validator doesn't raise a different exception prior) - attribute_validators = { - 'name': valid_name, - 'value': valid_value, - 'expires': valid_date, - 'domain': valid_domain, - 'path': valid_path, - 'max_age': valid_max_age, - 'comment': valid_value, - 'version': lambda number: re.match("^\d+\Z", str(number)), - 'secure': lambda item: item is True or item is False, - 'httponly': lambda item: item is True or item is False, - } - - -class Cookies(dict): - """Represent a set of cookies indexed by name. - - This class bundles together a set of Cookie objects and provides - a convenient interface to them. for parsing and producing cookie headers. - In basic operation it acts just like a dict of Cookie objects, but it adds - additional convenience methods for the usual cookie tasks: add cookie - objects by their names, create new cookie objects under specified names, - parse HTTP request or response data into new cookie objects automatically - stored in the dict, and render the set in formats suitable for HTTP request - or response headers. - """ - def __init__(self, *args, **kwargs): - dict.__init__(self) - self.add(*args, **kwargs) - - def add(self, *args, **kwargs): - """Add Cookie objects by their names, or create new ones under - specified names. - - Any unnamed arguments are interpreted as existing cookies, and - are added under the value in their .name attribute. With keyword - arguments, the key is interpreted as the cookie name and the - value as the UNENCODED value stored in the cookie. - """ - # Only take the first one, don't create new ones if unnecessary - for cookie in args: - if cookie.name in self: - continue - self[cookie.name] = cookie - for key, value in kwargs.items(): - if key in self: - continue - cookie = Cookie(key, value) - self[key] = cookie - - def parse_request(self, header_data, ignore_bad_cookies=False): - """Parse 'Cookie' header data into Cookie objects, and add them to - this Cookies object. - - :arg header_data: string containing only 'Cookie:' request headers or - header values (as in CGI/WSGI HTTP_COOKIE); if more than one, they must - be separated by CRLF (\\r\\n). - - :arg ignore_bad_cookies: if set, will log each syntactically invalid - cookie (at the granularity of semicolon-delimited blocks) rather than - raising an exception at the first bad cookie. - - :returns: a Cookies instance containing Cookie objects parsed from - header_data. - - .. note:: - If you want to parse 'Set-Cookie:' response headers, please use - parse_response instead. parse_request will happily turn 'expires=frob' - into a separate cookie without complaining, according to the grammar. - """ - cookies_dict = _parse_request(header_data, - ignore_bad_cookies=ignore_bad_cookies) - cookie_objects = [] - for name, value in cookies_dict.items(): - # Use from_dict to check name and parse value - cookie_dict = {'name': name, 'value': value} - try: - cookie = Cookie.from_dict(cookie_dict) - except InvalidCookieError: - if not ignore_bad_cookies: - raise - else: - cookie_objects.append(cookie) - try: - self.add(*cookie_objects) - except (InvalidCookieError): - if not ignore_bad_cookies: - raise - _report_invalid_cookie(header_data) - return self - - def parse_response(self, header_data, ignore_bad_cookies=False, - ignore_bad_attributes=True): - """Parse 'Set-Cookie' header data into Cookie objects, and add them to - this Cookies object. - - :arg header_data: string containing only 'Set-Cookie:' request headers - or their corresponding header values; if more than one, they must be - separated by CRLF (\\r\\n). - - :arg ignore_bad_cookies: if set, will log each syntactically invalid - cookie rather than raising an exception at the first bad cookie. (This - includes cookies which have noncompliant characters in the attribute - section). - - :arg ignore_bad_attributes: defaults to True, which means to log but - not raise an error when a particular attribute is unrecognized. (This - does not necessarily mean that the attribute is invalid, although that - would often be the case.) if unset, then an error will be raised at the - first semicolon-delimited block which has an unknown attribute. - - :returns: a Cookies instance containing Cookie objects parsed from - header_data, each with recognized attributes populated. - - .. note:: - If you want to parse 'Cookie:' headers (i.e., data like what's sent - with an HTTP request, which has only name=value pairs and no - attributes), then please use parse_request instead. Such lines often - contain multiple name=value pairs, and parse_response will throw away - the pairs after the first one, which will probably generate errors or - confusing behavior. (Since there's no perfect way to automatically - determine which kind of parsing to do, you have to tell it manually by - choosing correctly from parse_request between part_response.) - """ - cookie_dicts = _parse_response(header_data, - ignore_bad_cookies=ignore_bad_cookies, - ignore_bad_attributes=ignore_bad_attributes) - cookie_objects = [] - for cookie_dict in cookie_dicts: - cookie = Cookie.from_dict(cookie_dict) - cookie_objects.append(cookie) - self.add(*cookie_objects) - return self - - @classmethod - def from_request(cls, header_data, ignore_bad_cookies=False): - "Construct a Cookies object from request header data." - cookies = Cookies() - cookies.parse_request(header_data, - ignore_bad_cookies=ignore_bad_cookies) - return cookies - - @classmethod - def from_response(cls, header_data, ignore_bad_cookies=False, - ignore_bad_attributes=True): - "Construct a Cookies object from response header data." - cookies = Cookies() - cookies.parse_response(header_data, - ignore_bad_cookies=ignore_bad_cookies, - ignore_bad_attributes=ignore_bad_attributes) - return cookies - - def render_request(self, combined=False, prefix="Cookie: "): - """Render the dict's Cookie objects into a string formatted for HTTP - request headers (simple 'Cookie: ' style). - """ - if not combined: - return "\r\n".join( - cookie.render_request(prefix=prefix) - for cookie in self.values()) - return "%s%s" % (prefix, "; ".join( - cookie.render_request(prefix='') - for cookie in self.values())) - - def render_response(self, prefix="Set-Cookie: "): - """Render the dict's Cookie objects into a string formatted for HTTP - response headers (detailed 'Set-Cookie: ' style). - """ - return "\r\n".join( - cookie.render_response(prefix=prefix) - for cookie in self.values()) - - def __repr__(self): - return "Cookies(%s)" % ', '.join( - "%s=%s" % (name, repr(cookie.value)) - for (name, cookie) in self.items()) - - def __eq__(self, other): - """Test if a Cookies object is globally 'equal' to another one by - seeing if it looks like a dict such that d[k] == self[k]. This depends - on each Cookie object reporting its equality correctly. - """ - if not hasattr(other, "keys"): - return False - try: - keys = sorted(set(self.keys()) | set(other.keys())) - for key in keys: - if not key in self: - return False - if not key in other: - return False - if not(self[key] == other[key]): - return False - except (TypeError, KeyError): - raise - return True - - def __ne__(self, other): - return not self.__eq__(other) diff --git a/weboob/tools/browser2/test.py b/weboob/tools/browser2/test.py deleted file mode 100644 index 50a877c5..00000000 --- a/weboob/tools/browser2/test.py +++ /dev/null @@ -1,597 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2012 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - -from __future__ import absolute_import - -from datetime import datetime -from random import choice -import re -import string - -from requests import HTTPError -from nose.plugins.skip import SkipTest -from nose.tools import assert_raises - -from .browser import BaseBrowser, DomainBrowser, Weboob, UrlNotAllowed -from .cookiejar import CookieJar, CookiePolicy -from .cookies import Cookies - -from weboob.tools.json import json - -# Those services can be run locally. More or less. -HTTPBIN = 'http://httpbin.org/' # https://github.com/kennethreitz/httpbin -POSTBIN = 'http://www.postbin.org/' # https://github.com/progrium/postbin -REQUESTBIN = 'http://requestb.in/' # https://github.com/progrium/requestbin - -# if you change HTTPBIN, you should also change these URLs for some tests: -# redirect to http://httpbin.org/get -REDIRECTS1 = ('http://tinyurl.com/ouiboube-b2', 'http://bit.ly/st4Hcv') -# redirect to http://httpbin.org/cookies -REDIRECTS2 = ('http://tinyurl.com/7zp3jnr', 'http://bit.ly/HZCCX7') - - -def test_base(): - b = BaseBrowser() - r = b.location(HTTPBIN + 'headers') - assert isinstance(r.text, unicode) - assert 'Firefox' in r.text - assert 'python' not in r.text - assert 'identity' not in r.text - assert b.url == HTTPBIN + 'headers' - - r = b.location(HTTPBIN + 'gzip') - assert 'Firefox' in r.text - - -def test_redirects(): - """ - Check redirects are followed - """ - b = BaseBrowser() - b.location(HTTPBIN + 'redirect/1') - assert b.url == HTTPBIN + 'get' - - r = b.location(HTTPBIN + 'redirect/1') - assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1' - assert r.url == HTTPBIN + 'get' - - # Normal redirect chain - b.url = None - r = b.location(HTTPBIN + 'redirect/4') - assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1' - assert len(r.history) == 4 - assert r.history[3].request.url == HTTPBIN + 'redirect/1' - assert r.history[3].request.headers.get('Referer') == HTTPBIN + 'redirect/2' - assert r.history[2].request.url == HTTPBIN + 'redirect/2' - assert r.history[2].request.headers.get('Referer') == HTTPBIN + 'redirect/3' - assert r.history[1].request.url == HTTPBIN + 'redirect/3' - assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/4' - assert r.history[0].request.url == HTTPBIN + 'redirect/4' - assert r.history[0].request.headers.get('Referer') is None - assert r.url == HTTPBIN + 'get' - - # Disable all referers - r = b.location(HTTPBIN + 'redirect/2', referrer=False) - assert json.loads(r.text)['headers'].get('Referer') is None - assert len(r.history) == 2 - assert r.history[1].request.headers.get('Referer') is None - assert r.history[0].request.headers.get('Referer') is None - assert r.url == HTTPBIN + 'get' - - # Only overrides first referer - r = b.location(HTTPBIN + 'redirect/2', referrer='http://example.com/') - assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'redirect/1' - assert len(r.history) == 2 - assert r.history[1].request.headers.get('Referer') == HTTPBIN + 'redirect/2' - assert r.history[0].request.headers.get('Referer') == 'http://example.com/' - assert r.url == HTTPBIN + 'get' - - # Don't follow - r = b.location(HTTPBIN + 'redirect/2', allow_redirects=False) - assert len(r.history) == 0 - assert r.url == HTTPBIN + 'redirect/2' - assert r.status_code == 302 - - -def test_redirect2(): - """ - More redirect tests - """ - rurl = choice(REDIRECTS1) - b = BaseBrowser() - r = b.location(rurl) - assert r.url == HTTPBIN + 'get' - assert json.loads(r.text)['headers'].get('Referer') == rurl - # TODO referrer privacy settings - - -def test_brokenpost(): - """ - Test empty POST and redirect after POST - """ - raise SkipTest('PostBin is disabled') - try: - b = BaseBrowser() - # postbin is picky with empty posts. that's good! - r = b.location(POSTBIN, {}) - # ensures empty data (but not None) does a POST - assert r.request.method == 'POST' - # ensure we were redirected after submitting a post - assert len(r.url) >= len(POSTBIN) - # send a POST with data - b.location(r.url, {'hello': 'world'}) - r = b.location(r.url + '/feed') - assert 'hello' in r.text - assert 'world' in r.text - except HTTPError as e: - if str(e).startswith('503 '): - raise SkipTest('Quota exceeded') - else: - raise - - -def _getrqbin(b): - """ - Get a RequestBin - """ - # empty POST - r = b.location(REQUESTBIN + 'api/v1/bins', '') - name = json.loads(r.text)['name'] - assert name - return name - - -def test_smartpost(): - """ - Checks we use POST or GET depending on the parameters - """ - b = BaseBrowser() - n = _getrqbin(b) - - r = b.location(REQUESTBIN + n) - assert 'ok' in r.text - r = b.location(REQUESTBIN + n + '?inspect') - assert 'GET /%s' % n in r.text - - r = b.location(REQUESTBIN + n, {'hello': 'world'}) - assert 'ok' in r.text - r = b.location(REQUESTBIN + n + '?inspect') - assert 'POST /%s' % n in r.text - assert 'hello' in r.text - assert 'world' in r.text - - -def test_weboob(): - """ - Test the Weboob Profile - """ - class BooBrowser(BaseBrowser): - PROFILE = Weboob('0.0') - - b = BooBrowser() - r = b.location(HTTPBIN + 'headers') - assert 'weboob/0.0' in r.text - assert 'identity' in r.text - - -def test_relative(): - """ - Check relative URL / domain handling - """ - b = DomainBrowser() - b.location(HTTPBIN) - b.location('/ip') - assert b.url == HTTPBIN + 'ip' - - assert b.absurl('/ip') == HTTPBIN + 'ip' - b.location(REQUESTBIN) - assert b.absurl('/ip') == REQUESTBIN + 'ip' - b.BASEURL = HTTPBIN + 'aaaaaa' - assert b.absurl('/ip') == HTTPBIN + 'ip' - assert b.absurl('ip') == HTTPBIN + 'ip' - assert b.absurl('/ip', False) == REQUESTBIN + 'ip' - b.BASEURL = HTTPBIN + 'aaaaaa/' - assert b.absurl('/') == HTTPBIN - assert b.absurl('/bb') == HTTPBIN + 'bb' - assert b.absurl('') == HTTPBIN + 'aaaaaa/' - assert b.absurl('bb') == HTTPBIN + 'aaaaaa/bb' - - # Give an absolute URL, should get it unaltered - b.BASEURL = 'http://example.net/' - assert b.absurl('http://example.com/aaa/bbb') == 'http://example.com/aaa/bbb' - assert b.absurl('https://example.com/aaa/bbb') == 'https://example.com/aaa/bbb' - - # Schemeless absolute URL - assert b.absurl('//example.com/aaa/bbb') == 'http://example.com/aaa/bbb' - b.BASEURL = 'https://example.net/' - assert b.absurl('//example.com/aaa/bbb') == 'https://example.com/aaa/bbb' - - -def test_allow_url(): - b = DomainBrowser() - b.RESTRICT_URL = True - assert b.url_allowed('http://example.com/') - assert b.url_allowed('http://example.net/') - - b.BASEURL = 'http://example.com/' - assert b.url_allowed('http://example.com/') - assert b.url_allowed('http://example.com/aaa') - assert not b.url_allowed('https://example.com/') - assert not b.url_allowed('http://example.net/') - assert not b.url_allowed('http://') - - b.BASEURL = 'https://example.com/' - assert not b.url_allowed('http://example.com/') - assert not b.url_allowed('http://example.com/aaa') - assert b.url_allowed('https://example.com/') - assert b.url_allowed('https://example.com/aaa/bbb') - - b.RESTRICT_URL = ['https://example.com/', 'http://example.com/'] - assert b.url_allowed('http://example.com/aaa/bbb') - assert b.url_allowed('https://example.com/aaa/bbb') - assert not b.url_allowed('http://example.net/aaa/bbb') - assert not b.url_allowed('https://example.net/aaa/bbb') - - assert_raises(UrlNotAllowed, b.location, 'http://example.net/') - assert_raises(UrlNotAllowed, b.open, 'http://example.net/') - - -def test_changereq(): - """ - Test overloading request defaults - """ - b = BaseBrowser() - r = b.location(HTTPBIN + 'headers', method='HEAD') - assert r.text is None - - r = b.location(HTTPBIN + 'put', method='PUT', data={'hello': 'world'}) - assert 'hello' in r.text - assert 'world' in r.text - - r = b.location(HTTPBIN + 'headers', headers={'User-Agent': 'Web Out of Browsers'}) - assert 'Web Out of Browsers' in r.text - assert 'Firefox' not in r.text - - -def test_referrer(): - """ - Test automatic referrer setting - """ - b = BaseBrowser() - r = b.location(HTTPBIN + 'get') - assert 'Referer' not in json.loads(r.text)['headers'] - r = b.location(HTTPBIN + 'headers') - assert json.loads(r.text)['headers'].get('Referer') == HTTPBIN + 'get' - r = b.location(HTTPBIN + 'headers') - assert 'Referer' not in json.loads(r.text)['headers'] - - # Force another referrer - r = b.location(HTTPBIN + 'get') - r = b.location(HTTPBIN + 'headers', referrer='http://example.com/') - assert json.loads(r.text)['headers'].get('Referer') == 'http://example.com/' - - # Force no referrer - r = b.location(HTTPBIN + 'get') - r = b.location(HTTPBIN + 'headers', referrer=False) - assert 'Referer' not in json.loads(r.text)['headers'] - - assert b.get_referrer('https://example.com/', 'http://example.com/') is None - - -def test_cookiepolicy(): - """ - Test cookie parsing and processing - """ - policy = CookiePolicy() - - def bc(data): - """ - build one cookie, and normalize it - """ - cs = Cookies() - cs.parse_response(data) - for c in cs.itervalues(): - policy.normalize_cookie(c, 'http://example.com/') - return c - - # parse max-age - assert bc('__bwid=58244366; max-age=42; path=/').expires - - # security for received cookies - assert policy.can_set(bc('k=v; domain=www.example.com'), - 'http://www.example.com/') - assert policy.can_set(bc('k=v; domain=sub.example.com'), - 'http://www.example.com/') - assert policy.can_set(bc('k=v; domain=sub.example.com'), - 'http://example.com/') - assert policy.can_set(bc('k=v; domain=.example.com'), - 'http://example.com/') - assert policy.can_set(bc('k=v; domain=www.example.com'), - 'http://example.com/') - assert not policy.can_set(bc('k=v; domain=example.com'), - 'http://example.net/') - assert not policy.can_set(bc('k=v; domain=.net'), - 'http://example.net/') - assert not policy.can_set(bc('k=v; domain=www.example.net'), - 'http://www.example.com/') - assert not policy.can_set(bc('k=v; domain=wwwexample.com'), - 'http://example.com/') - assert not policy.can_set(bc('k=v; domain=.example.com'), - 'http://wwwexample.com/') - - # pattern matching domains - assert not policy.domain_match('example.com', 's.example.com') - assert policy.domain_match('.example.com', 's.example.com') - assert not policy.domain_match('.example.com', 'example.com') # yep. - assert policy.domain_match('s.example.com', 's.example.com') - assert not policy.domain_match('s.example.com', 's2.example.com') - assert policy.domain_match_list(True, 'example.com') - assert not policy.domain_match_list([], 'example.com') - assert policy.domain_match_list(['example.net', 'example.com'], 'example.com') - assert not policy.domain_match_list(['example.net', 'example.org'], 'example.com') - - -def test_cookiejar(): - """ - Test adding, removing, finding cookies to and from the jar - """ - def bc(data): - """ - build one cookie - """ - cs = Cookies() - cs.parse_response(data) - for c in cs.itervalues(): - return c - - # filtering cookies - cookie0 = bc('j=v; domain=www.example.com; path=/') - cookie1 = bc('k=v1; domain=www.example.com; path=/; secure') - cookie2 = bc('k=v2; domain=.example.com; path=/') - cookie3 = bc('k=v3; domain=www.example.com; path=/lol/cat/') - cookie4 = bc('k=v4; domain=www.example.com; path=/lol/') - - cj = CookieJar(CookiePolicy()) - cj.set(cookie0) - cj.set(cookie1) - cj.set(cookie2) - cj.set(cookie3) - cj.set(cookie4) - - assert len(cj.all()) == 5 # all cookies - assert len(cj.all(path='/')) == 3 # all cookies except the ones with deep paths - assert len(cj.all(name='k')) == 4 # this excludes cookie0 - assert len(cj.all(domain='example.com')) == 0 # yep - assert len(cj.all(domain='s.example.com')) == 1 # cookie2 - assert len(cj.all(domain='.example.com')) == 1 # cookie2 (exact match) - assert len(cj.all(domain='www.example.com')) == 5 # all cookies - assert len(cj.all(domain='www.example.com', path="/lol/")) == 4 # all + cookie4 - assert len(cj.all(domain='www.example.com', path="/lol/cat")) == 4 # all + cookie4 - assert len(cj.all(domain='www.example.com', path="/lol/cat/")) == 5 # all + cookie4 + cookie3 - assert len(cj.all(secure=True)) == 1 # cookie1 - assert len(cj.all(secure=False)) == 4 # all except cookie1 - - assert cj.get(domain='www.example.com', path="/lol/") is cookie4 - assert cj.get(domain='www.example.com', path="/lol/cat/") is cookie3 - assert cj.get(domain='www.example.com', path="/") is cookie1 - assert cj.get(name='j', domain='www.example.com', path="/") is cookie0 - assert cj.get(name='k', domain='www.example.com', path="/") is cookie1 - assert cj.get(name='k', domain='s.example.com', path="/") is cookie2 - assert cj.get(name='k', domain='www.example.com', path="/aaa") is cookie1 - assert cj.get(domain='www.example.com', path='/') is cookie1 - assert cj.get(domain='www.example.com', path='/', secure=False) is cookie0 - assert cj.get(domain='www.example.com', path='/', secure=True) is cookie1 - - # this is just not API choice, but how browsers act - assert cj.for_request('http://www.example.com/') == {'k': 'v2', 'j': 'v'} - assert cj.for_request('https://www.example.com/') == {'k': 'v1', 'j': 'v'} - assert cj.for_request('http://www.example.com/lol/') == {'k': 'v4', 'j': 'v'} - assert cj.for_request('http://s.example.com/lol/') == {'k': 'v2'} - assert cj.for_request('http://example.com/lol/') == {} - - # remove/add/replace - assert cj.remove(cookie1) is True - assert cj.get(secure=True) is None - cj.set(cookie1) - assert cj.get(secure=True) is cookie1 - cookie5 = bc('k=w; domain=www.example.com; path=/; secure') - cj.set(cookie5) - assert cj.get(secure=True) is cookie5 - assert len(cj.all(secure=True)) == 1 - # not the same cookie, but the same identifiers - assert cj.remove(cookie1) is True - - cj.clear() - cookie6 = bc('e1=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;') - cookie7 = bc('e2=1; domain=www.example.com; path=/; Expires=Thu, 01 Jan 2010 00:00:01 GMT;') - now = datetime(2000, 01, 01) - cj.set(cookie0) - cj.set(cookie6) - cj.set(cookie7) - - assert cj.for_request('http://www.example.com/', now) == {'e2': '1', 'j': 'v'} - assert cj.for_request('http://www.example.com/', datetime(2020, 01, 01)) == {'j': 'v'} - - assert len(cj.all()) == 3 - cj.flush(now) - assert len(cj.all()) == 2 - assert cj.remove(cookie6) is False # already removed - cj.flush(now, session=True) - assert len(cj.all()) == 1 - - -def test_buildcookie(): - """ - Test easy cookie building - """ - cj = CookieJar(CookiePolicy()) - c = cj.build('kk', 'vv', 'http://example.com/') - assert c.domain == 'example.com' - assert not c.secure - assert c.path == '/' - - c = cj.build('kk', 'vv', 'http://example.com/', path='/plop/', wildcard=True) - assert c.domain == '.example.com' - - assert c.path == '/plop/' - c = cj.build('kk', 'vv', 'http://example.com/plop/') - assert c.path == '/plop/' - c = cj.build('kk', 'vv', 'http://example.com/plop/plap') - assert c.path == '/plop/' - c = cj.build('kk', 'vv', 'http://example.com/plop/?http://example.net/plip/') - assert c.path == '/plop/' - assert c.domain == 'example.com' - c = cj.build('kk', 'vv', 'http://example.com/plop/plap', path='/') - assert c.path == '/' - - c = cj.build('kk', 'vv', 'https://example.com/') - assert c.domain == 'example.com' - assert c.secure - - # check the cookie works - c.name = 'k' - c.value = 'v' - cj.set(c) - assert cj.for_request('https://example.com/') == {'k': 'v'} - assert cj.for_request('http://example.com/') == {} - - -def test_cookienav(): - """ - Test browsing while getting new cookies - """ - b = BaseBrowser() - r = b.location(HTTPBIN + 'cookies') - assert len(json.loads(r.text)['cookies']) == 0 - - r = b.location(HTTPBIN + 'cookies/set/hello/world') - assert len(json.loads(r.text)['cookies']) == 1 - assert json.loads(r.text)['cookies']['hello'] == 'world' - r = b.location(HTTPBIN + 'cookies/set/hello2/world2') - assert len(json.loads(r.text)['cookies']) == 2 - assert json.loads(r.text)['cookies']['hello2'] == 'world2' - - r = b.location(REQUESTBIN) - assert 'session' in r.cookies # requestbin should give this by default - assert 'hello' not in r.cookies # we didn't send the wrong cookie - # return to httpbin, check we didn't give the wrong cookie - r = b.location(HTTPBIN + 'cookies') - assert 'session' not in json.loads(r.text)['cookies'] - - # override cookies temporarily - r = b.location(HTTPBIN + 'cookies', cookies={'bla': 'bli'}) - assert len(json.loads(r.text)['cookies']) == 1 - assert json.loads(r.text)['cookies']['bla'] == 'bli' - # reload, the "fake" cookie should not be there - r = b.location(HTTPBIN + 'cookies') - assert len(json.loads(r.text)['cookies']) == 2 - assert 'bla' not in json.loads(r.text)['cookies'] - - -def test_cookieredirect(): - """ - Test cookie redirection security - """ - rurl = choice(REDIRECTS2) - - b = BaseBrowser() - r = b.location(HTTPBIN + 'cookies') - assert len(json.loads(r.text)['cookies']) == 0 - - # add a cookie to the redirection service domain (not the target!) - cookie = b.cookies.build('k', 'v1', rurl) - b.cookies.set(cookie) - r = b.location(rurl) - assert r.url == HTTPBIN + 'cookies' - # the cookie was not forwarded; it's for another domain - # this is important for security reasons, - # and because python-requests tries to do it by default! - assert len(json.loads(r.text)['cookies']) == 0 - - # add a cookie for the target - cookie = b.cookies.build('k', 'v2', HTTPBIN) - b.cookies.set(cookie) - r = b.location(rurl) - assert r.url == HTTPBIN + 'cookies' - assert len(json.loads(r.text)['cookies']) == 1 - assert json.loads(r.text)['cookies']['k'] == 'v2' - - # check all cookies sent in the request chain - assert r.cookies == {'k': 'v2'} - assert r.history[0].cookies['k'] == 'v1' # some services add other cookies - - -def test_cookie_srv1(): - """ - Test cookie in real conditions (service 1) - """ - class TestBrowser(DomainBrowser): - BASEURL = 'http://www.mria-arim.ca/' - - b = TestBrowser() - b.location('testCookies.asp') - # TODO this is also a good place to test form parsing/submission - b.location('testCookies.asp', {'makeMe': 'Create Cookie'}) - r = b.location('testCookies.asp', {'testMe': 'Test Browser'}) - assert 'Your Browser accepts cookies' in r.text - - -def test_cookie_srv2(): - """ - Test cookie in real conditions (service 2) - """ - def randtext(): - return ''.join(choice(string.digits + string.letters) for _ in xrange(32)) - - class TestBrowser(DomainBrowser): - BASEURL = 'http://www.html-kit.com/tools/cookietester/' - - def cookienum(self): - return int(re.search('Number of cookies received: (\d+)', - self.response.text).groups()[0]) - - def mypost(self, **data): - return self.location('', data) - - b = TestBrowser() - b.home() - assert b.cookienum() == 0 - - r1 = randtext() - r1v = randtext() - - # TODO this is also a good place to test form parsing/submission - # get a new cookie - r = b.mypost(cn=r1, cv=r1v) - assert b.cookienum() == 1 - assert r1 in r.text - assert r1v in r.text - - # cookie deletion - r = b.mypost(cr=r1) - assert b.cookienum() == 0 - assert r1 not in r.text - assert r1v not in r.text - - # om nom nom - b.mypost(cn=randtext(), cv=randtext()) - b.mypost(cn=randtext(), cv=randtext()) - b.mypost(cn=randtext(), cv=randtext()) - b.mypost(cn=randtext(), cv=randtext()) - assert b.cookienum() == 4