handle Refresh HTTP header
This commit is contained in:
parent
04b7a9d572
commit
ecd3a9c5ab
1 changed file with 40 additions and 4 deletions
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import re
|
||||||
from urlparse import urlparse, urljoin
|
from urlparse import urlparse, urljoin
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -118,6 +119,7 @@ class BaseBrowser(object):
|
||||||
|
|
||||||
PROFILE = Firefox()
|
PROFILE = Firefox()
|
||||||
TIMEOUT = 10.0
|
TIMEOUT = 10.0
|
||||||
|
REFRESH_MAX = 0.0
|
||||||
|
|
||||||
def __init__(self, logger=None):
|
def __init__(self, logger=None):
|
||||||
self.logger = getLogger('browser', logger)
|
self.logger = getLogger('browser', logger)
|
||||||
|
|
@ -157,7 +159,7 @@ class BaseBrowser(object):
|
||||||
self.url = self.response.url
|
self.url = self.response.url
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def open(self, url, referrer=None, **kwargs):
|
def open(self, url, referrer=None, allow_redirects=True, **kwargs):
|
||||||
"""
|
"""
|
||||||
Make an HTTP request like a browser does:
|
Make an HTTP request like a browser does:
|
||||||
* follow redirects (unless disabled)
|
* follow redirects (unless disabled)
|
||||||
|
|
@ -188,6 +190,9 @@ class BaseBrowser(object):
|
||||||
|
|
||||||
:rtype: :class:`requests.Response`
|
:rtype: :class:`requests.Response`
|
||||||
"""
|
"""
|
||||||
|
return self._open(url, referrer, allow_redirects, **kwargs)
|
||||||
|
|
||||||
|
def _open(self, url, referrer=None, allow_redirects=True, **kwargs):
|
||||||
if isinstance(url, requests.Request):
|
if isinstance(url, requests.Request):
|
||||||
req = url
|
req = url
|
||||||
url = req.url
|
url = req.url
|
||||||
|
|
@ -196,7 +201,7 @@ class BaseBrowser(object):
|
||||||
|
|
||||||
# guess method
|
# guess method
|
||||||
if req.method is None:
|
if req.method is None:
|
||||||
if req.data is None:
|
if req.data:
|
||||||
req.method = 'POST'
|
req.method = 'POST'
|
||||||
else:
|
else:
|
||||||
req.method = 'GET'
|
req.method = 'GET'
|
||||||
|
|
@ -217,7 +222,38 @@ class BaseBrowser(object):
|
||||||
preq = self.session.prepare_request(req)
|
preq = self.session.prepare_request(req)
|
||||||
|
|
||||||
# call python-requests
|
# call python-requests
|
||||||
response = self.session.send(preq)
|
response = self.session.send(preq, allow_redirects=allow_redirects)
|
||||||
|
|
||||||
|
if allow_redirects:
|
||||||
|
response = self.handle_refresh(response)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
REFRESH_RE = re.compile("^(?P<sleep>[\d\.]+)(; url=[\"']?(?P<url>.*?)[\"']?)?$", re.IGNORECASE)
|
||||||
|
def handle_refresh(self, response):
|
||||||
|
"""
|
||||||
|
Called by _open to handle the Refresh HTTP header.
|
||||||
|
|
||||||
|
It only redirects to the refresh URL if the sleep time does not exceed
|
||||||
|
REFRESH_MAX.
|
||||||
|
"""
|
||||||
|
if not 'Refresh' in response.headers:
|
||||||
|
return response
|
||||||
|
|
||||||
|
m = self.REFRESH_RE.match(response.headers['Refresh'])
|
||||||
|
if m:
|
||||||
|
# XXX perhaps we should not redirect if the refresh url is equal to the current url.
|
||||||
|
url = m.groupdict().get('url', None) or response.request.url
|
||||||
|
sleep = float(m.groupdict()['sleep'])
|
||||||
|
|
||||||
|
if sleep <= self.REFRESH_MAX:
|
||||||
|
self.logger.debug('Refresh to %s' % url)
|
||||||
|
return self._open(url)
|
||||||
|
else:
|
||||||
|
self.logger.debug('Do not refresh to %s because %s > REFRESH_MAX(%s)' % (url, sleep, self.REFRESH_MAX))
|
||||||
|
return response
|
||||||
|
|
||||||
|
self.logger.warning('Unable to handle refresh "%s"' % response.headers['Refresh'])
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
@ -332,7 +368,7 @@ class DomainBrowser(BaseBrowser):
|
||||||
req = url
|
req = url
|
||||||
return super(DomainBrowser, self).open(req, *args, **kwargs)
|
return super(DomainBrowser, self).open(req, *args, **kwargs)
|
||||||
|
|
||||||
def home(self):
|
def go_home(self):
|
||||||
"""
|
"""
|
||||||
Go to the "home" page, usually the BASEURL.
|
Go to the "home" page, usually the BASEURL.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue