Add HTMLPage.REFRESH_MAX parameter (disabled by default) to configure the handling of refreshes
This commit is contained in:
parent
da859b36a1
commit
6a108221db
1 changed file with 18 additions and 4 deletions
|
|
@ -457,6 +457,14 @@ class HTMLPage(Page):
|
||||||
The class to instantiate when using :meth:`HTMLPage.get_form`. Default to :class:`Form`.
|
The class to instantiate when using :meth:`HTMLPage.get_form`. Default to :class:`Form`.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
REFRESH_MAX = None
|
||||||
|
"""
|
||||||
|
When handling a "Refresh" meta header, the page considers it only if the sleep
|
||||||
|
time is less than or equal to this value.
|
||||||
|
|
||||||
|
Default value is None, which means refreshes aren't handled.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
import lxml.html as html
|
import lxml.html as html
|
||||||
ns = html.etree.FunctionNamespace(None)
|
ns = html.etree.FunctionNamespace(None)
|
||||||
|
|
@ -469,16 +477,22 @@ class HTMLPage(Page):
|
||||||
self.handle_refresh()
|
self.handle_refresh()
|
||||||
|
|
||||||
def handle_refresh(self):
|
def handle_refresh(self):
|
||||||
|
if self.REFRESH_MAX is None:
|
||||||
|
return
|
||||||
|
|
||||||
for refresh in self.doc.xpath('//head/meta[@http-equiv="Refresh"]'):
|
for refresh in self.doc.xpath('//head/meta[@http-equiv="Refresh"]'):
|
||||||
m = self.browser.REFRESH_RE.match(refresh.get('content', ''))
|
m = self.browser.REFRESH_RE.match(refresh.get('content', ''))
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
url = urlparse.urljoin(self.url, m.groupdict().get('url', None))
|
url = urlparse.urljoin(self.url, m.groupdict().get('url', None))
|
||||||
|
sleep = float(m.groupdict()['sleep'])
|
||||||
|
|
||||||
self.logger.info('Redirecting to %s', url)
|
if sleep <= self.REFRESH_MAX:
|
||||||
self.browser.location(url)
|
self.logger.info('Redirecting to %s', url)
|
||||||
break
|
self.browser.location(url)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
self.logger.debug('Do not refresh to %s because %s > REFRESH_MAX(%s)' % (url, sleep, self.REFRESH_MAX))
|
||||||
|
|
||||||
def define_xpath_functions(self, ns):
|
def define_xpath_functions(self, ns):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue