From fca0ab8733a117a041411f4508fb19f0ac264390 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Thu, 13 Mar 2014 22:00:50 +0100 Subject: [PATCH] Quick and dirty -a and -I support for Browser2 --- weboob/tools/application/base.py | 5 ++++ weboob/tools/browser2/browser.py | 45 ++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/weboob/tools/application/base.py b/weboob/tools/application/base.py index 56853211..0903b035 100644 --- a/weboob/tools/application/base.py +++ b/weboob/tools/application/base.py @@ -328,6 +328,7 @@ class BaseApplication(object): if self.options.debug or self.options.save_responses: level = logging.DEBUG from weboob.tools.browser import StandardBrowser + from weboob.tools.browser2 import BaseBrowser as Browser2 StandardBrowser.DEBUG_MECHANIZE = True # required to actually display or save the stuff logger = logging.getLogger("mechanize") @@ -340,7 +341,9 @@ class BaseApplication(object): level = logging.WARNING if self.options.insecure: from weboob.tools.browser import StandardBrowser + from weboob.tools.browser2 import BaseBrowser as Browser2 StandardBrowser.INSECURE = True + Browser2.VERIFY = False # this only matters to developers if not self.options.debug and not self.options.save_responses: @@ -353,7 +356,9 @@ class BaseApplication(object): responses_dirname = tempfile.mkdtemp(prefix='weboob_session_') print >>sys.stderr, 'Debug data will be saved in this directory: %s' % responses_dirname StandardBrowser.SAVE_RESPONSES = True + Browser2.SAVE_RESPONSES = True StandardBrowser.responses_dirname = responses_dirname + Browser2.responses_dirname = responses_dirname handlers.append(self.create_logging_file_handler(os.path.join(responses_dirname, 'debug.log'))) # file logger diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index 223011c4..9159db1e 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -21,6 +21,10 @@ from __future__ 
import absolute_import import re from urlparse import urlparse, urljoin +import mimetypes +import os +import tempfile +import sys try: import requests @@ -121,7 +125,10 @@ class BaseBrowser(object): TIMEOUT = 10.0 REFRESH_MAX = 0.0 - def __init__(self, logger=None, proxy=None): + VERIFY = True + SAVE_RESPONSES = False + + def __init__(self, logger=None, proxy=None, responses_dirname=None): self.logger = getLogger('browser', logger) self._setup_session(self.PROFILE) if proxy is not None: @@ -129,20 +136,54 @@ class BaseBrowser(object): self.url = None self.response = None + self.responses_dirname = responses_dirname + self.responses_count = 0 + + def _save(self, response, warning=False, **kwargs): + if self.responses_dirname is None: + self.responses_dirname = tempfile.mkdtemp(prefix='weboob_session_') + print >>sys.stderr, 'Debug data will be saved in this directory: %s' % self.responses_dirname + elif not os.path.isdir(self.responses_dirname): + os.makedirs(self.responses_dirname) + # get the content-type, remove optional charset part + mimetype = response.headers.get('Content-Type', '').split(';')[0] + # due to http://bugs.python.org/issue1043134 + if mimetype == 'text/plain': + ext = '.txt' + else: + # try to get an extension (and avoid adding 'None') + ext = mimetypes.guess_extension(mimetype, False) or '' + response_filepath = os.path.join(self.responses_dirname, unicode(self.responses_count)+ext) + with open(response_filepath, 'w') as f: + f.write(response.content) + match_filepath = os.path.join(self.responses_dirname, 'url_response_match.txt') + with open(match_filepath, 'a') as f: + f.write('%s\t%s\n' % (response.url, os.path.basename(response_filepath))) + self.responses_count += 1 + + msg = u'Response saved to %s' % response_filepath + if warning: + self.logger.warning(msg) + else: + self.logger.info(msg) + def _setup_session(self, profile): """ Set up a python-requests session for our usage. 
""" session = requests.Session() + session.verify = self.VERIFY if self.TIMEOUT: session.timeout = self.TIMEOUT ## weboob only can provide proxy and HTTP auth options session.trust_env = False - # TODO connect config['verbose'] to our logger profile.setup_session(session) + if self.SAVE_RESPONSES: + session.hooks['response'].append(self._save) + self.session = session def location(self, url, **kwargs):