From 14b1b56914249ee0f0c636674fbb48ba91c6abae Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Mon, 19 May 2014 01:01:25 +0200 Subject: [PATCH] browser2: Add an option to convert POST data to the proper encoding And autodetect it on forms. There is no other way to know what is the expected encoding. --- modules/pastealacon/browser.py | 7 ++----- weboob/tools/browser2/browser.py | 12 ++++++++++-- weboob/tools/browser2/page.py | 4 +++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/modules/pastealacon/browser.py b/modules/pastealacon/browser.py index f9c4e586..63993698 100644 --- a/modules/pastealacon/browser.py +++ b/modules/pastealacon/browser.py @@ -63,13 +63,10 @@ class CaptchaPage(HTMLPage): class PostPage(HTMLPage): - # TODO handle encoding in Browser2 def post(self, paste, expiration=None): - encoding = 'ISO-8859-1' - form = self.get_form(name='editor') - form['code2'] = paste.contents.encode(encoding) - form['poster'] = paste.title.encode(encoding) + form['code2'] = paste.contents + form['poster'] = paste.title if expiration: form['expiry'] = expiration form.submit() diff --git a/weboob/tools/browser2/browser.py b/weboob/tools/browser2/browser.py index cdbd0196..0ff2e715 100644 --- a/weboob/tools/browser2/browser.py +++ b/weboob/tools/browser2/browser.py @@ -257,6 +257,7 @@ class BaseBrowser(object): verify=None, cert=None, proxies=None, + data_encoding=None, **kwargs): """ Make an HTTP request like a browser does: @@ -287,7 +288,7 @@ class BaseBrowser(object): :rtype: :class:`requests.Response` """ - req = self.build_request(url, referrer, **kwargs) + req = self.build_request(url, referrer, data_encoding=data_encoding, **kwargs) preq = self.prepare_request(req) if hasattr(preq, '_cookies'): @@ -321,7 +322,7 @@ class BaseBrowser(object): return response - def build_request(self, url, referrer=None, **kwargs): + def build_request(self, url, referrer=None, data_encoding=None, **kwargs): """ Does the same job as open(), but returns a Request without submitting it. @@ -340,6 +341,13 @@ class BaseBrowser(object): else: req.method = 'GET' + # convert unicode strings to proper encoding + if isinstance(req.data, unicode) and data_encoding: + req.data = req.data.encode(data_encoding) + if isinstance(req.data, dict) and data_encoding: + req.data = {k: v.encode(data_encoding) if isinstance(v, unicode) else v + for k, v in req.data.iteritems()} + if referrer is None: referrer = self.get_referrer(self.url, url) if referrer: diff --git a/weboob/tools/browser2/page.py b/weboob/tools/browser2/page.py index 7835d074..c3d90511 100644 --- a/weboob/tools/browser2/page.py +++ b/weboob/tools/browser2/page.py @@ -512,6 +512,7 @@ class Form(OrderedDict): """ Submit the form and tell browser to be located to the new page. """ + kwargs.setdefault('data_encoding', self.page.encoding) return self.page.browser.location(self.request, **kwargs) @@ -554,7 +555,8 @@ class HTMLPage(BasePage): def __init__(self, browser, response, *args, **kwargs): super(HTMLPage, self).__init__(browser, response, *args, **kwargs) - parser = html.HTMLParser(encoding=self.ENCODING or response.encoding) + self.encoding = self.ENCODING or response.encoding + parser = html.HTMLParser(encoding=self.encoding) self.doc = html.parse(BytesIO(response.content), parser) def get_form(self, xpath='//form', name=None, nr=None):