browser2: Add an option to convert POST data to the proper encoding

And autodetect it on forms, using the encoding of the page that contains
the form. There is no other way to know what the expected encoding is.
This commit is contained in:
Laurent Bachelier 2014-05-19 01:01:25 +02:00
commit 14b1b56914
3 changed files with 15 additions and 8 deletions

View file

@ -63,13 +63,10 @@ class CaptchaPage(HTMLPage):
class PostPage(HTMLPage): class PostPage(HTMLPage):
# TODO handle encoding in Browser2
def post(self, paste, expiration=None): def post(self, paste, expiration=None):
encoding = 'ISO-8859-1'
form = self.get_form(name='editor') form = self.get_form(name='editor')
form['code2'] = paste.contents.encode(encoding) form['code2'] = paste.contents
form['poster'] = paste.title.encode(encoding) form['poster'] = paste.title
if expiration: if expiration:
form['expiry'] = expiration form['expiry'] = expiration
form.submit() form.submit()

View file

@ -257,6 +257,7 @@ class BaseBrowser(object):
verify=None, verify=None,
cert=None, cert=None,
proxies=None, proxies=None,
data_encoding=None,
**kwargs): **kwargs):
""" """
Make an HTTP request like a browser does: Make an HTTP request like a browser does:
@ -287,7 +288,7 @@ class BaseBrowser(object):
:rtype: :class:`requests.Response` :rtype: :class:`requests.Response`
""" """
req = self.build_request(url, referrer, **kwargs) req = self.build_request(url, referrer, data_encoding=data_encoding, **kwargs)
preq = self.prepare_request(req) preq = self.prepare_request(req)
if hasattr(preq, '_cookies'): if hasattr(preq, '_cookies'):
@ -321,7 +322,7 @@ class BaseBrowser(object):
return response return response
def build_request(self, url, referrer=None, **kwargs): def build_request(self, url, referrer=None, data_encoding=None, **kwargs):
""" """
Does the same job as open(), but returns a Request without Does the same job as open(), but returns a Request without
submitting it. submitting it.
@ -340,6 +341,13 @@ class BaseBrowser(object):
else: else:
req.method = 'GET' req.method = 'GET'
# convert unicode strings to proper encoding
if isinstance(req.data, unicode) and data_encoding:
req.data = req.data.encode(data_encoding)
if isinstance(req.data, dict) and data_encoding:
req.data = {k: v.encode(data_encoding) if isinstance(v, unicode) else v
for k, v in req.data.iteritems()}
if referrer is None: if referrer is None:
referrer = self.get_referrer(self.url, url) referrer = self.get_referrer(self.url, url)
if referrer: if referrer:

View file

@ -512,6 +512,7 @@ class Form(OrderedDict):
""" """
Submit the form and tell browser to be located to the new page. Submit the form and tell browser to be located to the new page.
""" """
kwargs.setdefault('data_encoding', self.page.encoding)
return self.page.browser.location(self.request, **kwargs) return self.page.browser.location(self.request, **kwargs)
@ -554,7 +555,8 @@ class HTMLPage(BasePage):
def __init__(self, browser, response, *args, **kwargs): def __init__(self, browser, response, *args, **kwargs):
super(HTMLPage, self).__init__(browser, response, *args, **kwargs) super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
parser = html.HTMLParser(encoding=self.ENCODING or response.encoding) self.encoding = self.ENCODING or response.encoding
parser = html.HTMLParser(encoding=self.encoding)
self.doc = html.parse(BytesIO(response.content), parser) self.doc = html.parse(BytesIO(response.content), parser)
def get_form(self, xpath='//form', name=None, nr=None): def get_form(self, xpath='//form', name=None, nr=None):