browser2: Add an option to convert POST data to the proper encoding

The encoding is autodetected for forms, using the encoding of the page they
belong to; there is no other way to know which encoding is expected.
This commit is contained in:
Laurent Bachelier 2014-05-19 01:01:25 +02:00
commit 14b1b56914
3 changed files with 15 additions and 8 deletions

View file

@ -63,13 +63,10 @@ class CaptchaPage(HTMLPage):
class PostPage(HTMLPage):
# TODO handle encoding in Browser2
def post(self, paste, expiration=None):
encoding = 'ISO-8859-1'
form = self.get_form(name='editor')
form['code2'] = paste.contents.encode(encoding)
form['poster'] = paste.title.encode(encoding)
form['code2'] = paste.contents
form['poster'] = paste.title
if expiration:
form['expiry'] = expiration
form.submit()

View file

@ -257,6 +257,7 @@ class BaseBrowser(object):
verify=None,
cert=None,
proxies=None,
data_encoding=None,
**kwargs):
"""
Make an HTTP request like a browser does:
@ -287,7 +288,7 @@ class BaseBrowser(object):
:rtype: :class:`requests.Response`
"""
req = self.build_request(url, referrer, **kwargs)
req = self.build_request(url, referrer, data_encoding=data_encoding, **kwargs)
preq = self.prepare_request(req)
if hasattr(preq, '_cookies'):
@ -321,7 +322,7 @@ class BaseBrowser(object):
return response
def build_request(self, url, referrer=None, **kwargs):
def build_request(self, url, referrer=None, data_encoding=None, **kwargs):
"""
Does the same job as open(), but returns a Request without
submitting it.
@ -340,6 +341,13 @@ class BaseBrowser(object):
else:
req.method = 'GET'
# convert unicode strings to proper encoding
if isinstance(req.data, unicode) and data_encoding:
req.data = req.data.encode(data_encoding)
if isinstance(req.data, dict) and data_encoding:
req.data = {k: v.encode(data_encoding) if isinstance(v, unicode) else v
for k, v in req.data.iteritems()}
if referrer is None:
referrer = self.get_referrer(self.url, url)
if referrer:

View file

@ -512,6 +512,7 @@ class Form(OrderedDict):
"""
Submit the form and tell browser to be located to the new page.
"""
kwargs.setdefault('data_encoding', self.page.encoding)
return self.page.browser.location(self.request, **kwargs)
@ -554,7 +555,8 @@ class HTMLPage(BasePage):
def __init__(self, browser, response, *args, **kwargs):
super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
parser = html.HTMLParser(encoding=self.ENCODING or response.encoding)
self.encoding = self.ENCODING or response.encoding
parser = html.HTMLParser(encoding=self.encoding)
self.doc = html.parse(BytesIO(response.content), parser)
def get_form(self, xpath='//form', name=None, nr=None):