From 82f47bff88c29f98d382b0676484daab6096739b Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Fri, 16 May 2014 15:37:24 +0200 Subject: [PATCH] Allow forcing a Page content encoding --- weboob/tools/browser2/page.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/weboob/tools/browser2/page.py b/weboob/tools/browser2/page.py index 68e59cc2..86889dbe 100644 --- a/weboob/tools/browser2/page.py +++ b/weboob/tools/browser2/page.py @@ -517,9 +517,15 @@ class JsonPage(BasePage): class XMLPage(BasePage): + ENCODING = None + """ + Force a page encoding. + It is recommended to use None for autodetection. + """ + def __init__(self, browser, response, *args, **kwargs): super(XMLPage, self).__init__(browser, response, *args, **kwargs) - parser = etree.XMLParser(encoding=response.encoding) + parser = etree.XMLParser(encoding=self.ENCODING or response.encoding) self.doc = etree.parse(StringIO(response.content), parser) @@ -535,9 +541,15 @@ class HTMLPage(BasePage): """ FORM_CLASS = Form + ENCODING = None + """ + Force a page encoding. + It is recommended to use None for autodetection. + """ + def __init__(self, browser, response, *args, **kwargs): super(HTMLPage, self).__init__(browser, response, *args, **kwargs) - parser = html.HTMLParser(encoding=response.encoding) + parser = html.HTMLParser(encoding=self.ENCODING or response.encoding) self.doc = html.parse(StringIO(response.content), parser) def get_form(self, xpath='//form', name=None, nr=None):