force UTF-8 encoding with the standard mechanism

This commit is contained in:
Romain Bignon 2015-08-03 08:48:52 +02:00
commit 8798312e04

View file

@ -19,8 +19,6 @@
import calendar import calendar
from StringIO import StringIO
import lxml.html as html
from datetime import datetime from datetime import datetime
from decimal import Decimal from decimal import Decimal
@ -38,12 +36,7 @@ class FormatDate(Filter):
class BadUTF8Page(HTMLPage): class BadUTF8Page(HTMLPage):
def __init__(self, browser, response, *args, **kwargs): ENCODING = 'UTF-8'
# XXX it is intentional that the constructor of HTMLPage's parent class is
# called here (skipping HTMLPage's own constructor), but that's ugly.
super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
parser = html.HTMLParser(encoding='UTF-8')
self.doc = html.parse(StringIO(response.content), parser)
class DetailsPage(LoggedPage, BadUTF8Page): class DetailsPage(LoggedPage, BadUTF8Page):