From d5e1d22c2dc9da007d8190900cace62c904912ee Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Tue, 13 Apr 2010 15:03:50 +0200 Subject: [PATCH] bad fix when page is strangely encoded (for example fucking DLFP) --- weboob/tools/parser.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/weboob/tools/parser.py b/weboob/tools/parser.py index c8cab83d..cf35d4c8 100644 --- a/weboob/tools/parser.py +++ b/weboob/tools/parser.py @@ -93,5 +93,14 @@ def tostring(element): e.tail = element.tail for sub in element.getchildren(): e.append(sub) - s = ElementTree.tostring(e, 'utf-8') + + s = '' + # XXX OK if it doesn't work with utf-8, the result will be fucking ugly. + for encoding in ('utf-8', 'ISO-8859-1'): + try: + s = ElementTree.tostring(e, encoding) + except UnicodeError: + continue + else: + break return unicode(s)