From 1b2031796b5913f4fb10f6645511e1a11cf000a1 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Sun, 5 May 2013 20:43:58 +0200 Subject: [PATCH] boobot: Force shitty Windows encoding Because web browsers actually do that! Test URL: http://www.universfreebox.com/article/20715/Deutche-Telekom-met-fin-a-l-internet-illimite-bientot-le-tour-de-la-France --- contrib/boobot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/boobot.py b/contrib/boobot.py index f4e7b62c..0325c52b 100755 --- a/contrib/boobot.py +++ b/contrib/boobot.py @@ -101,7 +101,7 @@ class BoobotBrowser(StandardBrowser): title = None if is_html: r = self.openurl(url) - encoding = EncodingFinder('windows-1252').encoding(r) + encoding = EncodingFinder('windows-1252').encoding(r).lower().replace('iso-8859-1', 'windows-1252') h = self.get_document(r, parser='lxml', encoding=encoding) for title in h.xpath('//head/title'): title = to_unicode(title.text_content())