boobot: Try to use HTML encoding

This commit is contained in:
Laurent Bachelier 2013-06-23 20:26:12 +02:00
commit b6021ec90d

View file

@ -116,6 +116,14 @@ class BoobotBrowser(StandardBrowser):
hsize = self.human_size(size)
r.seek(0)
encoding = EncodingFinder('windows-1252').encoding(r).lower()
try:
h = self.get_document(r, parser='lxml', encoding=encoding)
for meta in h.xpath('//head/meta'):
encoding = meta.attrib.get('charset', encoding).lower()
except Exception as e:
print e
finally:
r.seek(0)
if encoding == 'iso-8859-1':
encoding = 'windows-1252'
try: