diff --git a/weboob/tools/browser/browser.py b/weboob/tools/browser/browser.py index ed779a9f..1d38e0ec 100644 --- a/weboob/tools/browser/browser.py +++ b/weboob/tools/browser/browser.py @@ -397,6 +397,29 @@ class BaseBrowser(mechanize.Browser): def get_document(self, result): return self.parser.parse(result, self.ENCODING) + # DO NOT ENABLE THIS FUCKING PEACE OF CODE EVEN IF IT WOULD BE BETTER + # TO SANITARIZE FUCKING HTML. + #def _set_response(self, response, *args, **kwargs): + # import time + # if response and hasattr(response, 'set_data'): + # print time.time() + # r = response.read() + # start = 0 + # end = 0 + # new = '' + # lowr = r.lower() + # start = lowr[end:].find('= end: + # start_stop = start + lowr[start:].find('>') + 1 + # new += r[end:start_stop] + # end = start + lowr[start:].find('') + # new += r[start_stop:end].replace('<', '<').replace('>', '>') + # start = end + lowr[end:].find('= '5.0': + # feedparser >= 5.0 replaces this regexp on sgmllib and mechanize < 2.0 + # fails with malformated webpages. + import sgmllib + import re + sgmllib.endbracket = re.compile('[<>]') __all__ = ['Entry', 'Newsfeed'] @@ -59,8 +64,6 @@ class Entry: self.content.append(i.value) elif self.summary: self.content.append(self.summary) - else: - self.content = None if rssid_func: self.id = rssid_func(self)