fix to remove fucking hack from feedparser >= 5.0 (closes #649)

This commit is contained in:
Romain Bignon 2011-04-24 16:56:49 +02:00
commit fa37ef38e3
2 changed files with 29 additions and 3 deletions

View file

@ -397,6 +397,29 @@ class BaseBrowser(mechanize.Browser):
def get_document(self, result):
    """Turn a raw HTTP *result* into a parsed document.

    Delegates to the browser's configured parser, decoding the
    response with the class-level ENCODING.
    """
    parser = self.parser
    return parser.parse(result, self.ENCODING)
# DO NOT ENABLE THIS PIECE OF CODE, EVEN THOUGH IT WOULD BE BETTER
# TO SANITIZE THE HTML.
#def _set_response(self, response, *args, **kwargs):
# import time
# if response and hasattr(response, 'set_data'):
# print time.time()
# r = response.read()
# start = 0
# end = 0
# new = ''
# lowr = r.lower()
# start = lowr[end:].find('<script')
# while start >= end:
# start_stop = start + lowr[start:].find('>') + 1
# new += r[end:start_stop]
# end = start + lowr[start:].find('</script>')
# new += r[start_stop:end].replace('<', '&lt;').replace('>', '&gt;')
# start = end + lowr[end:].find('<script')
# new += r[end:]
# response.set_data(new)
# print time.time()
# mechanize.Browser._set_response(self, response, *args, **kwargs)
def _change_location(self, result, no_login=False):
"""
This function is called when we have moved to a page, to load a Page

View file

@ -19,7 +19,12 @@
import datetime
import feedparser
# feedparser >= 5.0 stopped installing its relaxed `endbracket` regexp
# into sgmllib, and mechanize < 2.0 then fails on malformed web pages,
# so re-apply the replacement here.
# NOTE: compare parsed version tuples, not raw strings -- a plain string
# comparison is lexicographic and would wrongly treat e.g. '10.0' as
# older than '5.0'.
try:
    _feedparser_version = tuple(
        int(part) for part in feedparser.__version__.split('.')[:2]
    )
except ValueError:
    # Non-numeric component (e.g. a beta/rc tag): assume a modern
    # feedparser so the workaround stays active, matching the original
    # behaviour for versions like '5.0b1'.
    _feedparser_version = (5, 0)
if _feedparser_version >= (5, 0):
    import sgmllib
    import re
    sgmllib.endbracket = re.compile('[<>]')
__all__ = ['Entry', 'Newsfeed']
@ -59,8 +64,6 @@ class Entry:
self.content.append(i.value)
elif self.summary:
self.content.append(self.summary)
else:
self.content = None
if rssid_func:
self.id = rssid_func(self)