fix to remove fucking hack from feedparser >= 5.0 (closes #649)
This commit is contained in:
parent
30f2f8e374
commit
fa37ef38e3
2 changed files with 29 additions and 3 deletions
|
|
@ -397,6 +397,29 @@ class BaseBrowser(mechanize.Browser):
|
||||||
def get_document(self, result):
|
def get_document(self, result):
|
||||||
return self.parser.parse(result, self.ENCODING)
|
return self.parser.parse(result, self.ENCODING)
|
||||||
|
|
||||||
|
# DO NOT ENABLE THIS FUCKING PIECE OF CODE EVEN IF IT WOULD BE BETTER
|
||||||
|
# TO SANITIZE FUCKING HTML.
|
||||||
|
#def _set_response(self, response, *args, **kwargs):
|
||||||
|
# import time
|
||||||
|
# if response and hasattr(response, 'set_data'):
|
||||||
|
# print time.time()
|
||||||
|
# r = response.read()
|
||||||
|
# start = 0
|
||||||
|
# end = 0
|
||||||
|
# new = ''
|
||||||
|
# lowr = r.lower()
|
||||||
|
# start = lowr[end:].find('<script')
|
||||||
|
# while start >= end:
|
||||||
|
# start_stop = start + lowr[start:].find('>') + 1
|
||||||
|
# new += r[end:start_stop]
|
||||||
|
# end = start + lowr[start:].find('</script>')
|
||||||
|
# new += r[start_stop:end].replace('<', '&lt;').replace('>', '&gt;')
|
||||||
|
# start = end + lowr[end:].find('<script')
|
||||||
|
# new += r[end:]
|
||||||
|
# response.set_data(new)
|
||||||
|
# print time.time()
|
||||||
|
# mechanize.Browser._set_response(self, response, *args, **kwargs)
|
||||||
|
|
||||||
def _change_location(self, result, no_login=False):
|
def _change_location(self, result, no_login=False):
|
||||||
"""
|
"""
|
||||||
This function is called when we have moved to a page, to load a Page
|
This function is called when we have moved to a page, to load a Page
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,12 @@
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import feedparser
|
import feedparser
|
||||||
|
import re

# feedparser >= 5.0 replaces the sgmllib.endbracket regexp, and
# mechanize < 2.0 then fails on malformed webpages, so set the regexp
# back to one that matches either angle bracket.
#
# The version is compared numerically on its first two components: a plain
# string comparison is lexicographic and would rank '10.0' below '5.0',
# silently skipping the workaround on later releases.
_feedparser_version = tuple(
    int(part) for part in re.findall(r'\d+', feedparser.__version__)[:2])
if _feedparser_version >= (5, 0):
    import sgmllib
    sgmllib.endbracket = re.compile('[<>]')
|
||||||
|
|
||||||
__all__ = ['Entry', 'Newsfeed']
|
__all__ = ['Entry', 'Newsfeed']
|
||||||
|
|
||||||
|
|
@ -59,8 +64,6 @@ class Entry:
|
||||||
self.content.append(i.value)
|
self.content.append(i.value)
|
||||||
elif self.summary:
|
elif self.summary:
|
||||||
self.content.append(self.summary)
|
self.content.append(self.summary)
|
||||||
else:
|
|
||||||
self.content = None
|
|
||||||
|
|
||||||
if rssid_func:
|
if rssid_func:
|
||||||
self.id = rssid_func(self)
|
self.id = rssid_func(self)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue