fucking workaround to fix bad HTML on DLFP

This commit is contained in:
Romain Bignon 2010-04-04 18:56:01 +02:00
commit d0588a4c6c

View file

@ -18,11 +18,24 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
from cStringIO import StringIO
from weboob.tools.browser import Browser
from .pages.index import IndexPage, LoginPage
from .pages.news import ContentPage
from .tools import id2url
from weboob.tools.parser import StandardParser
# Parser
class DLFParser(StandardParser):
    """StandardParser variant that patches malformed DLFP markup first.

    Works around bad HTML served by DLFP: every '<<' sequence in the page
    is collapsed to a single '<' before the regular parser sees it.
    """

    def parse(self, data, encoding):
        """Read ``data`` fully, repair it, and parse the repaired copy.

        ``data`` must provide a ``read()`` method returning the page as a
        string. The repaired text is wrapped in a fresh ``StringIO`` and
        handed, together with ``encoding``, to ``StandardParser.parse``,
        whose result is returned unchanged.
        """
        raw_html = data.read()
        # The replacement is applied to the whole document, not only to tags.
        repaired = StringIO(raw_html.replace('<<', '<'))
        return StandardParser.parse(self, repaired, encoding)
# Browser
class DLFP(Browser):
DOMAIN = 'linuxfr.org'
PROTOCOL = 'https'
@ -33,6 +46,10 @@ class DLFP(Browser):
'https://linuxfr.org/.*/\d+.html': ContentPage
}
def __init__(self, *args, **kwargs):
    """Initialise the browser so that it always parses with DLFParser.

    A ``parser`` keyword supplied by the caller is overwritten; all other
    positional and keyword arguments are forwarded to ``Browser.__init__``
    as given.
    """
    kwargs.update(parser=DLFParser)
    Browser.__init__(self, *args, **kwargs)
def home(self):
    """Go to the site's front page and return whatever ``location`` returns."""
    front_page_url = 'https://linuxfr.org'
    return self.location(front_page_url)