fucking workaround to fix bad HTML on DLFP

This commit is contained in:
Romain Bignon 2010-04-04 18:56:01 +02:00
commit d0588a4c6c

View file

@ -18,11 +18,24 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from cStringIO import StringIO
from weboob.tools.browser import Browser from weboob.tools.browser import Browser
from .pages.index import IndexPage, LoginPage from .pages.index import IndexPage, LoginPage
from .pages.news import ContentPage from .pages.news import ContentPage
from .tools import id2url from .tools import id2url
from weboob.tools.parser import StandardParser
# Parser
class DLFParser(StandardParser):
    """StandardParser variant that pre-processes the page source.

    Workaround for bad HTML served by the site: every '<<' sequence in the
    input is replaced by a single '<' before the regular parser sees it.
    """

    def parse(self, data, encoding):
        """Clean up the raw page and delegate to StandardParser.parse.

        data -- object exposing read(); it is consumed entirely here.
        encoding -- passed through unchanged to StandardParser.parse.

        Returns whatever StandardParser.parse returns for the cleaned input.
        """
        # Read everything up front so the substitution sees the whole
        # document, then wrap the result in a fresh file-like object
        # for the base parser.
        cleaned = data.read().replace('<<', '<')
        return StandardParser.parse(self, StringIO(cleaned), encoding)
# Browser
class DLFP(Browser): class DLFP(Browser):
DOMAIN = 'linuxfr.org' DOMAIN = 'linuxfr.org'
PROTOCOL = 'https' PROTOCOL = 'https'
@ -33,6 +46,10 @@ class DLFP(Browser):
'https://linuxfr.org/.*/\d+.html': ContentPage 'https://linuxfr.org/.*/\d+.html': ContentPage
} }
def __init__(self, *args, **kwargs):
    """Set up the browser with DLFParser as its parser.

    Any 'parser' keyword supplied by the caller is overwritten; every other
    positional and keyword argument is forwarded to Browser.__init__ as is.
    """
    # Force the workaround parser regardless of what the caller asked for.
    kwargs.update(parser=DLFParser)
    Browser.__init__(self, *args, **kwargs)
def home(self): def home(self):
return self.location('https://linuxfr.org') return self.location('https://linuxfr.org')