From d0588a4c6c25da68906efb44f476fa76e0885eb7 Mon Sep 17 00:00:00 2001
From: Romain Bignon
Date: Sun, 4 Apr 2010 18:56:01 +0200
Subject: [PATCH] fucking workaround to fix bad HTML on DLFP

---
Notes (review annotations; `git am` ignores text between the three-dash
line and the diff):
    * DLFParser.parse() reads the whole `data` stream, rewrites every
      '<<' to '<', wraps the result in a new StringIO and hands it to
      StandardParser.parse() with the unchanged `encoding`. The rewrite
      is a plain str.replace over the entire payload, so it is not
      limited to markup.
    * DLFP.__init__() always sets kwargs['parser'] = DLFParser before
      delegating to Browser.__init__(), so a `parser` keyword passed by
      the caller is overwritten.
    * cStringIO is a Python 2 module.
    * NOTE(review): line breaks, whitespace-only context lines and
      indentation were re-derived from the hunk headers (11 -> 24 and
      6 -> 10 lines); confirm with `git apply --check` against the
      target tree.

 weboob/backends/dlfp/browser.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/weboob/backends/dlfp/browser.py b/weboob/backends/dlfp/browser.py
index d71f0cba..5eb4f960 100644
--- a/weboob/backends/dlfp/browser.py
+++ b/weboob/backends/dlfp/browser.py
@@ -18,11 +18,24 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 """
 
+from cStringIO import StringIO
+
 from weboob.tools.browser import Browser
 from .pages.index import IndexPage, LoginPage
 from .pages.news import ContentPage
 from .tools import id2url
 
+from weboob.tools.parser import StandardParser
+
+# Parser
+class DLFParser(StandardParser):
+    def parse(self, data, encoding):
+        s = data.read()
+        s = s.replace('<<', '<')
+        data = StringIO(s)
+        return StandardParser.parse(self, data, encoding)
+
+# Browser
 class DLFP(Browser):
     DOMAIN = 'linuxfr.org'
     PROTOCOL = 'https'
@@ -33,6 +46,10 @@ class DLFP(Browser):
              'https://linuxfr.org/.*/\d+.html': ContentPage
             }
 
+    def __init__(self, *args, **kwargs):
+        kwargs['parser'] = DLFParser
+        Browser.__init__(self, *args, **kwargs)
+
     def home(self):
         return self.location('https://linuxfr.org')
 