fucking workaround to fix bad HTML on DLFP
This commit is contained in:
parent
e59af132ba
commit
d0588a4c6c
1 changed files with 17 additions and 0 deletions
|
|
@@ -18,11 +18,24 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|||
|
||||
"""
|
||||
|
||||
from cStringIO import StringIO
|
||||
|
||||
from weboob.tools.browser import Browser
|
||||
from .pages.index import IndexPage, LoginPage
|
||||
from .pages.news import ContentPage
|
||||
from .tools import id2url
|
||||
|
||||
from weboob.tools.parser import StandardParser
|
||||
|
||||
# Parser
|
||||
class DLFParser(StandardParser):
    """StandardParser variant that patches DLFP's bad HTML before parsing.

    The raw page content is read in full and every '<<' sequence is
    collapsed to a single '<' before the regular parser sees it.
    """

    def parse(self, data, encoding):
        """Parse *data* after repairing doubled '<' characters.

        data is a file-like object; it is read entirely, patched, wrapped
        in a new StringIO and handed to StandardParser.parse together with
        the unchanged *encoding*. The result of that call is returned.
        """
        repaired = data.read().replace('<<', '<')
        return StandardParser.parse(self, StringIO(repaired), encoding)
|
||||
|
||||
# Browser
|
||||
class DLFP(Browser):
|
||||
DOMAIN = 'linuxfr.org'
|
||||
PROTOCOL = 'https'
|
||||
|
|
@@ -33,6 +46,10 @@ class DLFP(Browser):
|
|||
'https://linuxfr.org/.*/\d+.html': ContentPage
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Build the browser, always using DLFParser as its parser.

    Any 'parser' keyword supplied by the caller is overwritten; all other
    positional and keyword arguments are passed to Browser.__init__ as-is.
    """
    kwargs.update(parser=DLFParser)
    Browser.__init__(self, *args, **kwargs)
|
||||
|
||||
def home(self):
    """Open the linuxfr.org front page and return what self.location gives back."""
    front_page = 'https://linuxfr.org'
    return self.location(front_page)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue