fucking workaround to fix bad HTML on DLFP
This commit is contained in:
parent
e59af132ba
commit
d0588a4c6c
1 changed file with 17 additions and 0 deletions
|
|
@@ -18,11 +18,24 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from cStringIO import StringIO
|
||||||
|
|
||||||
from weboob.tools.browser import Browser
|
from weboob.tools.browser import Browser
|
||||||
from .pages.index import IndexPage, LoginPage
|
from .pages.index import IndexPage, LoginPage
|
||||||
from .pages.news import ContentPage
|
from .pages.news import ContentPage
|
||||||
from .tools import id2url
|
from .tools import id2url
|
||||||
|
|
||||||
|
from weboob.tools.parser import StandardParser
|
||||||
|
|
||||||
|
# Parser
|
||||||
|
class DLFParser(StandardParser):
    """StandardParser variant that patches bad DLFP HTML before parsing."""

    def parse(self, data, encoding):
        """Parse *data* after collapsing every ``<<`` into a single ``<``.

        data: file-like object exposing ``read()``; it is consumed entirely.
        encoding: forwarded unchanged to ``StandardParser.parse``.

        Returns whatever ``StandardParser.parse`` returns for the cleaned
        content.
        """
        # The whole document is read into memory so the doubled opening
        # brackets can be fixed, then re-wrapped as a file-like object.
        cleaned = data.read().replace('<<', '<')
        return StandardParser.parse(self, StringIO(cleaned), encoding)
|
||||||
|
|
||||||
|
# Browser
|
||||||
class DLFP(Browser):
|
class DLFP(Browser):
|
||||||
DOMAIN = 'linuxfr.org'
|
DOMAIN = 'linuxfr.org'
|
||||||
PROTOCOL = 'https'
|
PROTOCOL = 'https'
|
||||||
|
|
@@ -33,6 +46,10 @@ class DLFP(Browser):
|
||||||
'https://linuxfr.org/.*/\d+.html': ContentPage
|
'https://linuxfr.org/.*/\d+.html': ContentPage
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
    """Initialise the browser with DLFParser as its parser.

    Any ``parser`` keyword supplied by the caller is overwritten; every
    other positional and keyword argument is forwarded to
    ``Browser.__init__`` untouched.
    """
    kwargs.update(parser=DLFParser)
    Browser.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
def home(self):
    """Open the linuxfr.org front page and return ``self.location``'s result."""
    front_page = 'https://linuxfr.org'
    return self.location(front_page)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue