Add the ability to use filter classes, not only filter instances, in a chain (refs #1426)

This commit is contained in:
Romain Bignon 2014-07-05 20:22:03 +02:00
commit 983ed221e2
4 changed files with 18 additions and 15 deletions

View file

@ -23,7 +23,7 @@ from weboob.capabilities.video import BaseVideo
from datetime import timedelta from datetime import timedelta
from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, JsonPage from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, JsonPage
from weboob.tools.browser2.filters import Filter, Link, CleanText, Regexp, Attr, Format, DateTime, Env, Dict, Duration from weboob.tools.browser2.filters import Filter, Link, CleanText, Regexp, Attr, Format, DateTime, Env, Dict, Duration, XPath
__all__ = ['IndexPage', 'VideoPage'] __all__ = ['IndexPage', 'VideoPage']
@ -31,7 +31,7 @@ __all__ = ['IndexPage', 'VideoPage']
class DurationPluzz(Filter): class DurationPluzz(Filter):
def filter(self, el): def filter(self, el):
duration = Regexp(CleanText('.'), '.+\|(.+)')(el[0]) duration = Regexp(CleanText('.'), r'.+\|(.+)')(el[0])
if duration[-1:] == "'": if duration[-1:] == "'":
t = [0, int(duration[:-1])] t = [0, int(duration[:-1])]
else: else:
@ -49,10 +49,11 @@ class IndexPage(HTMLPage):
klass = BaseVideo klass = BaseVideo
obj_title = Format('%s - %s', CleanText('h3/a'), CleanText('div[@class="rs-cell-details"]/a')) obj_title = Format('%s - %s', CleanText('h3/a'), CleanText('div[@class="rs-cell-details"]/a'))
obj_id = Regexp(Link('h3/a'), '^http://pluzz.francetv.fr/videos/.+,(.+).html$') obj_id = Link('h3/a') & Regexp(pattern=r'^http://pluzz.francetv.fr/videos/.+,(.+).html$')
obj_date = DateTime(Regexp(CleanText('div/p[@class="diffusion"]', obj_date = XPath('div/p[@class="diffusion"]') \
replace=[(u'à', u''), (u' ', u' ')]), & CleanText(replace=[(u'à', u''), (u' ', u' ')]) \
'.+(\d{2}-\d{2}-\d{2}.+\d{1,2}:\d{1,2}).+')) & Regexp(pattern=r'.+(\d{2}-\d{2}-\d{2}.+\d{1,2}:\d{1,2}).+') \
& DateTime
obj_duration = DurationPluzz('div/span[@class="type-duree"]') obj_duration = DurationPluzz('div/span[@class="type-duree"]')
def obj_thumbnail(self): def obj_thumbnail(self):
@ -75,15 +76,15 @@ class VideoPage(JsonPage):
self.env['url'] = video['url'] self.env['url'] = video['url']
obj_id = Env('id') obj_id = Env('id')
obj_title = Format(u'%s - %s', Dict('titre'), Dict('sous_titre')) obj_title = Format(u'%s - %s', Dict['titre'], Dict['sous_titre'])
obj_url = Env('url') obj_url = Env('url')
obj_date = DateTime(Dict('diffusion/date_debut')) obj_date = Dict['diffusion']['date_debut'] & DateTime
obj_duration = Duration(Dict('duree')) obj_duration = Dict['duree'] & Duration
obj_description = Dict('synopsis') obj_description = Dict['synopsis']
obj_ext = u'm3u8' obj_ext = u'm3u8'
def obj_thumbnail(self): def obj_thumbnail(self):
url = Format('http://pluzz.francetv.fr%s', Dict('image'))(self) url = Format('http://pluzz.francetv.fr%s', Dict['image'])(self)
thumbnail = BaseImage(url) thumbnail = BaseImage(url)
thumbnail.url = thumbnail.id thumbnail.url = thumbnail.id
return thumbnail return thumbnail

View file

@ -40,9 +40,9 @@ class IndexPage(HTMLPage):
class item(ItemElement): class item(ItemElement):
klass = BaseVideo klass = BaseVideo
obj_id = CSS('a') & Link() & Regexp(pattern=r'/videos/(.+)\.html') obj_id = CSS('a') & Link & Regexp(pattern=r'/videos/(.+)\.html')
obj_title = CSS('span#title1') & CleanText() obj_title = CSS('span#title1') & CleanText
obj_duration = CSS('span.thumbtime span') & CleanText() & Duration() | NotAvailable obj_duration = CSS('span.thumbtime span') & CleanText & Duration | NotAvailable
obj_nsfw = True obj_nsfw = True
def obj_thumbnail(self): def obj_thumbnail(self):

View file

@ -39,7 +39,7 @@ class VideoPage(HTMLPage):
obj_title = CleanText('//title') obj_title = CleanText('//title')
obj_nsfw = True obj_nsfw = True
obj_ext = u'flv' obj_ext = u'flv'
obj_duration = CleanText('//div[@id="video_text"]') & Duration() obj_duration = CleanText('//div[@id="video_text"]') & Duration
def obj_url(self): def obj_url(self):
real_id = int(self.env['id'].split('-')[-1]) real_id = int(self.env['id'].split('-')[-1])

View file

@ -71,6 +71,8 @@ class _Filter(object):
return self return self
def __and__(self, o): def __and__(self, o):
if isinstance(o, type) and issubclass(o, _Filter):
o = o()
o.selector = self o.selector = self
return o return o