diff --git a/modules/arte/pages.py b/modules/arte/pages.py index 9e1b4abe..43b88e08 100644 --- a/modules/arte/pages.py +++ b/modules/arte/pages.py @@ -23,6 +23,8 @@ import re import urllib from weboob.tools.browser import BasePage, BrokenPageError +from weboob.tools.capabilities.thumbnail import Thumbnail +from weboob.capabilities import NotAvailable from .video import ArteVideo @@ -53,7 +55,7 @@ class IndexPage(BasePage): video.rating_max = rating_max thumb = self.parser.select(div, 'img[class=thumbnail]', 1) - video.thumbnail_url = 'http://videos.arte.tv' + thumb.attrib['src'] + video.thumbnail = Thumbnail('http://videos.arte.tv' + thumb.attrib['src']) try: parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':') @@ -69,6 +71,8 @@ class IndexPage(BasePage): else: video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds)) + video.set_empty_fields(NotAvailable, ('url',)) + yield video class VideoPage(BasePage): @@ -77,6 +81,7 @@ class VideoPage(BasePage): video = ArteVideo(self.group_dict['id']) video.title = self.get_title() video.url = self.get_url(lang, quality) + video.set_empty_fields(NotAvailable) return video def get_title(self): diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py index 5a4a3b1d..8a720ea7 100644 --- a/modules/dailymotion/pages.py +++ b/modules/dailymotion/pages.py @@ -22,7 +22,7 @@ import urllib import re from weboob.tools.capabilities.thumbnail import Thumbnail -from weboob.capabilities.base import NotAvailable +from weboob.capabilities import NotAvailable from weboob.tools.misc import html2text from weboob.tools.browser import BasePage, BrokenPageError @@ -73,8 +73,8 @@ class IndexPage(BasePage): rating_div = self.parser.select(div, 'div.small_stars', 1) video.rating_max = self.get_rate(rating_div) video.rating = self.get_rate(rating_div.find('div')) - # XXX missing date - video.date = NotAvailable + + video.set_empty_fields(NotAvailable, ('url',)) yield video def get_rate(self, div): @@ -109,4 +109,6 @@ class VideoPage(BasePage): mediaURL = urllib.unquote(mobj.group(1)) video.url = mediaURL + video.set_empty_fields(NotAvailable) + return video diff --git a/modules/ina/backend.py b/modules/ina/backend.py index 6e5fa712..bbdb6878 100644 --- a/modules/ina/backend.py +++ b/modules/ina/backend.py @@ -47,7 +47,6 @@ class InaBackend(BaseBackend, ICapVideo): return self.browser.search_videos(pattern) def fill_video(self, video, fields): - return video if fields != ['thumbnail']: # if we don't want only the thumbnail, we probably want also every fields with self.browser: diff --git a/modules/ina/pages/search.py b/modules/ina/pages/search.py index 2479c316..426ff34e 100644 --- a/modules/ina/pages/search.py +++ b/modules/ina/pages/search.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2010-2012 Romain Bignon # # This file is part of weboob. # @@ -21,8 +21,8 @@ import datetime import re -from weboob.tools.browser import BasePage -from weboob.tools.browser import BrokenPageError +from weboob.tools.browser import BasePage, BrokenPageError +from weboob.tools.capabilities.thumbnail import Thumbnail from ..video import InaVideo @@ -42,24 +42,21 @@ class SearchPage(BasePage): for li in ul.findall('li'): id = re.sub(self.URL_REGEXP, r'\1', li.find('a').attrib['href']) - thumbnail = 'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src'] + video = InaVideo('boutique.%s' % id) - title = self.parser.select(li, 'p.titre', 1).text + video.thumbnail = Thumbnail('http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src']) + + video.title = self.parser.select(li, 'p.titre', 1).text date = self.parser.select(li, 'p.date', 1).text day, month, year = [int(s) for s in date.split('/')] - date = datetime.datetime(year, month, day) + video.date = datetime.datetime(year, month, day) duration = self.parser.select(li, 'p.duree', 1).text m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration) if m: - duration = datetime.timedelta(hours=int(m.group(2) or 0), minutes=int(m.group(4) or 0), seconds=int(m.group(5))) + video.duration = datetime.timedelta(hours=int(m.group(2) or 0), minutes=int(m.group(4) or 0), seconds=int(m.group(5))) else: raise BrokenPageError('Unable to match duration (%r)' % duration) - yield InaVideo('boutique.%s' % id, - title=title, - date=date, - duration=duration, - thumbnail_url=thumbnail, - ) + yield video diff --git a/modules/ina/pages/video.py b/modules/ina/pages/video.py index 55f5c729..37c28050 100644 --- a/modules/ina/pages/video.py +++ b/modules/ina/pages/video.py @@ -25,8 +25,8 @@ try: except ImportError: from cgi import parse_qs -from weboob.tools.browser import BasePage -from weboob.tools.browser import BrokenPageError +from weboob.capabilities import NotAvailable +from weboob.tools.browser import BasePage, BrokenPageError from ..video import InaVideo @@ -45,6 +45,8 @@ class BaseVideoPage(BasePage): video.date = date video.duration = duration video.description = self.get_description() + + video.set_empty_fields(NotAvailable) return video def get_id(self): diff --git a/modules/youjizz/pages/index.py b/modules/youjizz/pages/index.py index 3b9ac594..42c99275 100644 --- a/modules/youjizz/pages/index.py +++ b/modules/youjizz/pages/index.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Roger Philibert +# Copyright(C) 2010-2012 Roger Philibert # # This file is part of weboob. # @@ -21,8 +21,8 @@ import datetime import re -from weboob.tools.browser import BasePage -from weboob.tools.browser import BrokenPageError +from weboob.tools.browser import BasePage, BrokenPageError +from weboob.tools.capabilities.thumbnail import Thumbnail from ..video import YoujizzVideo @@ -38,10 +38,12 @@ class IndexPage(BasePage): url = a.attrib['href'] _id = re.sub(r'/videos/(.+)\.html', r'\1', url) - thumbnail_url = span.find('.//img').attrib['src'] + video = YoujizzVideo(_id) + + video.thumbnail = Thumbnail(span.find('.//img').attrib['src']) title_el = self.parser.select(span, 'span#title1', 1) - title = title_el.text.strip() + video.title = title_el.text.strip() time_span = self.parser.select(span, 'span.thumbtime span', 1) time_txt = time_span.text.strip().replace(';', ':') @@ -52,9 +54,6 @@ class IndexPage(BasePage): else: raise BrokenPageError('Unable to parse the video duration: %s' % time_txt) + video.duration = datetime.timedelta(minutes=minutes, seconds=seconds) - yield YoujizzVideo(_id, - title=title, - duration=datetime.timedelta(minutes=minutes, seconds=seconds), - thumbnail_url=thumbnail_url, - ) + yield video diff --git a/modules/youporn/pages/index.py b/modules/youporn/pages/index.py index 42cae84d..f746971a 100644 --- a/modules/youporn/pages/index.py +++ b/modules/youporn/pages/index.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2010-2012 Romain Bignon # # This file is part of weboob. # @@ -21,6 +21,9 @@ import re import datetime +from weboob.capabilities.base import NotAvailable +from weboob.tools.capabilities.thumbnail import Thumbnail + from .base import PornPage from ..video import YoupornVideo @@ -45,7 +48,10 @@ class IndexPage(PornPage): url = a.attrib['href'] _id = url[len('/watch/'):] _id = _id[:_id.find('/')] - title = a.text.strip() + + video = YoupornVideo(int(_id)) + video.title = a.text.strip() + video.thumbnail = Thumbnail(thumbnail_url) hours = minutes = seconds = 0 div = li.cssselect('h2[class=duration]') @@ -56,19 +62,15 @@ class IndexPage(PornPage): elif len(pack) == 2: minutes, seconds = pack - rating = 0 - rating_max = 0 + video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds) + div = li.cssselect('div.stars') if div: m = re.match('.*star-(\d).*', div[0].attrib.get('class', '')) if m: - rating = int(m.group(1)) - rating_max = 5 + video.rating = int(m.group(1)) + video.rating_max = 5 - yield YoupornVideo(int(_id), - title=title, - rating=rating, - rating_max=rating_max, - duration=datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds), - thumbnail_url=thumbnail_url, - ) + video.set_empty_fields(NotAvailable, ('url', 'author')) + + yield video diff --git a/modules/youporn/pages/video.py b/modules/youporn/pages/video.py index cf9337cb..ea782672 100644 --- a/modules/youporn/pages/video.py +++ b/modules/youporn/pages/video.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2010-2012 Romain Bignon # # This file is part of weboob. # @@ -22,6 +22,7 @@ import re import datetime from dateutil.parser import parse as parse_dt +from weboob.capabilities.base import NotAvailable from weboob.tools.browser import BrokenPageError from .base import PornPage @@ -37,6 +38,8 @@ class VideoPage(PornPage): video.title = self.get_title() video.url, video.ext = self.get_url() self.set_details(video) + + video.set_empty_fields(NotAvailable) return video def get_url(self): diff --git a/modules/youtube/backend.py b/modules/youtube/backend.py index ebf87496..b4d17f9a 100644 --- a/modules/youtube/backend.py +++ b/modules/youtube/backend.py @@ -25,7 +25,9 @@ import gdata.youtube.service import re import urllib +from weboob.capabilities.base import NotAvailable from weboob.capabilities.video import ICapVideo +from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.tools.backend import BaseBackend, BackendConfig from weboob.tools.misc import to_unicode from weboob.tools.value import ValueBackendPassword, Value @@ -61,11 +63,11 @@ class YoutubeBackend(BaseBackend, ICapVideo): """ Parse an entry returned by gdata and return a Video object. """ - video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()), - title=to_unicode(entry.media.title.text.strip()), - duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))), - thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()), - ) + video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip())) + video.title = to_unicode(entry.media.title.text.strip()) + video.duration = datetime.timedelta(seconds=int(entry.media.duration.seconds.strip())) + video.thumbnail = Thumbnail(to_unicode(entry.media.thumbnail[0].url.strip())) + if entry.author[0].name.text: video.author = entry.author[0].name.text.strip() if entry.media.name: @@ -104,6 +106,8 @@ class YoutubeBackend(BaseBackend, ICapVideo): video = self._entry2video(entry) self._set_video_url(video) + + video.set_empty_fields(NotAvailable) return video def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): diff --git a/weboob/capabilities/__init__.py b/weboob/capabilities/__init__.py index 8b137891..88b00a16 100644 --- a/weboob/capabilities/__init__.py +++ b/weboob/capabilities/__init__.py @@ -1 +1,5 @@ +# -*- coding: utf-8 -*- +from .base import NotLoaded, NotAvailable, CapBaseObject, IBaseCap + +__all__ = ['NotLoaded', 'NotAvailable', 'CapBaseObject', 'IBaseCap'] diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index bf4bb27d..fb77d986 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -20,7 +20,7 @@ from datetime import datetime, timedelta -from .base import IBaseCap, CapBaseObject, NotLoaded +from .base import IBaseCap, CapBaseObject, NotAvailable from weboob.tools.capabilities.thumbnail import Thumbnail @@ -31,25 +31,21 @@ class BaseVideo(CapBaseObject): Represents a video. This object has to be inherited to specify how to calculate the URL of the video from its ID. """ - def __init__(self, _id, title=NotLoaded, url=NotLoaded, author=NotLoaded, duration=NotLoaded, date=NotLoaded, - rating=NotLoaded, rating_max=NotLoaded, thumbnail=NotLoaded, thumbnail_url=None, nsfw=False): + + def __init__(self, _id): CapBaseObject.__init__(self, unicode(_id)) - self.add_field('title', basestring, title) - self.add_field('url', basestring, url) + self.add_field('title', basestring) + self.add_field('url', basestring) self.add_field('ext', basestring) - self.add_field('author', basestring, author) + self.add_field('author', basestring) self.add_field('description', basestring) - self.add_field('duration', (int,long,timedelta), duration) - self.add_field('date', datetime, date) - self.add_field('rating', (int,long,float), rating) - self.add_field('rating_max', (int,long,float), rating_max) - self.add_field('thumbnail', Thumbnail, thumbnail) - self.add_field('nsfw', bool, nsfw) - - # XXX remove this and fix all backends - if thumbnail_url is not None and self.thumbnail is NotLoaded: - self.thumbnail = Thumbnail(thumbnail_url) + self.add_field('duration', (int,long,timedelta)) + self.add_field('date', datetime) + self.add_field('rating', (int,long,float), NotAvailable) + self.add_field('rating_max', (int,long,float), NotAvailable) + self.add_field('thumbnail', Thumbnail) + self.add_field('nsfw', bool, False) @classmethod def id2url(cls, _id):