Stop using thumbnail_url (closes #810); use the set_empty_fields() method instead; no longer pass field values to the BaseVideo constructor

This commit is contained in:
Romain Bignon 2012-03-12 17:05:55 +01:00
commit ef07a9e795
11 changed files with 79 additions and 66 deletions

View file

@ -23,6 +23,8 @@ import re
import urllib import urllib
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable
from .video import ArteVideo from .video import ArteVideo
@ -53,7 +55,7 @@ class IndexPage(BasePage):
video.rating_max = rating_max video.rating_max = rating_max
thumb = self.parser.select(div, 'img[class=thumbnail]', 1) thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
video.thumbnail_url = 'http://videos.arte.tv' + thumb.attrib['src'] video.thumbnail = Thumbnail('http://videos.arte.tv' + thumb.attrib['src'])
try: try:
parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':') parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
@ -69,6 +71,8 @@ class IndexPage(BasePage):
else: else:
video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds)) video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
video.set_empty_fields(NotAvailable, ('url',))
yield video yield video
class VideoPage(BasePage): class VideoPage(BasePage):
@ -77,6 +81,7 @@ class VideoPage(BasePage):
video = ArteVideo(self.group_dict['id']) video = ArteVideo(self.group_dict['id'])
video.title = self.get_title() video.title = self.get_title()
video.url = self.get_url(lang, quality) video.url = self.get_url(lang, quality)
video.set_empty_fields(NotAvailable)
return video return video
def get_title(self): def get_title(self):

View file

@ -22,7 +22,7 @@ import urllib
import re import re
from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities.base import NotAvailable from weboob.capabilities import NotAvailable
from weboob.tools.misc import html2text from weboob.tools.misc import html2text
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage, BrokenPageError
@ -73,8 +73,8 @@ class IndexPage(BasePage):
rating_div = self.parser.select(div, 'div.small_stars', 1) rating_div = self.parser.select(div, 'div.small_stars', 1)
video.rating_max = self.get_rate(rating_div) video.rating_max = self.get_rate(rating_div)
video.rating = self.get_rate(rating_div.find('div')) video.rating = self.get_rate(rating_div.find('div'))
# XXX missing date
video.date = NotAvailable video.set_empty_fields(NotAvailable, ('url',))
yield video yield video
def get_rate(self, div): def get_rate(self, div):
@ -109,4 +109,6 @@ class VideoPage(BasePage):
mediaURL = urllib.unquote(mobj.group(1)) mediaURL = urllib.unquote(mobj.group(1))
video.url = mediaURL video.url = mediaURL
video.set_empty_fields(NotAvailable)
return video return video

View file

@ -47,7 +47,6 @@ class InaBackend(BaseBackend, ICapVideo):
return self.browser.search_videos(pattern) return self.browser.search_videos(pattern)
def fill_video(self, video, fields): def fill_video(self, video, fields):
return video
if fields != ['thumbnail']: if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields # if we don't want only the thumbnail, we probably want also every fields
with self.browser: with self.browser:

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon # Copyright(C) 2010-2012 Romain Bignon
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -21,8 +21,8 @@
import datetime import datetime
import re import re
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.browser import BrokenPageError from weboob.tools.capabilities.thumbnail import Thumbnail
from ..video import InaVideo from ..video import InaVideo
@ -42,24 +42,21 @@ class SearchPage(BasePage):
for li in ul.findall('li'): for li in ul.findall('li'):
id = re.sub(self.URL_REGEXP, r'\1', li.find('a').attrib['href']) id = re.sub(self.URL_REGEXP, r'\1', li.find('a').attrib['href'])
thumbnail = 'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src'] video = InaVideo('boutique.%s' % id)
title = self.parser.select(li, 'p.titre', 1).text video.thumbnail = Thumbnail('http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src'])
video.title = self.parser.select(li, 'p.titre', 1).text
date = self.parser.select(li, 'p.date', 1).text date = self.parser.select(li, 'p.date', 1).text
day, month, year = [int(s) for s in date.split('/')] day, month, year = [int(s) for s in date.split('/')]
date = datetime.datetime(year, month, day) video.date = datetime.datetime(year, month, day)
duration = self.parser.select(li, 'p.duree', 1).text duration = self.parser.select(li, 'p.duree', 1).text
m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration) m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration)
if m: if m:
duration = datetime.timedelta(hours=int(m.group(2) or 0), minutes=int(m.group(4) or 0), seconds=int(m.group(5))) video.duration = datetime.timedelta(hours=int(m.group(2) or 0), minutes=int(m.group(4) or 0), seconds=int(m.group(5)))
else: else:
raise BrokenPageError('Unable to match duration (%r)' % duration) raise BrokenPageError('Unable to match duration (%r)' % duration)
yield InaVideo('boutique.%s' % id, yield video
title=title,
date=date,
duration=duration,
thumbnail_url=thumbnail,
)

View file

@ -25,8 +25,8 @@ try:
except ImportError: except ImportError:
from cgi import parse_qs from cgi import parse_qs
from weboob.tools.browser import BasePage from weboob.capabilities import NotAvailable
from weboob.tools.browser import BrokenPageError from weboob.tools.browser import BasePage, BrokenPageError
from ..video import InaVideo from ..video import InaVideo
@ -45,6 +45,8 @@ class BaseVideoPage(BasePage):
video.date = date video.date = date
video.duration = duration video.duration = duration
video.description = self.get_description() video.description = self.get_description()
video.set_empty_fields(NotAvailable)
return video return video
def get_id(self): def get_id(self):

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert # Copyright(C) 2010-2012 Roger Philibert
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -21,8 +21,8 @@
import datetime import datetime
import re import re
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.browser import BrokenPageError from weboob.tools.capabilities.thumbnail import Thumbnail
from ..video import YoujizzVideo from ..video import YoujizzVideo
@ -38,10 +38,12 @@ class IndexPage(BasePage):
url = a.attrib['href'] url = a.attrib['href']
_id = re.sub(r'/videos/(.+)\.html', r'\1', url) _id = re.sub(r'/videos/(.+)\.html', r'\1', url)
thumbnail_url = span.find('.//img').attrib['src'] video = YoujizzVideo(_id)
video.thumbnail = Thumbnail(span.find('.//img').attrib['src'])
title_el = self.parser.select(span, 'span#title1', 1) title_el = self.parser.select(span, 'span#title1', 1)
title = title_el.text.strip() video.title = title_el.text.strip()
time_span = self.parser.select(span, 'span.thumbtime span', 1) time_span = self.parser.select(span, 'span.thumbtime span', 1)
time_txt = time_span.text.strip().replace(';', ':') time_txt = time_span.text.strip().replace(';', ':')
@ -52,9 +54,6 @@ class IndexPage(BasePage):
else: else:
raise BrokenPageError('Unable to parse the video duration: %s' % time_txt) raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
yield YoujizzVideo(_id, yield video
title=title,
duration=datetime.timedelta(minutes=minutes, seconds=seconds),
thumbnail_url=thumbnail_url,
)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon # Copyright(C) 2010-2012 Romain Bignon
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -21,6 +21,9 @@
import re import re
import datetime import datetime
from weboob.capabilities.base import NotAvailable
from weboob.tools.capabilities.thumbnail import Thumbnail
from .base import PornPage from .base import PornPage
from ..video import YoupornVideo from ..video import YoupornVideo
@ -45,7 +48,10 @@ class IndexPage(PornPage):
url = a.attrib['href'] url = a.attrib['href']
_id = url[len('/watch/'):] _id = url[len('/watch/'):]
_id = _id[:_id.find('/')] _id = _id[:_id.find('/')]
title = a.text.strip()
video = YoupornVideo(int(_id))
video.title = a.text.strip()
video.thumbnail = Thumbnail(thumbnail_url)
hours = minutes = seconds = 0 hours = minutes = seconds = 0
div = li.cssselect('h2[class=duration]') div = li.cssselect('h2[class=duration]')
@ -56,19 +62,15 @@ class IndexPage(PornPage):
elif len(pack) == 2: elif len(pack) == 2:
minutes, seconds = pack minutes, seconds = pack
rating = 0 video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
rating_max = 0
div = li.cssselect('div.stars') div = li.cssselect('div.stars')
if div: if div:
m = re.match('.*star-(\d).*', div[0].attrib.get('class', '')) m = re.match('.*star-(\d).*', div[0].attrib.get('class', ''))
if m: if m:
rating = int(m.group(1)) video.rating = int(m.group(1))
rating_max = 5 video.rating_max = 5
yield YoupornVideo(int(_id), video.set_empty_fields(NotAvailable, ('url', 'author'))
title=title,
rating=rating, yield video
rating_max=rating_max,
duration=datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds),
thumbnail_url=thumbnail_url,
)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon # Copyright(C) 2010-2012 Romain Bignon
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -22,6 +22,7 @@ import re
import datetime import datetime
from dateutil.parser import parse as parse_dt from dateutil.parser import parse as parse_dt
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BrokenPageError from weboob.tools.browser import BrokenPageError
from .base import PornPage from .base import PornPage
@ -37,6 +38,8 @@ class VideoPage(PornPage):
video.title = self.get_title() video.title = self.get_title()
video.url, video.ext = self.get_url() video.url, video.ext = self.get_url()
self.set_details(video) self.set_details(video)
video.set_empty_fields(NotAvailable)
return video return video
def get_url(self): def get_url(self):

View file

@ -25,7 +25,9 @@ import gdata.youtube.service
import re import re
import urllib import urllib
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.video import ICapVideo from weboob.capabilities.video import ICapVideo
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.tools.backend import BaseBackend, BackendConfig from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.misc import to_unicode from weboob.tools.misc import to_unicode
from weboob.tools.value import ValueBackendPassword, Value from weboob.tools.value import ValueBackendPassword, Value
@ -61,11 +63,11 @@ class YoutubeBackend(BaseBackend, ICapVideo):
""" """
Parse an entry returned by gdata and return a Video object. Parse an entry returned by gdata and return a Video object.
""" """
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()), video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()))
title=to_unicode(entry.media.title.text.strip()), video.title = to_unicode(entry.media.title.text.strip())
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))), video.duration = datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()), video.thumbnail = Thumbnail(to_unicode(entry.media.thumbnail[0].url.strip()))
)
if entry.author[0].name.text: if entry.author[0].name.text:
video.author = entry.author[0].name.text.strip() video.author = entry.author[0].name.text.strip()
if entry.media.name: if entry.media.name:
@ -104,6 +106,8 @@ class YoutubeBackend(BaseBackend, ICapVideo):
video = self._entry2video(entry) video = self._entry2video(entry)
self._set_video_url(video) self._set_video_url(video)
video.set_empty_fields(NotAvailable)
return video return video
def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):

View file

@ -1 +1,5 @@
# -*- coding: utf-8 -*-
from .base import NotLoaded, NotAvailable, CapBaseObject, IBaseCap
__all__ = ['NotLoaded', 'NotAvailable', 'CapBaseObject', 'IBaseCap']

View file

@ -20,7 +20,7 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from .base import IBaseCap, CapBaseObject, NotLoaded from .base import IBaseCap, CapBaseObject, NotAvailable
from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.tools.capabilities.thumbnail import Thumbnail
@ -31,25 +31,21 @@ class BaseVideo(CapBaseObject):
Represents a video. Represents a video.
This object has to be inherited to specify how to calculate the URL of the video from its ID. This object has to be inherited to specify how to calculate the URL of the video from its ID.
""" """
def __init__(self, _id, title=NotLoaded, url=NotLoaded, author=NotLoaded, duration=NotLoaded, date=NotLoaded,
rating=NotLoaded, rating_max=NotLoaded, thumbnail=NotLoaded, thumbnail_url=None, nsfw=False): def __init__(self, _id):
CapBaseObject.__init__(self, unicode(_id)) CapBaseObject.__init__(self, unicode(_id))
self.add_field('title', basestring, title) self.add_field('title', basestring)
self.add_field('url', basestring, url) self.add_field('url', basestring)
self.add_field('ext', basestring) self.add_field('ext', basestring)
self.add_field('author', basestring, author) self.add_field('author', basestring)
self.add_field('description', basestring) self.add_field('description', basestring)
self.add_field('duration', (int,long,timedelta), duration) self.add_field('duration', (int,long,timedelta))
self.add_field('date', datetime, date) self.add_field('date', datetime)
self.add_field('rating', (int,long,float), rating) self.add_field('rating', (int,long,float), NotAvailable)
self.add_field('rating_max', (int,long,float), rating_max) self.add_field('rating_max', (int,long,float), NotAvailable)
self.add_field('thumbnail', Thumbnail, thumbnail) self.add_field('thumbnail', Thumbnail)
self.add_field('nsfw', bool, nsfw) self.add_field('nsfw', bool, False)
# XXX remove this and fix all backends
if thumbnail_url is not None and self.thumbnail is NotLoaded:
self.thumbnail = Thumbnail(thumbnail_url)
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):