Stop using thumbnail_url (closes #810); use the set_empty_fields() method; no longer pass fields to the BaseVideo constructor

This commit is contained in:
Romain Bignon 2012-03-12 17:05:55 +01:00
commit ef07a9e795
11 changed files with 79 additions and 66 deletions

View file

@ -23,6 +23,8 @@ import re
import urllib
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable
from .video import ArteVideo
@ -53,7 +55,7 @@ class IndexPage(BasePage):
video.rating_max = rating_max
thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
video.thumbnail_url = 'http://videos.arte.tv' + thumb.attrib['src']
video.thumbnail = Thumbnail('http://videos.arte.tv' + thumb.attrib['src'])
try:
parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
@ -69,6 +71,8 @@ class IndexPage(BasePage):
else:
video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
video.set_empty_fields(NotAvailable, ('url',))
yield video
class VideoPage(BasePage):
@ -77,6 +81,7 @@ class VideoPage(BasePage):
video = ArteVideo(self.group_dict['id'])
video.title = self.get_title()
video.url = self.get_url(lang, quality)
video.set_empty_fields(NotAvailable)
return video
def get_title(self):

View file

@ -22,7 +22,7 @@ import urllib
import re
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities.base import NotAvailable
from weboob.capabilities import NotAvailable
from weboob.tools.misc import html2text
from weboob.tools.browser import BasePage, BrokenPageError
@ -73,8 +73,8 @@ class IndexPage(BasePage):
rating_div = self.parser.select(div, 'div.small_stars', 1)
video.rating_max = self.get_rate(rating_div)
video.rating = self.get_rate(rating_div.find('div'))
# XXX missing date
video.date = NotAvailable
video.set_empty_fields(NotAvailable, ('url',))
yield video
def get_rate(self, div):
@ -109,4 +109,6 @@ class VideoPage(BasePage):
mediaURL = urllib.unquote(mobj.group(1))
video.url = mediaURL
video.set_empty_fields(NotAvailable)
return video

View file

@ -47,7 +47,6 @@ class InaBackend(BaseBackend, ICapVideo):
return self.browser.search_videos(pattern)
def fill_video(self, video, fields):
return video
if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields
with self.browser:

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2010-2012 Romain Bignon
#
# This file is part of weboob.
#
@ -21,8 +21,8 @@
import datetime
import re
from weboob.tools.browser import BasePage
from weboob.tools.browser import BrokenPageError
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.capabilities.thumbnail import Thumbnail
from ..video import InaVideo
@ -42,24 +42,21 @@ class SearchPage(BasePage):
for li in ul.findall('li'):
id = re.sub(self.URL_REGEXP, r'\1', li.find('a').attrib['href'])
thumbnail = 'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src']
video = InaVideo('boutique.%s' % id)
title = self.parser.select(li, 'p.titre', 1).text
video.thumbnail = Thumbnail('http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src'])
video.title = self.parser.select(li, 'p.titre', 1).text
date = self.parser.select(li, 'p.date', 1).text
day, month, year = [int(s) for s in date.split('/')]
date = datetime.datetime(year, month, day)
video.date = datetime.datetime(year, month, day)
duration = self.parser.select(li, 'p.duree', 1).text
m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration)
if m:
duration = datetime.timedelta(hours=int(m.group(2) or 0), minutes=int(m.group(4) or 0), seconds=int(m.group(5)))
video.duration = datetime.timedelta(hours=int(m.group(2) or 0), minutes=int(m.group(4) or 0), seconds=int(m.group(5)))
else:
raise BrokenPageError('Unable to match duration (%r)' % duration)
yield InaVideo('boutique.%s' % id,
title=title,
date=date,
duration=duration,
thumbnail_url=thumbnail,
)
yield video

View file

@ -25,8 +25,8 @@ try:
except ImportError:
from cgi import parse_qs
from weboob.tools.browser import BasePage
from weboob.tools.browser import BrokenPageError
from weboob.capabilities import NotAvailable
from weboob.tools.browser import BasePage, BrokenPageError
from ..video import InaVideo
@ -45,6 +45,8 @@ class BaseVideoPage(BasePage):
video.date = date
video.duration = duration
video.description = self.get_description()
video.set_empty_fields(NotAvailable)
return video
def get_id(self):

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
# Copyright(C) 2010-2012 Roger Philibert
#
# This file is part of weboob.
#
@ -21,8 +21,8 @@
import datetime
import re
from weboob.tools.browser import BasePage
from weboob.tools.browser import BrokenPageError
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.capabilities.thumbnail import Thumbnail
from ..video import YoujizzVideo
@ -38,10 +38,12 @@ class IndexPage(BasePage):
url = a.attrib['href']
_id = re.sub(r'/videos/(.+)\.html', r'\1', url)
thumbnail_url = span.find('.//img').attrib['src']
video = YoujizzVideo(_id)
video.thumbnail = Thumbnail(span.find('.//img').attrib['src'])
title_el = self.parser.select(span, 'span#title1', 1)
title = title_el.text.strip()
video.title = title_el.text.strip()
time_span = self.parser.select(span, 'span.thumbtime span', 1)
time_txt = time_span.text.strip().replace(';', ':')
@ -52,9 +54,6 @@ class IndexPage(BasePage):
else:
raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
yield YoujizzVideo(_id,
title=title,
duration=datetime.timedelta(minutes=minutes, seconds=seconds),
thumbnail_url=thumbnail_url,
)
yield video

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2010-2012 Romain Bignon
#
# This file is part of weboob.
#
@ -21,6 +21,9 @@
import re
import datetime
from weboob.capabilities.base import NotAvailable
from weboob.tools.capabilities.thumbnail import Thumbnail
from .base import PornPage
from ..video import YoupornVideo
@ -45,7 +48,10 @@ class IndexPage(PornPage):
url = a.attrib['href']
_id = url[len('/watch/'):]
_id = _id[:_id.find('/')]
title = a.text.strip()
video = YoupornVideo(int(_id))
video.title = a.text.strip()
video.thumbnail = Thumbnail(thumbnail_url)
hours = minutes = seconds = 0
div = li.cssselect('h2[class=duration]')
@ -56,19 +62,15 @@ class IndexPage(PornPage):
elif len(pack) == 2:
minutes, seconds = pack
rating = 0
rating_max = 0
video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
div = li.cssselect('div.stars')
if div:
m = re.match('.*star-(\d).*', div[0].attrib.get('class', ''))
if m:
rating = int(m.group(1))
rating_max = 5
video.rating = int(m.group(1))
video.rating_max = 5
yield YoupornVideo(int(_id),
title=title,
rating=rating,
rating_max=rating_max,
duration=datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds),
thumbnail_url=thumbnail_url,
)
video.set_empty_fields(NotAvailable, ('url', 'author'))
yield video

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2010-2012 Romain Bignon
#
# This file is part of weboob.
#
@ -22,6 +22,7 @@ import re
import datetime
from dateutil.parser import parse as parse_dt
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BrokenPageError
from .base import PornPage
@ -37,6 +38,8 @@ class VideoPage(PornPage):
video.title = self.get_title()
video.url, video.ext = self.get_url()
self.set_details(video)
video.set_empty_fields(NotAvailable)
return video
def get_url(self):

View file

@ -25,7 +25,9 @@ import gdata.youtube.service
import re
import urllib
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.video import ICapVideo
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.misc import to_unicode
from weboob.tools.value import ValueBackendPassword, Value
@ -61,11 +63,11 @@ class YoutubeBackend(BaseBackend, ICapVideo):
"""
Parse an entry returned by gdata and return a Video object.
"""
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
title=to_unicode(entry.media.title.text.strip()),
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
)
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()))
video.title = to_unicode(entry.media.title.text.strip())
video.duration = datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))
video.thumbnail = Thumbnail(to_unicode(entry.media.thumbnail[0].url.strip()))
if entry.author[0].name.text:
video.author = entry.author[0].name.text.strip()
if entry.media.name:
@ -104,6 +106,8 @@ class YoutubeBackend(BaseBackend, ICapVideo):
video = self._entry2video(entry)
self._set_video_url(video)
video.set_empty_fields(NotAvailable)
return video
def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):