implement max_results option and infinite search for youtube

This commit is contained in:
Christophe Benz 2010-08-11 21:35:20 +02:00
commit 53f6571fee
7 changed files with 82 additions and 31 deletions

View file

@ -51,5 +51,6 @@ class Videoob(ConsoleApplication):
def command_search(self, pattern=None):
    """
    Search videos on the loaded ICapVideo backends and format each result.

    @param pattern [str] pattern to search on; when empty or None the
                         header announces the latest videos instead
    """
    self.load_backends(ICapVideo)

    if pattern:
        header = u'Search pattern: %s' % pattern
    else:
        header = u'Latest videos'
    self.set_formatter_header(header)

    # The --count option is forwarded so that backends can bound their own queries.
    results = self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw,
                      max_results=self.options.count)
    for backend, video in results:
        self.format(video, backend.name)

View file

@ -20,10 +20,14 @@ from __future__ import with_statement
import datetime
import gdata.youtube.service
from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend
from weboob.tools.backend import BaseBackend, ObjectNotAvailable
from weboob.tools.misc import to_unicode
from .browser import YoutubeBrowser
from .pages import ForbiddenVideo
from .video import YoutubeVideo
@ -41,36 +45,61 @@ class YoutubeBackend(BaseBackend, ICapVideo):
def get_video(self, _id):
with self.browser:
return self.browser.get_video(_id)
try:
return self.browser.get_video(_id)
except ForbiddenVideo, e:
raise ObjectNotAvailable(e)
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
    """
    Iterate over the videos returned by a YouTube search.

    Results are requested page by page. Iteration stops when max_results
    videos have been yielded, when a page comes back empty, or when the
    next page would start past YOUTUBE_MAX_START_INDEX.

    @param pattern [str] pattern to search on; no search term is set if None
    @param sortby [enum] index into (relevance, rating, viewCount, published)
    @param nsfw [bool] include non-suitable for work videos if True
    @param max_results [int] maximum number of results to yield, None for
                             no limit; zero or less yields nothing
    @return [iter(YoutubeVideo)] the found videos
    """
    # NOTE(review): presumably the limits enforced by the YouTube API - confirm.
    YOUTUBE_MAX_RESULTS = 50        # largest page size requested per query
    YOUTUBE_MAX_START_INDEX = 1000  # no page is requested past this index

    if max_results is not None and max_results <= 0:
        # Nothing to yield. A non-positive page size would also never
        # advance start_index, so the loop below would not terminate.
        return

    yt_service = gdata.youtube.service.YouTubeService()
    orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
    racy = 'include' if nsfw else 'exclude'

    start_index = 1
    nb_yielded = 0
    while start_index <= YOUTUBE_MAX_START_INDEX:
        # Never ask for more entries than are still needed.
        if max_results is None:
            page_size = YOUTUBE_MAX_RESULTS
        else:
            page_size = min(YOUTUBE_MAX_RESULTS, max_results - nb_yielded)

        query = gdata.youtube.service.YouTubeVideoQuery()
        if pattern is not None:
            query.vq = pattern
        query.orderby = orderby
        query.racy = racy
        query.max_results = page_size
        query.start_index = start_index
        start_index += page_size

        feed = yt_service.YouTubeQuery(query)
        if not feed.entry:
            # The feed is exhausted: stop instead of requesting further empty pages.
            return

        for entry in feed.entry:
            # The duration stays a timedelta; only textual fields are converted to unicode.
            video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
                                 title=to_unicode(entry.media.title.text.strip()),
                                 duration=datetime.timedelta(seconds=int(entry.media.duration.seconds.strip())),
                                 thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
                                 )
            if entry.media.name:
                video.author = to_unicode(entry.media.name.text.strip())
            yield video
            nb_yielded += 1
            if max_results is not None and nb_yielded >= max_results:
                return
def fill_video(self, video, fields):
if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields
with self.browser:
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
try:
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
except ForbiddenVideo, e:
raise ObjectNotAvailable(e)
if 'thumbnail' in fields:
with self.browser:
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
return video
# Maps an object type to the method called to fill the missing fields of its instances.
OBJECTS = {YoutubeVideo: fill_video}

View file

@ -20,6 +20,7 @@ import re
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select
from weboob.tools.misc import to_unicode
from .video import YoutubeVideo
@ -59,7 +60,7 @@ class VideoPage(BasePage):
def get_title(self):
    """
    Return the title found in the 'content' attribute of the page's
    meta[name=title] element, stripped and converted with to_unicode().
    """
    root = self.document.getroot()
    title_meta = select(root, 'meta[name=title]', 1)
    raw_title = title_meta.attrib['content']
    return to_unicode(raw_title.strip())
def get_url(self, _id):
video_signature = None

View file

@ -72,7 +72,7 @@ class ICapVideo(IBaseCap):
SEARCH_VIEWS,
SEARCH_DATE) = range(4)
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False):
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, max_results=None):
"""
Iterate over the results of a search on a pattern. Note that if pattern
is None, it gets the latest videos.
@ -80,6 +80,7 @@ class ICapVideo(IBaseCap):
@param pattern [str] pattern to search on
@param sortby [enum] sort by...
@param nsfw [bool] include non-suitable for work videos if True
@param max_results [int] maximum number of results to return
"""
raise NotImplementedError()

View file

@ -23,7 +23,7 @@ from optparse import OptionGroup, OptionParser
from weboob.core.ouiboube import Weboob
from weboob.tools.config.iconfig import ConfigError
from weboob.tools.backend import ObjectNotSupported
from weboob.tools.backend import ObjectNotAvailable, ObjectNotSupported
from weboob.tools.misc import iter_fields
@ -201,7 +201,7 @@ class BaseApplication(object):
fields = [k for k, v in iter_fields(obj)]
try:
backend.fillobj(obj, fields)
except ObjectNotSupported, e:
except (ObjectNotAvailable, ObjectNotSupported), e:
logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e))
return obj
@ -213,6 +213,7 @@ class BaseApplication(object):
yield sub
def complete(self, backend, count, selected_fields, function, *args, **kwargs):
assert count is None or count > 0
res = getattr(backend, function)(*args, **kwargs)
if self.selected_fields:

View file

@ -29,6 +29,7 @@ import sys
from weboob.core import CallErrors
from weboob.core.backends import BackendsConfig
from weboob.tools.backend import ObjectNotSupported
from .base import BackendNotFound, BaseApplication
from .formatters.load import formatters, load_formatter
@ -69,12 +70,14 @@ class ConsoleApplication(BaseApplication):
results_options = OptionGroup(self._parser, 'Results Options')
results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition')
results_options.add_option('-n', '--count', type='int', help='get a maximum number of results (all backends merged)')
results_options.add_option('-n', '--count', default='10', type='int',
help='get a maximum number of results (all backends merged)')
results_options.add_option('-s', '--select', help='select result item keys to display (comma separated)')
self._parser.add_option_group(results_options)
formatting_options = OptionGroup(self._parser, 'Formatting Options')
formatting_options.add_option('-f', '--formatter', choices=formatters, help='select output formatter (%s)' % u','.join(formatters))
formatting_options.add_option('-f', '--formatter', choices=formatters,
help='select output formatter (%s)' % u','.join(formatters))
formatting_options.add_option('--no-header', dest='no_header', action='store_true', help='do not display header')
formatting_options.add_option('--no-keys', dest='no_keys', action='store_true', help='do not display item keys')
self._parser.add_option_group(formatting_options)
@ -106,6 +109,11 @@ class ConsoleApplication(BaseApplication):
else:
self.condition = None
if self.options.count == 0:
self._parser.error('Count must be at least 1, or negative for infinite')
elif self.options.count < 0:
self.options.count = None
def _get_completions(self):
    """
    Return the set of names taken from the (name, arguments, doc_string)
    triples stored in self._commands.
    """
    names = set()
    for command in self._commands:
        # Unpack all three items so that malformed entries still fail loudly.
        name, arguments, doc_string = command
        names.add(name)
    return names

View file

@ -23,10 +23,17 @@ from logging import debug
from weboob.capabilities.base import IBaseCap, NotLoaded
__all__ = ['BaseBackend', 'ObjectNotSupported']
__all__ = ['BaseBackend', 'ObjectNotAvailable', 'ObjectNotSupported']
class ObjectNotAvailable(Exception):
    """Raised when a backend cannot provide a requested object."""


class ObjectNotSupported(Exception):
    """Raised when a backend has no way to fill objects of a given type."""
class BackendStorage(object):
def __init__(self, name, storage):
@ -212,9 +219,12 @@ class BaseBackend(object):
if missing:
missing_fields.append(field)
if not missing_fields:
return obj
for key, value in self.OBJECTS.iteritems():
if isinstance(obj, key):
debug('Complete %r with fields: %s' % (obj, missing_fields))
debug(u'Fill %r with fields: %s' % (obj, missing_fields))
return value(self, obj, missing_fields) or obj
raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self))