implement max_results option and infinite search for youtube

This commit is contained in:
Christophe Benz 2010-08-11 21:35:20 +02:00
commit 53f6571fee
7 changed files with 82 additions and 31 deletions

View file

@ -51,5 +51,6 @@ class Videoob(ConsoleApplication):
def command_search(self, pattern=None): def command_search(self, pattern=None):
self.load_backends(ICapVideo) self.load_backends(ICapVideo)
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos') self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos')
for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw): for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw,
max_results=self.options.count):
self.format(video, backend.name) self.format(video, backend.name)

View file

@ -20,10 +20,14 @@ from __future__ import with_statement
import datetime import datetime
import gdata.youtube.service
from weboob.capabilities.video import ICapVideo from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend, ObjectNotAvailable
from weboob.tools.misc import to_unicode
from .browser import YoutubeBrowser from .browser import YoutubeBrowser
from .pages import ForbiddenVideo
from .video import YoutubeVideo from .video import YoutubeVideo
@ -41,36 +45,61 @@ class YoutubeBackend(BaseBackend, ICapVideo):
def get_video(self, _id): def get_video(self, _id):
with self.browser: with self.browser:
try:
return self.browser.get_video(_id) return self.browser.get_video(_id)
except ForbiddenVideo, e:
raise ObjectNotAvailable(e)
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False): def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
import gdata.youtube.service YOUTUBE_MAX_RESULTS = 50
YOUTUBE_MAX_START_INDEX = 1000
yt_service = gdata.youtube.service.YouTubeService() yt_service = gdata.youtube.service.YouTubeService()
start_index = 1
nb_yielded = 0
while True:
query = gdata.youtube.service.YouTubeVideoQuery() query = gdata.youtube.service.YouTubeVideoQuery()
if pattern is not None:
query.vq = pattern
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby] query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
query.racy = 'include' if nsfw else 'exclude' query.racy = 'include' if nsfw else 'exclude'
if pattern:
query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split()) if max_results is None or max_results > YOUTUBE_MAX_RESULTS:
query_max_results = YOUTUBE_MAX_RESULTS
else:
query_max_results = max_results
query.max_results = query_max_results
if start_index > YOUTUBE_MAX_START_INDEX:
return
query.start_index = start_index
start_index += query_max_results
feed = yt_service.YouTubeQuery(query) feed = yt_service.YouTubeQuery(query)
for entry in feed.entry: for entry in feed.entry:
video = YoutubeVideo(entry.id.text.split('/')[-1].decode('utf-8'), video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
title=entry.media.title.text.decode('utf-8').strip(), title=to_unicode(entry.media.title.text.strip()),
duration=datetime.timedelta(seconds=int(entry.media.duration.seconds.decode('utf-8').strip())), duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
thumbnail_url=entry.media.thumbnail[0].url.decode('utf-8').strip(), thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
) )
if entry.media.name: if entry.media.name:
video.author = entry.media.name.text.decode('utf-8').strip() video.author = to_unicode(entry.media.name.text.strip())
yield video yield video
nb_yielded += 1
if nb_yielded == max_results:
return
def fill_video(self, video, fields): def fill_video(self, video, fields):
if fields != ['thumbnail']: if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields # if we don't want only the thumbnail, we probably want also every fields
with self.browser: with self.browser:
try:
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video) video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
except ForbiddenVideo, e:
raise ObjectNotAvailable(e)
if 'thumbnail' in fields: if 'thumbnail' in fields:
with self.browser: with self.browser:
video.thumbnail.data = self.browser.readurl(video.thumbnail.url) video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
return video return video
OBJECTS = {YoutubeVideo: fill_video} OBJECTS = {YoutubeVideo: fill_video}

View file

@ -20,6 +20,7 @@ import re
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select from weboob.tools.parsers.lxmlparser import select
from weboob.tools.misc import to_unicode
from .video import YoutubeVideo from .video import YoutubeVideo
@ -59,7 +60,7 @@ class VideoPage(BasePage):
def get_title(self): def get_title(self):
element = select(self.document.getroot(), 'meta[name=title]', 1) element = select(self.document.getroot(), 'meta[name=title]', 1)
return unicode(element.attrib['content']).strip() return to_unicode(element.attrib['content'].strip())
def get_url(self, _id): def get_url(self, _id):
video_signature = None video_signature = None

View file

@ -72,7 +72,7 @@ class ICapVideo(IBaseCap):
SEARCH_VIEWS, SEARCH_VIEWS,
SEARCH_DATE) = range(4) SEARCH_DATE) = range(4)
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False): def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, max_results=None):
""" """
Iter results of a search on a pattern. Note that if pattern is None, Iter results of a search on a pattern. Note that if pattern is None,
it get the latest videos. it get the latest videos.
@ -80,6 +80,7 @@ class ICapVideo(IBaseCap):
@param pattern [str] pattern to search on @param pattern [str] pattern to search on
@param sortby [enum] sort by... @param sortby [enum] sort by...
@param nsfw [bool] include non-suitable for work videos if True @param nsfw [bool] include non-suitable for work videos if True
@param max_results [int] maximum number of results to return
""" """
raise NotImplementedError() raise NotImplementedError()

View file

@ -23,7 +23,7 @@ from optparse import OptionGroup, OptionParser
from weboob.core.ouiboube import Weboob from weboob.core.ouiboube import Weboob
from weboob.tools.config.iconfig import ConfigError from weboob.tools.config.iconfig import ConfigError
from weboob.tools.backend import ObjectNotSupported from weboob.tools.backend import ObjectNotAvailable, ObjectNotSupported
from weboob.tools.misc import iter_fields from weboob.tools.misc import iter_fields
@ -201,7 +201,7 @@ class BaseApplication(object):
fields = [k for k, v in iter_fields(obj)] fields = [k for k, v in iter_fields(obj)]
try: try:
backend.fillobj(obj, fields) backend.fillobj(obj, fields)
except ObjectNotSupported, e: except (ObjectNotAvailable, ObjectNotSupported), e:
logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e)) logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e))
return obj return obj
@ -213,6 +213,7 @@ class BaseApplication(object):
yield sub yield sub
def complete(self, backend, count, selected_fields, function, *args, **kwargs): def complete(self, backend, count, selected_fields, function, *args, **kwargs):
assert count is None or count > 0
res = getattr(backend, function)(*args, **kwargs) res = getattr(backend, function)(*args, **kwargs)
if self.selected_fields: if self.selected_fields:

View file

@ -29,6 +29,7 @@ import sys
from weboob.core import CallErrors from weboob.core import CallErrors
from weboob.core.backends import BackendsConfig from weboob.core.backends import BackendsConfig
from weboob.tools.backend import ObjectNotSupported
from .base import BackendNotFound, BaseApplication from .base import BackendNotFound, BaseApplication
from .formatters.load import formatters, load_formatter from .formatters.load import formatters, load_formatter
@ -69,12 +70,14 @@ class ConsoleApplication(BaseApplication):
results_options = OptionGroup(self._parser, 'Results Options') results_options = OptionGroup(self._parser, 'Results Options')
results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition') results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition')
results_options.add_option('-n', '--count', type='int', help='get a maximum number of results (all backends merged)') results_options.add_option('-n', '--count', default='10', type='int',
help='get a maximum number of results (all backends merged)')
results_options.add_option('-s', '--select', help='select result item keys to display (comma separated)') results_options.add_option('-s', '--select', help='select result item keys to display (comma separated)')
self._parser.add_option_group(results_options) self._parser.add_option_group(results_options)
formatting_options = OptionGroup(self._parser, 'Formatting Options') formatting_options = OptionGroup(self._parser, 'Formatting Options')
formatting_options.add_option('-f', '--formatter', choices=formatters, help='select output formatter (%s)' % u','.join(formatters)) formatting_options.add_option('-f', '--formatter', choices=formatters,
help='select output formatter (%s)' % u','.join(formatters))
formatting_options.add_option('--no-header', dest='no_header', action='store_true', help='do not display header') formatting_options.add_option('--no-header', dest='no_header', action='store_true', help='do not display header')
formatting_options.add_option('--no-keys', dest='no_keys', action='store_true', help='do not display item keys') formatting_options.add_option('--no-keys', dest='no_keys', action='store_true', help='do not display item keys')
self._parser.add_option_group(formatting_options) self._parser.add_option_group(formatting_options)
@ -106,6 +109,11 @@ class ConsoleApplication(BaseApplication):
else: else:
self.condition = None self.condition = None
if self.options.count == 0:
self._parser.error('Count must be at least 1, or negative for infinite')
elif self.options.count < 0:
self.options.count = None
def _get_completions(self): def _get_completions(self):
return set(name for name, arguments, doc_string in self._commands) return set(name for name, arguments, doc_string in self._commands)

View file

@ -23,10 +23,17 @@ from logging import debug
from weboob.capabilities.base import IBaseCap, NotLoaded from weboob.capabilities.base import IBaseCap, NotLoaded
__all__ = ['BaseBackend', 'ObjectNotSupported']
__all__ = ['BaseBackend', 'ObjectNotAvailable', 'ObjectNotSupported']
class ObjectNotSupported(Exception): pass class ObjectNotAvailable(Exception):
pass
class ObjectNotSupported(Exception):
pass
class BackendStorage(object): class BackendStorage(object):
def __init__(self, name, storage): def __init__(self, name, storage):
@ -212,9 +219,12 @@ class BaseBackend(object):
if missing: if missing:
missing_fields.append(field) missing_fields.append(field)
if not missing_fields:
return obj
for key, value in self.OBJECTS.iteritems(): for key, value in self.OBJECTS.iteritems():
if isinstance(obj, key): if isinstance(obj, key):
debug('Complete %r with fields: %s' % (obj, missing_fields)) debug(u'Fill %r with fields: %s' % (obj, missing_fields))
return value(self, obj, missing_fields) or obj return value(self, obj, missing_fields) or obj
raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self)) raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self))