diff --git a/weboob/applications/videoob/videoob.py b/weboob/applications/videoob/videoob.py index 19b0059e..d64302a4 100644 --- a/weboob/applications/videoob/videoob.py +++ b/weboob/applications/videoob/videoob.py @@ -51,5 +51,6 @@ class Videoob(ConsoleApplication): def command_search(self, pattern=None): self.load_backends(ICapVideo) self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos') - for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw): + for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw, + max_results=self.options.count): self.format(video, backend.name) diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index 1e5167ad..3581f4d4 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -20,10 +20,14 @@ from __future__ import with_statement import datetime +import gdata.youtube.service + from weboob.capabilities.video import ICapVideo -from weboob.tools.backend import BaseBackend +from weboob.tools.backend import BaseBackend, ObjectNotAvailable +from weboob.tools.misc import to_unicode from .browser import YoutubeBrowser +from .pages import ForbiddenVideo from .video import YoutubeVideo @@ -41,36 +45,61 @@ class YoutubeBackend(BaseBackend, ICapVideo): def get_video(self, _id): with self.browser: - return self.browser.get_video(_id) + try: + return self.browser.get_video(_id) + except ForbiddenVideo, e: + raise ObjectNotAvailable(e) - def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False): - import gdata.youtube.service + def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): + YOUTUBE_MAX_RESULTS = 50 + YOUTUBE_MAX_START_INDEX = 1000 yt_service = gdata.youtube.service.YouTubeService() - query = gdata.youtube.service.YouTubeVideoQuery() - query.orderby = ('relevance', 'rating', 
'viewCount', 'published')[sortby] - query.racy = 'include' if nsfw else 'exclude' - if pattern: - query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split()) - feed = yt_service.YouTubeQuery(query) - for entry in feed.entry: - video = YoutubeVideo(entry.id.text.split('/')[-1].decode('utf-8'), - title=entry.media.title.text.decode('utf-8').strip(), - duration=datetime.timedelta(seconds=int(entry.media.duration.seconds.decode('utf-8').strip())), - thumbnail_url=entry.media.thumbnail[0].url.decode('utf-8').strip(), - ) - if entry.media.name: - video.author = entry.media.name.text.decode('utf-8').strip() - yield video + + start_index = 1 + nb_yielded = 0 + while True: + query = gdata.youtube.service.YouTubeVideoQuery() + if pattern is not None: + query.vq = pattern + query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby] + query.racy = 'include' if nsfw else 'exclude' + + if max_results is None or max_results > YOUTUBE_MAX_RESULTS: + query_max_results = YOUTUBE_MAX_RESULTS + else: + query_max_results = max_results + query.max_results = query_max_results + + if start_index > YOUTUBE_MAX_START_INDEX: + return + query.start_index = start_index + start_index += query_max_results + + feed = yt_service.YouTubeQuery(query) + for entry in feed.entry: + video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()), + title=to_unicode(entry.media.title.text.strip()), + duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))), + thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()), + ) + if entry.media.name: + video.author = to_unicode(entry.media.name.text.strip()) + yield video + nb_yielded += 1 + if nb_yielded == max_results: + return def fill_video(self, video, fields): if fields != ['thumbnail']: # if we don't want only the thumbnail, we probably want also every fields with self.browser: - video = self.browser.get_video(YoutubeVideo.id2url(video.id), video) + 
try: + video = self.browser.get_video(YoutubeVideo.id2url(video.id), video) + except ForbiddenVideo, e: + raise ObjectNotAvailable(e) if 'thumbnail' in fields: with self.browser: video.thumbnail.data = self.browser.readurl(video.thumbnail.url) - return video OBJECTS = {YoutubeVideo: fill_video} diff --git a/weboob/backends/youtube/pages.py b/weboob/backends/youtube/pages.py index 05c67018..0ed08d95 100644 --- a/weboob/backends/youtube/pages.py +++ b/weboob/backends/youtube/pages.py @@ -20,6 +20,7 @@ import re from weboob.tools.browser import BasePage from weboob.tools.parsers.lxmlparser import select +from weboob.tools.misc import to_unicode from .video import YoutubeVideo @@ -59,7 +60,7 @@ class VideoPage(BasePage): def get_title(self): element = select(self.document.getroot(), 'meta[name=title]', 1) - return unicode(element.attrib['content']).strip() + return to_unicode(element.attrib['content'].strip()) def get_url(self, _id): video_signature = None diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index 3b6b5ec9..196faeaf 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -72,7 +72,7 @@ class ICapVideo(IBaseCap): SEARCH_VIEWS, SEARCH_DATE) = range(4) - def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False): + def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, max_results=None): """ Iter results of a search on a pattern. Note that if pattern is None, it get the latest videos. @@ -80,6 +80,7 @@ class ICapVideo(IBaseCap): @param pattern [str] pattern to search on @param sortby [enum] sort by... 
@param nsfw [bool] include non-suitable for work videos if True + @param max_results [int] maximum number of results to return """ raise NotImplementedError() diff --git a/weboob/tools/application/base.py b/weboob/tools/application/base.py index 4068ebf1..0127c983 100644 --- a/weboob/tools/application/base.py +++ b/weboob/tools/application/base.py @@ -23,7 +23,7 @@ from optparse import OptionGroup, OptionParser from weboob.core.ouiboube import Weboob from weboob.tools.config.iconfig import ConfigError -from weboob.tools.backend import ObjectNotSupported +from weboob.tools.backend import ObjectNotAvailable, ObjectNotSupported from weboob.tools.misc import iter_fields @@ -201,7 +201,7 @@ class BaseApplication(object): fields = [k for k, v in iter_fields(obj)] try: backend.fillobj(obj, fields) - except ObjectNotSupported, e: + except (ObjectNotAvailable, ObjectNotSupported), e: logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e)) return obj @@ -213,6 +213,7 @@ class BaseApplication(object): yield sub def complete(self, backend, count, selected_fields, function, *args, **kwargs): + assert count is None or count > 0 res = getattr(backend, function)(*args, **kwargs) if self.selected_fields: diff --git a/weboob/tools/application/console.py b/weboob/tools/application/console.py index be4580f0..b01aaf62 100644 --- a/weboob/tools/application/console.py +++ b/weboob/tools/application/console.py @@ -29,6 +29,7 @@ import sys from weboob.core import CallErrors from weboob.core.backends import BackendsConfig +from weboob.tools.backend import ObjectNotSupported from .base import BackendNotFound, BaseApplication from .formatters.load import formatters, load_formatter @@ -69,12 +70,14 @@ class ConsoleApplication(BaseApplication): results_options = OptionGroup(self._parser, 'Results Options') results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition') - results_options.add_option('-n', '--count', 
class ObjectNotAvailable(Exception):
    """
    The requested object cannot be retrieved by the backend.

    Backends raise it in place of their own lower-level errors, so that
    applications have a single exception type to catch.
    """


class ObjectNotSupported(Exception):
    """
    The backend has no way to fill objects of the given type.
    """
missing_fields.append(field) + if not missing_fields: + return obj + for key, value in self.OBJECTS.iteritems(): if isinstance(obj, key): - debug('Complete %r with fields: %s' % (obj, missing_fields)) + debug(u'Fill %r with fields: %s' % (obj, missing_fields)) return value(self, obj, missing_fields) or obj raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self))