Implement max_results option and infinite search for YouTube
This commit is contained in:
parent
6ecf722f66
commit
53f6571fee
7 changed files with 82 additions and 31 deletions
|
|
@ -51,5 +51,6 @@ class Videoob(ConsoleApplication):
|
|||
def command_search(self, pattern=None):
|
||||
self.load_backends(ICapVideo)
|
||||
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos')
|
||||
for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw):
|
||||
for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw,
|
||||
max_results=self.options.count):
|
||||
self.format(video, backend.name)
|
||||
|
|
|
|||
|
|
@ -20,10 +20,14 @@ from __future__ import with_statement
|
|||
|
||||
import datetime
|
||||
|
||||
import gdata.youtube.service
|
||||
|
||||
from weboob.capabilities.video import ICapVideo
|
||||
from weboob.tools.backend import BaseBackend
|
||||
from weboob.tools.backend import BaseBackend, ObjectNotAvailable
|
||||
from weboob.tools.misc import to_unicode
|
||||
|
||||
from .browser import YoutubeBrowser
|
||||
from .pages import ForbiddenVideo
|
||||
from .video import YoutubeVideo
|
||||
|
||||
|
||||
|
|
@ -41,36 +45,61 @@ class YoutubeBackend(BaseBackend, ICapVideo):
|
|||
|
||||
def get_video(self, _id):
|
||||
with self.browser:
|
||||
return self.browser.get_video(_id)
|
||||
try:
|
||||
return self.browser.get_video(_id)
|
||||
except ForbiddenVideo, e:
|
||||
raise ObjectNotAvailable(e)
|
||||
|
||||
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False):
|
||||
import gdata.youtube.service
|
||||
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
||||
YOUTUBE_MAX_RESULTS = 50
|
||||
YOUTUBE_MAX_START_INDEX = 1000
|
||||
yt_service = gdata.youtube.service.YouTubeService()
|
||||
query = gdata.youtube.service.YouTubeVideoQuery()
|
||||
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
|
||||
query.racy = 'include' if nsfw else 'exclude'
|
||||
if pattern:
|
||||
query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split())
|
||||
feed = yt_service.YouTubeQuery(query)
|
||||
for entry in feed.entry:
|
||||
video = YoutubeVideo(entry.id.text.split('/')[-1].decode('utf-8'),
|
||||
title=entry.media.title.text.decode('utf-8').strip(),
|
||||
duration=datetime.timedelta(seconds=int(entry.media.duration.seconds.decode('utf-8').strip())),
|
||||
thumbnail_url=entry.media.thumbnail[0].url.decode('utf-8').strip(),
|
||||
)
|
||||
if entry.media.name:
|
||||
video.author = entry.media.name.text.decode('utf-8').strip()
|
||||
yield video
|
||||
|
||||
start_index = 1
|
||||
nb_yielded = 0
|
||||
while True:
|
||||
query = gdata.youtube.service.YouTubeVideoQuery()
|
||||
if pattern is not None:
|
||||
query.vq = pattern
|
||||
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
|
||||
query.racy = 'include' if nsfw else 'exclude'
|
||||
|
||||
if max_results is None or max_results > YOUTUBE_MAX_RESULTS:
|
||||
query_max_results = YOUTUBE_MAX_RESULTS
|
||||
else:
|
||||
query_max_results = max_results
|
||||
query.max_results = query_max_results
|
||||
|
||||
if start_index > YOUTUBE_MAX_START_INDEX:
|
||||
return
|
||||
query.start_index = start_index
|
||||
start_index += query_max_results
|
||||
|
||||
feed = yt_service.YouTubeQuery(query)
|
||||
for entry in feed.entry:
|
||||
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
|
||||
title=to_unicode(entry.media.title.text.strip()),
|
||||
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
|
||||
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
|
||||
)
|
||||
if entry.media.name:
|
||||
video.author = to_unicode(entry.media.name.text.strip())
|
||||
yield video
|
||||
nb_yielded += 1
|
||||
if nb_yielded == max_results:
|
||||
return
|
||||
|
||||
def fill_video(self, video, fields):
|
||||
if fields != ['thumbnail']:
|
||||
# If more than just the thumbnail is requested, we probably want every other field as well.
|
||||
with self.browser:
|
||||
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
|
||||
try:
|
||||
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
|
||||
except ForbiddenVideo, e:
|
||||
raise ObjectNotAvailable(e)
|
||||
if 'thumbnail' in fields:
|
||||
with self.browser:
|
||||
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
|
||||
|
||||
return video
|
||||
|
||||
OBJECTS = {YoutubeVideo: fill_video}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import re
|
|||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.parsers.lxmlparser import select
|
||||
from weboob.tools.misc import to_unicode
|
||||
|
||||
from .video import YoutubeVideo
|
||||
|
||||
|
|
@ -59,7 +60,7 @@ class VideoPage(BasePage):
|
|||
|
||||
def get_title(self):
|
||||
element = select(self.document.getroot(), 'meta[name=title]', 1)
|
||||
return unicode(element.attrib['content']).strip()
|
||||
return to_unicode(element.attrib['content'].strip())
|
||||
|
||||
def get_url(self, _id):
|
||||
video_signature = None
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class ICapVideo(IBaseCap):
|
|||
SEARCH_VIEWS,
|
||||
SEARCH_DATE) = range(4)
|
||||
|
||||
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False):
|
||||
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
||||
"""
|
||||
Iterate over the results of a search on a pattern. Note that if pattern is None,
|
||||
it gets the latest videos.
|
||||
|
|
@ -80,6 +80,7 @@ class ICapVideo(IBaseCap):
|
|||
@param pattern [str] pattern to search on
|
||||
@param sortby [enum] sort by...
|
||||
@param nsfw [bool] include not-safe-for-work videos if True
|
||||
@param max_results [int] maximum number of results to return
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from optparse import OptionGroup, OptionParser
|
|||
|
||||
from weboob.core.ouiboube import Weboob
|
||||
from weboob.tools.config.iconfig import ConfigError
|
||||
from weboob.tools.backend import ObjectNotSupported
|
||||
from weboob.tools.backend import ObjectNotAvailable, ObjectNotSupported
|
||||
from weboob.tools.misc import iter_fields
|
||||
|
||||
|
||||
|
|
@ -201,7 +201,7 @@ class BaseApplication(object):
|
|||
fields = [k for k, v in iter_fields(obj)]
|
||||
try:
|
||||
backend.fillobj(obj, fields)
|
||||
except ObjectNotSupported, e:
|
||||
except (ObjectNotAvailable, ObjectNotSupported), e:
|
||||
logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e))
|
||||
return obj
|
||||
|
||||
|
|
@ -213,6 +213,7 @@ class BaseApplication(object):
|
|||
yield sub
|
||||
|
||||
def complete(self, backend, count, selected_fields, function, *args, **kwargs):
|
||||
assert count is None or count > 0
|
||||
res = getattr(backend, function)(*args, **kwargs)
|
||||
|
||||
if self.selected_fields:
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import sys
|
|||
|
||||
from weboob.core import CallErrors
|
||||
from weboob.core.backends import BackendsConfig
|
||||
from weboob.tools.backend import ObjectNotSupported
|
||||
|
||||
from .base import BackendNotFound, BaseApplication
|
||||
from .formatters.load import formatters, load_formatter
|
||||
|
|
@ -69,12 +70,14 @@ class ConsoleApplication(BaseApplication):
|
|||
|
||||
results_options = OptionGroup(self._parser, 'Results Options')
|
||||
results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition')
|
||||
results_options.add_option('-n', '--count', type='int', help='get a maximum number of results (all backends merged)')
|
||||
results_options.add_option('-n', '--count', default='10', type='int',
|
||||
help='get a maximum number of results (all backends merged)')
|
||||
results_options.add_option('-s', '--select', help='select result item keys to display (comma separated)')
|
||||
self._parser.add_option_group(results_options)
|
||||
|
||||
formatting_options = OptionGroup(self._parser, 'Formatting Options')
|
||||
formatting_options.add_option('-f', '--formatter', choices=formatters, help='select output formatter (%s)' % u','.join(formatters))
|
||||
formatting_options.add_option('-f', '--formatter', choices=formatters,
|
||||
help='select output formatter (%s)' % u','.join(formatters))
|
||||
formatting_options.add_option('--no-header', dest='no_header', action='store_true', help='do not display header')
|
||||
formatting_options.add_option('--no-keys', dest='no_keys', action='store_true', help='do not display item keys')
|
||||
self._parser.add_option_group(formatting_options)
|
||||
|
|
@ -106,6 +109,11 @@ class ConsoleApplication(BaseApplication):
|
|||
else:
|
||||
self.condition = None
|
||||
|
||||
if self.options.count == 0:
|
||||
self._parser.error('Count must be at least 1, or negative for infinite')
|
||||
elif self.options.count < 0:
|
||||
self.options.count = None
|
||||
|
||||
def _get_completions(self):
|
||||
return set(name for name, arguments, doc_string in self._commands)
|
||||
|
||||
|
|
|
|||
|
|
@ -23,10 +23,17 @@ from logging import debug
|
|||
|
||||
from weboob.capabilities.base import IBaseCap, NotLoaded
|
||||
|
||||
__all__ = ['BaseBackend', 'ObjectNotSupported']
|
||||
|
||||
__all__ = ['BaseBackend', 'ObjectNotAvailable', 'ObjectNotSupported']
|
||||
|
||||
|
||||
class ObjectNotSupported(Exception): pass
|
||||
class ObjectNotAvailable(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ObjectNotSupported(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class BackendStorage(object):
|
||||
def __init__(self, name, storage):
|
||||
|
|
@ -212,9 +219,12 @@ class BaseBackend(object):
|
|||
if missing:
|
||||
missing_fields.append(field)
|
||||
|
||||
if not missing_fields:
|
||||
return obj
|
||||
|
||||
for key, value in self.OBJECTS.iteritems():
|
||||
if isinstance(obj, key):
|
||||
debug('Complete %r with fields: %s' % (obj, missing_fields))
|
||||
debug(u'Fill %r with fields: %s' % (obj, missing_fields))
|
||||
return value(self, obj, missing_fields) or obj
|
||||
|
||||
raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue