implement max_results option and infinite search for youtube
This commit is contained in:
parent
6ecf722f66
commit
53f6571fee
7 changed files with 82 additions and 31 deletions
|
|
@ -51,5 +51,6 @@ class Videoob(ConsoleApplication):
|
||||||
def command_search(self, pattern=None):
|
def command_search(self, pattern=None):
|
||||||
self.load_backends(ICapVideo)
|
self.load_backends(ICapVideo)
|
||||||
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos')
|
self.set_formatter_header(u'Search pattern: %s' % pattern if pattern else u'Latest videos')
|
||||||
for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw):
|
for backend, video in self.do('iter_search_results', pattern=pattern, nsfw=self.options.nsfw,
|
||||||
|
max_results=self.options.count):
|
||||||
self.format(video, backend.name)
|
self.format(video, backend.name)
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,14 @@ from __future__ import with_statement
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
import gdata.youtube.service
|
||||||
|
|
||||||
from weboob.capabilities.video import ICapVideo
|
from weboob.capabilities.video import ICapVideo
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend, ObjectNotAvailable
|
||||||
|
from weboob.tools.misc import to_unicode
|
||||||
|
|
||||||
from .browser import YoutubeBrowser
|
from .browser import YoutubeBrowser
|
||||||
|
from .pages import ForbiddenVideo
|
||||||
from .video import YoutubeVideo
|
from .video import YoutubeVideo
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -41,36 +45,61 @@ class YoutubeBackend(BaseBackend, ICapVideo):
|
||||||
|
|
||||||
def get_video(self, _id):
|
def get_video(self, _id):
|
||||||
with self.browser:
|
with self.browser:
|
||||||
return self.browser.get_video(_id)
|
try:
|
||||||
|
return self.browser.get_video(_id)
|
||||||
|
except ForbiddenVideo, e:
|
||||||
|
raise ObjectNotAvailable(e)
|
||||||
|
|
||||||
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False):
|
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
||||||
import gdata.youtube.service
|
YOUTUBE_MAX_RESULTS = 50
|
||||||
|
YOUTUBE_MAX_START_INDEX = 1000
|
||||||
yt_service = gdata.youtube.service.YouTubeService()
|
yt_service = gdata.youtube.service.YouTubeService()
|
||||||
query = gdata.youtube.service.YouTubeVideoQuery()
|
|
||||||
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
|
start_index = 1
|
||||||
query.racy = 'include' if nsfw else 'exclude'
|
nb_yielded = 0
|
||||||
if pattern:
|
while True:
|
||||||
query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split())
|
query = gdata.youtube.service.YouTubeVideoQuery()
|
||||||
feed = yt_service.YouTubeQuery(query)
|
if pattern is not None:
|
||||||
for entry in feed.entry:
|
query.vq = pattern
|
||||||
video = YoutubeVideo(entry.id.text.split('/')[-1].decode('utf-8'),
|
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
|
||||||
title=entry.media.title.text.decode('utf-8').strip(),
|
query.racy = 'include' if nsfw else 'exclude'
|
||||||
duration=datetime.timedelta(seconds=int(entry.media.duration.seconds.decode('utf-8').strip())),
|
|
||||||
thumbnail_url=entry.media.thumbnail[0].url.decode('utf-8').strip(),
|
if max_results is None or max_results > YOUTUBE_MAX_RESULTS:
|
||||||
)
|
query_max_results = YOUTUBE_MAX_RESULTS
|
||||||
if entry.media.name:
|
else:
|
||||||
video.author = entry.media.name.text.decode('utf-8').strip()
|
query_max_results = max_results
|
||||||
yield video
|
query.max_results = query_max_results
|
||||||
|
|
||||||
|
if start_index > YOUTUBE_MAX_START_INDEX:
|
||||||
|
return
|
||||||
|
query.start_index = start_index
|
||||||
|
start_index += query_max_results
|
||||||
|
|
||||||
|
feed = yt_service.YouTubeQuery(query)
|
||||||
|
for entry in feed.entry:
|
||||||
|
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
|
||||||
|
title=to_unicode(entry.media.title.text.strip()),
|
||||||
|
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
|
||||||
|
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
|
||||||
|
)
|
||||||
|
if entry.media.name:
|
||||||
|
video.author = to_unicode(entry.media.name.text.strip())
|
||||||
|
yield video
|
||||||
|
nb_yielded += 1
|
||||||
|
if nb_yielded == max_results:
|
||||||
|
return
|
||||||
|
|
||||||
def fill_video(self, video, fields):
|
def fill_video(self, video, fields):
|
||||||
if fields != ['thumbnail']:
|
if fields != ['thumbnail']:
|
||||||
# if the request is not for the thumbnail alone, we probably want every field as well
|
# if the request is not for the thumbnail alone, we probably want every field as well
|
||||||
with self.browser:
|
with self.browser:
|
||||||
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
|
try:
|
||||||
|
video = self.browser.get_video(YoutubeVideo.id2url(video.id), video)
|
||||||
|
except ForbiddenVideo, e:
|
||||||
|
raise ObjectNotAvailable(e)
|
||||||
if 'thumbnail' in fields:
|
if 'thumbnail' in fields:
|
||||||
with self.browser:
|
with self.browser:
|
||||||
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
|
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
|
||||||
|
|
||||||
return video
|
return video
|
||||||
|
|
||||||
OBJECTS = {YoutubeVideo: fill_video}
|
OBJECTS = {YoutubeVideo: fill_video}
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ import re
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
from weboob.tools.parsers.lxmlparser import select
|
from weboob.tools.parsers.lxmlparser import select
|
||||||
|
from weboob.tools.misc import to_unicode
|
||||||
|
|
||||||
from .video import YoutubeVideo
|
from .video import YoutubeVideo
|
||||||
|
|
||||||
|
|
@ -59,7 +60,7 @@ class VideoPage(BasePage):
|
||||||
|
|
||||||
def get_title(self):
|
def get_title(self):
|
||||||
element = select(self.document.getroot(), 'meta[name=title]', 1)
|
element = select(self.document.getroot(), 'meta[name=title]', 1)
|
||||||
return unicode(element.attrib['content']).strip()
|
return to_unicode(element.attrib['content'].strip())
|
||||||
|
|
||||||
def get_url(self, _id):
|
def get_url(self, _id):
|
||||||
video_signature = None
|
video_signature = None
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,7 @@ class ICapVideo(IBaseCap):
|
||||||
SEARCH_VIEWS,
|
SEARCH_VIEWS,
|
||||||
SEARCH_DATE) = range(4)
|
SEARCH_DATE) = range(4)
|
||||||
|
|
||||||
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False):
|
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
||||||
"""
|
"""
|
||||||
Iterate over the results of a search on a pattern. Note that if pattern is None,
|
Iterate over the results of a search on a pattern. Note that if pattern is None,
|
||||||
it gets the latest videos.
|
it gets the latest videos.
|
||||||
|
|
@ -80,6 +80,7 @@ class ICapVideo(IBaseCap):
|
||||||
@param pattern [str] pattern to search on
|
@param pattern [str] pattern to search on
|
||||||
@param sortby [enum] sort by...
|
@param sortby [enum] sort by...
|
||||||
@param nsfw [bool] include not-safe-for-work videos if True
|
@param nsfw [bool] include not-safe-for-work videos if True
|
||||||
|
@param max_results [int] maximum number of results to return
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ from optparse import OptionGroup, OptionParser
|
||||||
|
|
||||||
from weboob.core.ouiboube import Weboob
|
from weboob.core.ouiboube import Weboob
|
||||||
from weboob.tools.config.iconfig import ConfigError
|
from weboob.tools.config.iconfig import ConfigError
|
||||||
from weboob.tools.backend import ObjectNotSupported
|
from weboob.tools.backend import ObjectNotAvailable, ObjectNotSupported
|
||||||
from weboob.tools.misc import iter_fields
|
from weboob.tools.misc import iter_fields
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -201,7 +201,7 @@ class BaseApplication(object):
|
||||||
fields = [k for k, v in iter_fields(obj)]
|
fields = [k for k, v in iter_fields(obj)]
|
||||||
try:
|
try:
|
||||||
backend.fillobj(obj, fields)
|
backend.fillobj(obj, fields)
|
||||||
except ObjectNotSupported, e:
|
except (ObjectNotAvailable, ObjectNotSupported), e:
|
||||||
logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e))
|
logging.warning(u'Could not retrieve required fields (%s): %s' % (','.join(fields), e))
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
@ -213,6 +213,7 @@ class BaseApplication(object):
|
||||||
yield sub
|
yield sub
|
||||||
|
|
||||||
def complete(self, backend, count, selected_fields, function, *args, **kwargs):
|
def complete(self, backend, count, selected_fields, function, *args, **kwargs):
|
||||||
|
assert count is None or count > 0
|
||||||
res = getattr(backend, function)(*args, **kwargs)
|
res = getattr(backend, function)(*args, **kwargs)
|
||||||
|
|
||||||
if self.selected_fields:
|
if self.selected_fields:
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import sys
|
||||||
|
|
||||||
from weboob.core import CallErrors
|
from weboob.core import CallErrors
|
||||||
from weboob.core.backends import BackendsConfig
|
from weboob.core.backends import BackendsConfig
|
||||||
|
from weboob.tools.backend import ObjectNotSupported
|
||||||
|
|
||||||
from .base import BackendNotFound, BaseApplication
|
from .base import BackendNotFound, BaseApplication
|
||||||
from .formatters.load import formatters, load_formatter
|
from .formatters.load import formatters, load_formatter
|
||||||
|
|
@ -69,12 +70,14 @@ class ConsoleApplication(BaseApplication):
|
||||||
|
|
||||||
results_options = OptionGroup(self._parser, 'Results Options')
|
results_options = OptionGroup(self._parser, 'Results Options')
|
||||||
results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition')
|
results_options.add_option('-c', '--condition', help='filter result items to display given a boolean condition')
|
||||||
results_options.add_option('-n', '--count', type='int', help='get a maximum number of results (all backends merged)')
|
results_options.add_option('-n', '--count', default='10', type='int',
|
||||||
|
help='get a maximum number of results (all backends merged)')
|
||||||
results_options.add_option('-s', '--select', help='select result item keys to display (comma separated)')
|
results_options.add_option('-s', '--select', help='select result item keys to display (comma separated)')
|
||||||
self._parser.add_option_group(results_options)
|
self._parser.add_option_group(results_options)
|
||||||
|
|
||||||
formatting_options = OptionGroup(self._parser, 'Formatting Options')
|
formatting_options = OptionGroup(self._parser, 'Formatting Options')
|
||||||
formatting_options.add_option('-f', '--formatter', choices=formatters, help='select output formatter (%s)' % u','.join(formatters))
|
formatting_options.add_option('-f', '--formatter', choices=formatters,
|
||||||
|
help='select output formatter (%s)' % u','.join(formatters))
|
||||||
formatting_options.add_option('--no-header', dest='no_header', action='store_true', help='do not display header')
|
formatting_options.add_option('--no-header', dest='no_header', action='store_true', help='do not display header')
|
||||||
formatting_options.add_option('--no-keys', dest='no_keys', action='store_true', help='do not display item keys')
|
formatting_options.add_option('--no-keys', dest='no_keys', action='store_true', help='do not display item keys')
|
||||||
self._parser.add_option_group(formatting_options)
|
self._parser.add_option_group(formatting_options)
|
||||||
|
|
@ -106,6 +109,11 @@ class ConsoleApplication(BaseApplication):
|
||||||
else:
|
else:
|
||||||
self.condition = None
|
self.condition = None
|
||||||
|
|
||||||
|
if self.options.count == 0:
|
||||||
|
self._parser.error('Count must be at least 1, or negative for infinite')
|
||||||
|
elif self.options.count < 0:
|
||||||
|
self.options.count = None
|
||||||
|
|
||||||
def _get_completions(self):
|
def _get_completions(self):
|
||||||
return set(name for name, arguments, doc_string in self._commands)
|
return set(name for name, arguments, doc_string in self._commands)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,10 +23,17 @@ from logging import debug
|
||||||
|
|
||||||
from weboob.capabilities.base import IBaseCap, NotLoaded
|
from weboob.capabilities.base import IBaseCap, NotLoaded
|
||||||
|
|
||||||
__all__ = ['BaseBackend', 'ObjectNotSupported']
|
|
||||||
|
__all__ = ['BaseBackend', 'ObjectNotAvailable', 'ObjectNotSupported']
|
||||||
|
|
||||||
|
|
||||||
class ObjectNotSupported(Exception): pass
|
class ObjectNotAvailable(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ObjectNotSupported(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class BackendStorage(object):
|
class BackendStorage(object):
|
||||||
def __init__(self, name, storage):
|
def __init__(self, name, storage):
|
||||||
|
|
@ -212,9 +219,12 @@ class BaseBackend(object):
|
||||||
if missing:
|
if missing:
|
||||||
missing_fields.append(field)
|
missing_fields.append(field)
|
||||||
|
|
||||||
|
if not missing_fields:
|
||||||
|
return obj
|
||||||
|
|
||||||
for key, value in self.OBJECTS.iteritems():
|
for key, value in self.OBJECTS.iteritems():
|
||||||
if isinstance(obj, key):
|
if isinstance(obj, key):
|
||||||
debug('Complete %r with fields: %s' % (obj, missing_fields))
|
debug(u'Fill %r with fields: %s' % (obj, missing_fields))
|
||||||
return value(self, obj, missing_fields) or obj
|
return value(self, obj, missing_fields) or obj
|
||||||
|
|
||||||
raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self))
|
raise ObjectNotSupported('The object of type %s is not supported by the backend %s' % (type(obj), self))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue