163 lines
5.7 KiB
Python
163 lines
5.7 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
|
|
#
|
|
# This file is part of weboob.
|
|
#
|
|
# weboob is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# weboob is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
from __future__ import with_statement
|
|
|
|
import datetime
|
|
import gdata.youtube.service
|
|
import re
|
|
import urllib
|
|
|
|
from weboob.capabilities.video import ICapVideo
|
|
from weboob.tools.backend import BaseBackend
|
|
from weboob.tools.misc import to_unicode
|
|
|
|
from .browser import YoutubeBrowser
|
|
from .video import YoutubeVideo
|
|
|
|
|
|
# Public API of this module: only the backend class is exported.
__all__ = ['YoutubeBackend']
|
|
|
|
|
|
class YoutubeBackend(BaseBackend, ICapVideo):
|
|
NAME = 'youtube'
|
|
MAINTAINER = 'Christophe Benz'
|
|
EMAIL = 'christophe.benz@gmail.com'
|
|
VERSION = '0.9'
|
|
DESCRIPTION = 'Youtube videos website'
|
|
LICENSE = 'AGPLv3+'
|
|
BROWSER = YoutubeBrowser
|
|
|
|
URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/watch\?v=|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)(.*)$')
|
|
AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13]
|
|
FORMAT_EXTENSIONS = {
|
|
13: '3gp',
|
|
17: 'mp4',
|
|
18: 'mp4',
|
|
22: 'mp4',
|
|
37: 'mp4',
|
|
38: 'video', # You actually don't know if this will be MOV, AVI or whatever
|
|
43: 'webm',
|
|
45: 'webm',
|
|
}
|
|
|
|
def _entry2video(self, entry):
|
|
"""
|
|
Parse an entry returned by gdata and return a Video object.
|
|
"""
|
|
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
|
|
title=to_unicode(entry.media.title.text.strip()),
|
|
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
|
|
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
|
|
)
|
|
video.author = entry.author[0].name.text.strip()
|
|
if entry.media.name:
|
|
video.author = to_unicode(entry.media.name.text.strip())
|
|
return video
|
|
|
|
def _set_video_url(self, video, format=18):
|
|
"""
|
|
In the case of a download, if the user-chosen format is not
|
|
available, the next available format will be used.
|
|
Much of the code for this method is borrowed from youtubeservice.py of Cutetube
|
|
http://maemo.org/packages/view/cutetube/.
|
|
"""
|
|
player_url = YoutubeVideo.id2url(video.id)
|
|
html = urllib.urlopen(player_url).read()
|
|
html = ''.join(html.split())
|
|
formats = {}
|
|
pos = html.find('","fmt_url_map":"')
|
|
if (pos != -1):
|
|
pos2 = html.find('"', pos + 17)
|
|
fmt_map = urllib.unquote(html[pos + 17:pos2]) + ','
|
|
parts = fmt_map.split('|')
|
|
key = parts[0]
|
|
for p in parts[1:]:
|
|
idx = p.rfind(',')
|
|
value = p[:idx].replace('\\/', '/').replace('\u0026', '&').replace(',', '%2C')
|
|
formats[int(key)] = value
|
|
key = p[idx + 1:]
|
|
for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
|
|
if format in formats:
|
|
video.url = formats.get(format)
|
|
video.ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
|
|
return True
|
|
|
|
return False
|
|
|
|
def get_video(self, _id):
|
|
m = self.URL_RE.match(_id)
|
|
if m:
|
|
_id = m.group(1)
|
|
|
|
yt_service = gdata.youtube.service.YouTubeService()
|
|
try:
|
|
entry = yt_service.GetYouTubeVideoEntry(video_id=_id)
|
|
except gdata.service.Error, e:
|
|
if e.args[0]['status'] == 400:
|
|
return None
|
|
raise
|
|
|
|
video = self._entry2video(entry)
|
|
self._set_video_url(video)
|
|
return video
|
|
|
|
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
|
YOUTUBE_MAX_RESULTS = 50
|
|
YOUTUBE_MAX_START_INDEX = 1000
|
|
yt_service = gdata.youtube.service.YouTubeService()
|
|
|
|
start_index = 1
|
|
nb_yielded = 0
|
|
while True:
|
|
query = gdata.youtube.service.YouTubeVideoQuery()
|
|
if pattern is not None:
|
|
if isinstance(pattern, unicode):
|
|
pattern = pattern.encode('utf-8')
|
|
query.vq = pattern
|
|
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
|
|
query.racy = 'include' if nsfw else 'exclude'
|
|
|
|
if max_results is None or max_results > YOUTUBE_MAX_RESULTS:
|
|
query_max_results = YOUTUBE_MAX_RESULTS
|
|
else:
|
|
query_max_results = max_results
|
|
query.max_results = query_max_results
|
|
|
|
if start_index > YOUTUBE_MAX_START_INDEX:
|
|
return
|
|
query.start_index = start_index
|
|
start_index += query_max_results
|
|
|
|
feed = yt_service.YouTubeQuery(query)
|
|
for entry in feed.entry:
|
|
yield self._entry2video(entry)
|
|
nb_yielded += 1
|
|
if nb_yielded == max_results:
|
|
return
|
|
|
|
def fill_video(self, video, fields):
|
|
if 'thumbnail' in fields:
|
|
video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read()
|
|
if 'url' in fields:
|
|
self._set_video_url(video)
|
|
return video
|
|
|
|
OBJECTS = {YoutubeVideo: fill_video}
|