# -*- coding: utf-8 -*- # Copyright(C) 2010-2011 Christophe Benz, Romain Bignon # # This file is part of weboob. # # weboob is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # weboob is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . from __future__ import with_statement import datetime import gdata.youtube.service import re import urllib from weboob.capabilities.video import ICapVideo from weboob.tools.backend import BaseBackend from weboob.tools.misc import to_unicode from .browser import YoutubeBrowser from .video import YoutubeVideo __all__ = ['YoutubeBackend'] class YoutubeBackend(BaseBackend, ICapVideo): NAME = 'youtube' MAINTAINER = 'Christophe Benz' EMAIL = 'christophe.benz@gmail.com' VERSION = '0.8' DESCRIPTION = 'Youtube videos website' LICENSE = 'AGPLv3+' BROWSER = YoutubeBrowser URL_RE = re.compile(r'https?://.*youtube.com/watch\?v=(.*)') AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13] FORMAT_EXTENSIONS = { 13: '3gp', 17: 'mp4', 18: 'mp4', 22: 'mp4', 37: 'mp4', 38: 'video', # You actually don't know if this will be MOV, AVI or whatever 43: 'webm', 45: 'webm', } def _entry2video(self, entry): """ Parse an entry returned by gdata and return a Video object. """ video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()), title=to_unicode(entry.media.title.text.strip()), duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))), thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()), ) video.author = entry.author[0].name.text.strip() if entry.media.name: video.author = to_unicode(entry.media.name.text.strip()) return video def _set_video_url(self, video, format=18): """ In the case of a download, if the user-chosen format is not available, the next available format will be used. Much of the code for this method is borrowed from youtubeservice.py of Cutetube http://maemo.org/packages/view/cutetube/. """ player_url = YoutubeVideo.id2url(video.id) html = urllib.urlopen(player_url).read() html = ''.join(html.split()) formats = {} pos = html.find('","fmt_url_map":"') if (pos != -1): pos2 = html.find('"', pos + 17) fmt_map = urllib.unquote(html[pos + 17:pos2]) + ',' parts = fmt_map.split('|') key = parts[0] for p in parts[1:]: idx = p.rfind(',') value = p[:idx].replace('\\/', '/').replace('\u0026', '&').replace(',', '%2C') formats[int(key)] = value key = p[idx + 1:] for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]: if format in formats: video.url = formats.get(format) video.ext = self.FORMAT_EXTENSIONS.get(format, 'flv') return True return False def get_video(self, _id): m = self.URL_RE.match(_id) if m: _id = m.group(1) yt_service = gdata.youtube.service.YouTubeService() try: entry = yt_service.GetYouTubeVideoEntry(video_id=_id) except gdata.service.Error, e: if e.args[0]['status'] == 400: return None raise video = self._entry2video(entry) self._set_video_url(video) return video def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): YOUTUBE_MAX_RESULTS = 50 YOUTUBE_MAX_START_INDEX = 1000 yt_service = gdata.youtube.service.YouTubeService() start_index = 1 nb_yielded = 0 while True: query = gdata.youtube.service.YouTubeVideoQuery() if pattern is not None: if isinstance(pattern, unicode): pattern = pattern.encode('utf-8') query.vq = pattern query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby] query.racy = 'include' if nsfw else 'exclude' if max_results is None or max_results > YOUTUBE_MAX_RESULTS: query_max_results = YOUTUBE_MAX_RESULTS else: query_max_results = max_results query.max_results = query_max_results if start_index > YOUTUBE_MAX_START_INDEX: return query.start_index = start_index start_index += query_max_results feed = yt_service.YouTubeQuery(query) for entry in feed.entry: yield self._entry2video(entry) nb_yielded += 1 if nb_yielded == max_results: return def fill_video(self, video, fields): if 'thumbnail' in fields: video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read() if 'url' in fields: self._set_video_url(video) return video OBJECTS = {YoutubeVideo: fill_video}