diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index f33a98aa..995fd22d 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -19,10 +19,8 @@ from __future__ import with_statement import datetime -try: - import gdata.youtube.service -except ImportError: - raise ImportError("Missing dependence: python-gdata") +import gdata.youtube.service +import urllib from weboob.capabilities.video import ICapVideo from weboob.tools.backend import BaseBackend, ObjectNotAvailable @@ -36,6 +34,51 @@ from .video import YoutubeVideo __all__ = ['YoutubeBackend'] +def get_video(entry): + video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()), + title=to_unicode(entry.media.title.text.strip()), + duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))), + thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()), + ) + video.author = entry.author[0].name.text.strip() + if entry.media.name: + video.author = to_unicode(entry.media.name.text.strip()) + return video + + +def get_video_url(video, format=18): + """ + Returns the YouTube video url for download or playback. + In the case of a download, if the user-chosen format is not + available, the next available format will be used. + Much of the code for this method is borrowed from youtubeservice.py of Cutetube + http://maemo.org/packages/view/cutetube/. + """ + video_url = '' + player_url = YoutubeVideo.id2url(video.id) + html = urllib.urlopen(player_url).read() + html = ''.join(html.split()) + formats = {} + pos = html.find('","fmt_url_map":"') + if (pos != -1): + pos2 = html.find('"', pos + 17) + fmt_map = urllib.unquote(html[pos + 17:pos2]) + ',' + parts = fmt_map.split('|') + key = parts[0] + for p in parts[1:]: + idx = p.rfind(',') + value = p[:idx].replace('\\/', '/') + formats[int(key)] = value + key = p[idx + 1:] + format_list = [22, 35, 34, 18, 17] + for format in format_list[format_list.index(format):]: + if format in formats: + video_url = formats.get(format) + break + break + return video_url + + class YoutubeBackend(BaseBackend, ICapVideo): NAME = 'youtube' MAINTAINER = 'Christophe Benz' @@ -46,11 +89,11 @@ class YoutubeBackend(BaseBackend, ICapVideo): BROWSER = YoutubeBrowser def get_video(self, _id): - with self.browser: - try: - return self.browser.get_video(_id) - except ForbiddenVideo, e: - raise ObjectNotAvailable(e) + yt_service = gdata.youtube.service.YouTubeService() + entry = yt_service.GetYouTubeVideoEntry(video_id=_id) + video = get_video(entry) + video.url = get_video_url(video) + return video def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): YOUTUBE_MAX_RESULTS = 50 @@ -81,29 +124,16 @@ class YoutubeBackend(BaseBackend, ICapVideo): feed = yt_service.YouTubeQuery(query) for entry in feed.entry: - video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()), - title=to_unicode(entry.media.title.text.strip()), - duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))), - thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()), - ) - if entry.media.name: - video.author = to_unicode(entry.media.name.text.strip()) - yield video + yield get_video(entry) nb_yielded += 1 if nb_yielded == max_results: return def fill_video(self, video, fields): - if fields != ['thumbnail']: - # if we don't want only the thumbnail, we probably want also every fields - with self.browser: - try: - video = self.browser.get_video(video.id, video) - except ForbiddenVideo, e: - raise ObjectNotAvailable(e) if 'thumbnail' in fields: - with self.browser: - video.thumbnail.data = self.browser.readurl(video.thumbnail.url) + video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read() + if 'url' in fields: + video.url = get_video_url(video) return video OBJECTS = {YoutubeVideo: fill_video} diff --git a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py index 548928bb..2caeb747 100644 --- a/weboob/backends/youtube/browser.py +++ b/weboob/backends/youtube/browser.py @@ -33,8 +33,3 @@ class YoutubeBrowser(BaseBrowser): r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage, r'.*youtube\.com/verify_age\?next_url=(?P.+)': VerifyAgePage, } - - @id2url(YoutubeVideo.id2url) - def get_video(self, url, video=None): - self.location(url) - return self.page.get_video(video) diff --git a/weboob/backends/youtube/pages.py b/weboob/backends/youtube/pages.py index 9a7f540e..f80165b1 100644 --- a/weboob/backends/youtube/pages.py +++ b/weboob/backends/youtube/pages.py @@ -44,29 +44,4 @@ class VerifyAgePage(BasePage): class VideoPage(BasePage): - VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)') - - def get_video(self, video=None): - if video is None: - video = YoutubeVideo(self.group_dict['id']) - video.title = self.get_title() - video.url = self.get_url(video.id) - video.author = self.get_author() - return video - - def get_author(self): - element = select(self.document.getroot(), 'a.watch-description-username strong', 1) - return element.text.strip() - - def get_title(self): - element = select(self.document.getroot(), 'meta[name=title]', 1) - return to_unicode(element.attrib['content'].strip()) - - def get_url(self, _id): - video_signature = None - for data in self.document.getiterator('script'): - if not data.text: - continue - for m in re.finditer(self.VIDEO_SIGNATURE_REGEX, data.text): - video_signature = m.group(1) - return u'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=18' % (_id, video_signature) + pass