fix youtube backend (closes #450)

Signed-off-by: Romain Bignon <romain@peerfuse.org>
This commit is contained in:
Christophe Benz 2011-01-05 18:01:48 +01:00 committed by Romain Bignon
commit efec968f80
3 changed files with 57 additions and 57 deletions

View file

@ -19,10 +19,8 @@
from __future__ import with_statement
import datetime
try:
import gdata.youtube.service
except ImportError:
raise ImportError("Missing dependence: python-gdata")
import gdata.youtube.service
import urllib
from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend, ObjectNotAvailable
@ -36,6 +34,51 @@ from .video import YoutubeVideo
__all__ = ['YoutubeBackend']
def get_video(entry):
    """
    Build a YoutubeVideo from a gdata YouTube video entry.

    The video id is the last path component of the entry id.  Title,
    duration (in seconds, turned into a timedelta) and the url of the
    first thumbnail are read from entry.media.

    The author is the name of the first author of the entry, replaced by
    entry.media.name when that one is set.
    """
    video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
                         title=to_unicode(entry.media.title.text.strip()),
                         duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
                         thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
                         )
    # Go through to_unicode like every other text field of the video, so
    # that the fallback author is not stored as a raw byte string.
    video.author = to_unicode(entry.author[0].name.text.strip())
    if entry.media.name:
        video.author = to_unicode(entry.media.name.text.strip())
    return video
def get_video_url(video, format=18):
    """
    Returns the YouTube video url for download or playback.

    The player page of the video is fetched and its "fmt_url_map" entry
    is parsed into a mapping of format code -> url.

    In the case of a download, if the user-chosen format is not
    available, the next available format will be used, following the
    preference order 22, 35, 34, 18, 17.

    Only video.id is read from the video object.  The format must be one
    of the codes of the preference list, else a ValueError is raised.
    An empty string is returned when no suitable format is found.

    Much of the code for this method is borrowed from youtubeservice.py of Cutetube
    http://maemo.org/packages/view/cutetube/.
    """
    marker = '","fmt_url_map":"'
    player_url = YoutubeVideo.id2url(video.id)

    # Always release the HTTP response, even when reading fails.
    response = urllib.urlopen(player_url)
    try:
        html = response.read()
    finally:
        response.close()
    # Drop every whitespace so that the marker is found whatever the
    # formatting of the page is.
    html = ''.join(html.split())

    formats = {}
    pos = html.find(marker)
    if pos != -1:
        start = pos + len(marker)
        end = html.find('"', start)
        # The map looks like "key|url,key|url,...": once split on '|',
        # each part holds the url of the previous key followed by the
        # next key.  The trailing ',' lets the last url be handled like
        # the others.
        fmt_map = urllib.unquote(html[start:end]) + ','
        parts = fmt_map.split('|')
        key = parts[0]
        for part in parts[1:]:
            idx = part.rfind(',')
            formats[int(key)] = part[:idx].replace('\\/', '/')
            key = part[idx + 1:]

    # Try the requested format first, then every following one of the
    # preference list.
    format_list = [22, 35, 34, 18, 17]
    for candidate in format_list[format_list.index(format):]:
        if candidate in formats:
            return formats[candidate]
    return ''
class YoutubeBackend(BaseBackend, ICapVideo):
NAME = 'youtube'
MAINTAINER = 'Christophe Benz'
@ -46,11 +89,11 @@ class YoutubeBackend(BaseBackend, ICapVideo):
BROWSER = YoutubeBrowser
def get_video(self, _id):
with self.browser:
try:
return self.browser.get_video(_id)
except ForbiddenVideo, e:
raise ObjectNotAvailable(e)
yt_service = gdata.youtube.service.YouTubeService()
entry = yt_service.GetYouTubeVideoEntry(video_id=_id)
video = get_video(entry)
video.url = get_video_url(video)
return video
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
YOUTUBE_MAX_RESULTS = 50
@ -81,29 +124,16 @@ class YoutubeBackend(BaseBackend, ICapVideo):
feed = yt_service.YouTubeQuery(query)
for entry in feed.entry:
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
title=to_unicode(entry.media.title.text.strip()),
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
)
if entry.media.name:
video.author = to_unicode(entry.media.name.text.strip())
yield video
yield get_video(entry)
nb_yielded += 1
if nb_yielded == max_results:
return
def fill_video(self, video, fields):
if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields
with self.browser:
try:
video = self.browser.get_video(video.id, video)
except ForbiddenVideo, e:
raise ObjectNotAvailable(e)
if 'thumbnail' in fields:
with self.browser:
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read()
if 'url' in fields:
video.url = get_video_url(video)
return video
OBJECTS = {YoutubeVideo: fill_video}

View file

@ -33,8 +33,3 @@ class YoutubeBrowser(BaseBrowser):
r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
}
@id2url(YoutubeVideo.id2url)
def get_video(self, url, video=None):
self.location(url)
return self.page.get_video(video)

View file

@ -44,29 +44,4 @@ class VerifyAgePage(BasePage):
class VideoPage(BasePage):
VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)')
def get_video(self, video=None):
if video is None:
video = YoutubeVideo(self.group_dict['id'])
video.title = self.get_title()
video.url = self.get_url(video.id)
video.author = self.get_author()
return video
def get_author(self):
element = select(self.document.getroot(), 'a.watch-description-username strong', 1)
return element.text.strip()
def get_title(self):
element = select(self.document.getroot(), 'meta[name=title]', 1)
return to_unicode(element.attrib['content'].strip())
def get_url(self, _id):
video_signature = None
for data in self.document.getiterator('script'):
if not data.text:
continue
for m in re.finditer(self.VIDEO_SIGNATURE_REGEX, data.text):
video_signature = m.group(1)
return u'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=18' % (_id, video_signature)
pass