fix youtube backend (closes #450)
Signed-off-by: Romain Bignon <romain@peerfuse.org>
This commit is contained in:
parent
bc606d3640
commit
efec968f80
3 changed files with 57 additions and 57 deletions
|
|
@ -19,10 +19,8 @@
|
||||||
from __future__ import with_statement
|
from __future__ import with_statement
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
try:
|
import gdata.youtube.service
|
||||||
import gdata.youtube.service
|
import urllib
|
||||||
except ImportError:
|
|
||||||
raise ImportError("Missing dependence: python-gdata")
|
|
||||||
|
|
||||||
from weboob.capabilities.video import ICapVideo
|
from weboob.capabilities.video import ICapVideo
|
||||||
from weboob.tools.backend import BaseBackend, ObjectNotAvailable
|
from weboob.tools.backend import BaseBackend, ObjectNotAvailable
|
||||||
|
|
@ -36,6 +34,51 @@ from .video import YoutubeVideo
|
||||||
__all__ = ['YoutubeBackend']
|
__all__ = ['YoutubeBackend']
|
||||||
|
|
||||||
|
|
||||||
|
def get_video(entry):
    """
    Build a YoutubeVideo object from a gdata YouTube feed entry.

    The video id is the last path component of ``entry.id.text``; title,
    duration and first thumbnail url are read from ``entry.media``.

    The author is taken from ``entry.media.name`` when it is set, and
    otherwise from the first item of ``entry.author``. If neither is
    available the author is left untouched.

    All text fields are passed through to_unicode() so that the video
    carries the same string type in every field.
    """
    video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
                         title=to_unicode(entry.media.title.text.strip()),
                         duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
                         thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
                         )
    # Prefer the media-level name; only fall back to the entry author list
    # when it is non-empty, so an entry without authors does not crash here.
    if entry.media.name:
        video.author = to_unicode(entry.media.name.text.strip())
    elif entry.author:
        video.author = to_unicode(entry.author[0].name.text.strip())
    return video
|
||||||
|
|
||||||
|
|
||||||
|
def get_video_url(video, format=18):
    """
    Returns the YouTube video url for download or playback.

    The player page of the video is fetched and its ``fmt_url_map`` entry
    is parsed into a mapping of numeric format code -> stream url.

    If the user-chosen format is not available, the next available format
    in the preference list [22, 35, 34, 18, 17] is used. A format which is
    not part of that list is tried first as-is, then the whole list is
    tried in order.

    Returns an empty string when no usable url is found.

    Much of the code for this method is borrowed from youtubeservice.py of
    Cutetube http://maemo.org/packages/view/cutetube/.
    """
    player_url = YoutubeVideo.id2url(video.id)
    page = urllib.urlopen(player_url)
    try:
        html = page.read()
    finally:
        # Always release the connection, even if reading fails.
        page.close()
    # Drop every whitespace character so the marker search below does not
    # depend on how the page is laid out.
    html = ''.join(html.split())

    formats = {}
    marker = '","fmt_url_map":"'
    pos = html.find(marker)
    if pos != -1:
        start = pos + len(marker)
        end = html.find('"', start)
        # The map looks like "<fmt>|<url>,<fmt>|<url>,...". The trailing
        # comma makes the last url end like all the others.
        fmt_map = urllib.unquote(html[start:end]) + ','
        parts = fmt_map.split('|')
        key = parts[0]
        for part in parts[1:]:
            # Everything before the last comma is the url of the current
            # key; what follows it is the key of the next url.
            idx = part.rfind(',')
            formats[int(key)] = part[:idx].replace('\\/', '/')
            key = part[idx + 1:]

    format_list = [22, 35, 34, 18, 17]
    if format in format_list:
        candidates = format_list[format_list.index(format):]
    else:
        # Unknown format code: honour it if the page offers it, otherwise
        # walk the whole preference list instead of raising ValueError.
        candidates = [format] + format_list

    for candidate in candidates:
        if candidate in formats:
            return formats[candidate]
    return ''
|
||||||
|
|
||||||
|
|
||||||
class YoutubeBackend(BaseBackend, ICapVideo):
|
class YoutubeBackend(BaseBackend, ICapVideo):
|
||||||
NAME = 'youtube'
|
NAME = 'youtube'
|
||||||
MAINTAINER = 'Christophe Benz'
|
MAINTAINER = 'Christophe Benz'
|
||||||
|
|
@ -46,11 +89,11 @@ class YoutubeBackend(BaseBackend, ICapVideo):
|
||||||
BROWSER = YoutubeBrowser
|
BROWSER = YoutubeBrowser
|
||||||
|
|
||||||
def get_video(self, _id):
|
def get_video(self, _id):
|
||||||
with self.browser:
|
yt_service = gdata.youtube.service.YouTubeService()
|
||||||
try:
|
entry = yt_service.GetYouTubeVideoEntry(video_id=_id)
|
||||||
return self.browser.get_video(_id)
|
video = get_video(entry)
|
||||||
except ForbiddenVideo, e:
|
video.url = get_video_url(video)
|
||||||
raise ObjectNotAvailable(e)
|
return video
|
||||||
|
|
||||||
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
||||||
YOUTUBE_MAX_RESULTS = 50
|
YOUTUBE_MAX_RESULTS = 50
|
||||||
|
|
@ -81,29 +124,16 @@ class YoutubeBackend(BaseBackend, ICapVideo):
|
||||||
|
|
||||||
feed = yt_service.YouTubeQuery(query)
|
feed = yt_service.YouTubeQuery(query)
|
||||||
for entry in feed.entry:
|
for entry in feed.entry:
|
||||||
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
|
yield get_video(entry)
|
||||||
title=to_unicode(entry.media.title.text.strip()),
|
|
||||||
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
|
|
||||||
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
|
|
||||||
)
|
|
||||||
if entry.media.name:
|
|
||||||
video.author = to_unicode(entry.media.name.text.strip())
|
|
||||||
yield video
|
|
||||||
nb_yielded += 1
|
nb_yielded += 1
|
||||||
if nb_yielded == max_results:
|
if nb_yielded == max_results:
|
||||||
return
|
return
|
||||||
|
|
||||||
def fill_video(self, video, fields):
|
def fill_video(self, video, fields):
|
||||||
if fields != ['thumbnail']:
|
|
||||||
# if we don't want only the thumbnail, we probably want also every fields
|
|
||||||
with self.browser:
|
|
||||||
try:
|
|
||||||
video = self.browser.get_video(video.id, video)
|
|
||||||
except ForbiddenVideo, e:
|
|
||||||
raise ObjectNotAvailable(e)
|
|
||||||
if 'thumbnail' in fields:
|
if 'thumbnail' in fields:
|
||||||
with self.browser:
|
video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read()
|
||||||
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
|
if 'url' in fields:
|
||||||
|
video.url = get_video_url(video)
|
||||||
return video
|
return video
|
||||||
|
|
||||||
OBJECTS = {YoutubeVideo: fill_video}
|
OBJECTS = {YoutubeVideo: fill_video}
|
||||||
|
|
|
||||||
|
|
@ -33,8 +33,3 @@ class YoutubeBrowser(BaseBrowser):
|
||||||
r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
|
r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
|
||||||
r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
|
r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
|
||||||
}
|
}
|
||||||
|
|
||||||
@id2url(YoutubeVideo.id2url)
|
|
||||||
def get_video(self, url, video=None):
|
|
||||||
self.location(url)
|
|
||||||
return self.page.get_video(video)
|
|
||||||
|
|
|
||||||
|
|
@ -44,29 +44,4 @@ class VerifyAgePage(BasePage):
|
||||||
|
|
||||||
|
|
||||||
class VideoPage(BasePage):
|
class VideoPage(BasePage):
|
||||||
VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)')
|
pass
|
||||||
|
|
||||||
def get_video(self, video=None):
|
|
||||||
if video is None:
|
|
||||||
video = YoutubeVideo(self.group_dict['id'])
|
|
||||||
video.title = self.get_title()
|
|
||||||
video.url = self.get_url(video.id)
|
|
||||||
video.author = self.get_author()
|
|
||||||
return video
|
|
||||||
|
|
||||||
def get_author(self):
|
|
||||||
element = select(self.document.getroot(), 'a.watch-description-username strong', 1)
|
|
||||||
return element.text.strip()
|
|
||||||
|
|
||||||
def get_title(self):
|
|
||||||
element = select(self.document.getroot(), 'meta[name=title]', 1)
|
|
||||||
return to_unicode(element.attrib['content'].strip())
|
|
||||||
|
|
||||||
def get_url(self, _id):
|
|
||||||
video_signature = None
|
|
||||||
for data in self.document.getiterator('script'):
|
|
||||||
if not data.text:
|
|
||||||
continue
|
|
||||||
for m in re.finditer(self.VIDEO_SIGNATURE_REGEX, data.text):
|
|
||||||
video_signature = m.group(1)
|
|
||||||
return u'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=18' % (_id, video_signature)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue