fix youtube backend (closes #450)

Signed-off-by: Romain Bignon <romain@peerfuse.org>
2011-01-05 18:01:48 +01:00 · 2011-01-05 18:01:48 +01:00 · efec968f80
commit efec968f80
parent bc606d3640
3 changed files with 57 additions and 57 deletions
--- a/weboob/backends/youtube/backend.py
+++ b/weboob/backends/youtube/backend.py
@ -19,10 +19,8 @@
 from __future__ import with_statement

 import datetime
-try:
-    import gdata.youtube.service
-except ImportError:
-    raise ImportError("Missing dependence: python-gdata")
+import gdata.youtube.service
+import urllib

 from weboob.capabilities.video import ICapVideo
 from weboob.tools.backend import BaseBackend, ObjectNotAvailable
@ -36,6 +34,51 @@ from .video import YoutubeVideo
 __all__ = ['YoutubeBackend']


+def get_video(entry):
+    video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
+                         title=to_unicode(entry.media.title.text.strip()),
+                         duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
+                         thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
+                         )
+    video.author = entry.author[0].name.text.strip()
+    if entry.media.name:
+        video.author = to_unicode(entry.media.name.text.strip())
+    return video
+
+
+def get_video_url(video, format=18):
+    """
+    Returns the YouTube video url for download or playback.
+    In the case of a download, if the user-chosen format is not
+    available, the next available format will be used.
+    Much of the code for this method is borrowed from youtubeservice.py of Cutetube
+    http://maemo.org/packages/view/cutetube/.
+    """
+    video_url = ''
+    player_url = YoutubeVideo.id2url(video.id)
+    html = urllib.urlopen(player_url).read()
+    html = ''.join(html.split())
+    formats = {}
+    pos = html.find('","fmt_url_map":"')
+    if (pos != -1):
+        pos2 = html.find('"', pos + 17)
+        fmt_map = urllib.unquote(html[pos + 17:pos2]) + ','
+        parts = fmt_map.split('|')
+        key = parts[0]
+        for p in parts[1:]:
+            idx = p.rfind(',')
+            value = p[:idx].replace('\\/', '/')
+            formats[int(key)] = value
+            key = p[idx + 1:]
+    format_list = [22, 35, 34, 18, 17]
+    for format in format_list[format_list.index(format):]:
+        if format in formats:
+            video_url = formats.get(format)
+            break
+        break
+    return video_url
+
+
 class YoutubeBackend(BaseBackend, ICapVideo):
    NAME = 'youtube'
    MAINTAINER = 'Christophe Benz'
@ -46,11 +89,11 @@ class YoutubeBackend(BaseBackend, ICapVideo):
    BROWSER = YoutubeBrowser

    def get_video(self, _id):
-        with self.browser:
-            try:
-                return self.browser.get_video(_id)
-            except ForbiddenVideo, e:
-                raise ObjectNotAvailable(e)
+        yt_service = gdata.youtube.service.YouTubeService()
+        entry = yt_service.GetYouTubeVideoEntry(video_id=_id)
+        video = get_video(entry)
+        video.url = get_video_url(video)
+        return video

    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        YOUTUBE_MAX_RESULTS = 50
@ -81,29 +124,16 @@ class YoutubeBackend(BaseBackend, ICapVideo):

            feed = yt_service.YouTubeQuery(query)
            for entry in feed.entry:
-                video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
-                                     title=to_unicode(entry.media.title.text.strip()),
-                                     duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
-                                     thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
-                                     )
-                if entry.media.name:
-                    video.author = to_unicode(entry.media.name.text.strip())
-                yield video
+                yield get_video(entry)
                nb_yielded += 1
                if nb_yielded == max_results:
                    return

    def fill_video(self, video, fields):
-        if fields != ['thumbnail']:
-            # if we don't want only the thumbnail, we probably want also every fields
-            with self.browser:
-                try:
-                    video = self.browser.get_video(video.id, video)
-                except ForbiddenVideo, e:
-                    raise ObjectNotAvailable(e)
        if 'thumbnail' in fields:
-            with self.browser:
-                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+            video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read()
+        if 'url' in fields:
+            video.url = get_video_url(video)
        return video

    OBJECTS = {YoutubeVideo: fill_video}
--- a/weboob/backends/youtube/browser.py
+++ b/weboob/backends/youtube/browser.py
@ -33,8 +33,3 @@ class YoutubeBrowser(BaseBrowser):
             r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
             r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
            }
-
-    @id2url(YoutubeVideo.id2url)
-    def get_video(self, url, video=None):
-        self.location(url)
-        return self.page.get_video(video)
--- a/weboob/backends/youtube/pages.py
+++ b/weboob/backends/youtube/pages.py
@ -44,29 +44,4 @@ class VerifyAgePage(BasePage):


 class VideoPage(BasePage):
-    VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)')
-
-    def get_video(self, video=None):
-        if video is None:
-            video = YoutubeVideo(self.group_dict['id'])
-        video.title = self.get_title()
-        video.url = self.get_url(video.id)
-        video.author = self.get_author()
-        return video
-
-    def get_author(self):
-        element = select(self.document.getroot(), 'a.watch-description-username strong', 1)
-        return element.text.strip()
-
-    def get_title(self):
-        element = select(self.document.getroot(), 'meta[name=title]', 1)
-        return to_unicode(element.attrib['content'].strip())
-
-    def get_url(self, _id):
-        video_signature = None
-        for data in self.document.getiterator('script'):
-            if not data.text:
-                continue
-            for m in re.finditer(self.VIDEO_SIGNATURE_REGEX, data.text):
-                video_signature = m.group(1)
-        return u'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=18' % (_id, video_signature)
+    pass