Add a video module for gdcvault.com

For now it only fetches the speaker video, but each page can have both a speaker and a slides video feed. TODO: implement search.
Signed-off-by: François Revol <revol@free.fr>
Signed-off-by: Romain Bignon <romain@symlink.me>
2012-08-31 19:23:29 +02:00 · 2012-08-31 19:23:29 +02:00 · d24ca46ef6
commit d24ca46ef6
parent bb9a62b566
8 changed files with 382 additions and 0 deletions
--- a/modules/gdcvault/__init__.py
+++ b/modules/gdcvault/__init__.py
@ -0,0 +1,3 @@
+from .backend import GDCVaultBackend
+
+__all__ = ['GDCVaultBackend']
--- a/modules/gdcvault/backend.py
+++ b/modules/gdcvault/backend.py
@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo, BaseVideo
+from weboob.tools.backend import BaseBackend
+from weboob.capabilities.collection import ICapCollection, CollectionNotFound
+
+from .browser import GDCVaultBrowser
+from .video import GDCVaultVideo
+
+
+__all__ = ['GDCVaultBackend']
+
+
+class GDCVaultBackend(BaseBackend, ICapVideo, ICapCollection):
+    """
+    Backend exposing GDC Vault (gdcvault.com) talks as videos.
+
+    Videos are fetched one at a time through GDCVaultBrowser. Searching is
+    not implemented yet (see the commented-out search_videos below).
+    """
+    NAME = 'gdcvault'
+    MAINTAINER = u'François Revol'
+    EMAIL = 'revol@free.fr'
+    VERSION = '0.d'
+    DESCRIPTION = 'Game Developers Conferences Vault video streaming website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = GDCVaultBrowser
+
+    # Sort criteria names, indexed by the sortby value; only referenced by
+    # the disabled search_videos() below.
+    SORTBY = ['relevance', 'rating', 'views', 'time']
+
+    def get_video(self, _id):
+        """
+        Fetch a single video.
+
+        :param _id: value handed unchanged to the browser's get_video()
+        :returns: whatever the browser's get_video() returns
+        """
+        with self.browser:
+            return self.browser.get_video(_id)
+
+    # TODO: search is not implemented yet.
+    # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+    #     with self.browser:
+    #         return self.browser.search_videos(pattern, self.SORTBY[sortby])
+
+    def fill_video(self, video, fields):
+        """
+        Load the requested fields of a video.
+
+        :param video: video object to complete
+        :param fields: list of field names requested by the caller
+        :returns: the video (the object returned by the browser when the
+                  page had to be fetched again)
+        """
+        if fields != ['thumbnail']:
+            # if we don't want only the thumbnail, we probably want also every fields
+            with self.browser:
+                video = self.browser.get_video(GDCVaultVideo.id2url(video.id), video)
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    def iter_resources(self, objs, split_path):
+        """
+        Iterate over the resources found at a collection path.
+
+        At the root, yields the 'latest' collection. Inside 'latest', yields
+        the videos returned by the browser's latest_videos(), or nothing
+        while the browser does not provide that method.
+
+        :param objs: object types requested; only BaseVideo is handled
+        :param split_path: collection path as a list of components
+        """
+        if BaseVideo not in objs:
+            return
+
+        collection = self.get_collection(objs, split_path)
+        if collection.path_level == 0:
+            yield self.get_collection(objs, [u'latest'])
+        if collection.split_path == [u'latest']:
+            # GDCVaultBrowser.latest_videos() is still commented out in
+            # browser.py, so calling it unconditionally raised AttributeError.
+            # Expose an empty collection until it is implemented.
+            latest_videos = getattr(self.browser, 'latest_videos', None)
+            if latest_videos is None:
+                return
+            for video in latest_videos():
+                yield video
+
+    def validate_collection(self, objs, collection):
+        """
+        Accept the root and the 'latest' video collection, reject the rest.
+
+        :param objs: object types requested
+        :param collection: collection to validate; its title is set for 'latest'
+        :raises CollectionNotFound: for any other path
+        """
+        if collection.path_level == 0:
+            return
+        if BaseVideo in objs and collection.split_path == [u'latest']:
+            collection.title = u'Latest GDCVault videos'
+            return
+        raise CollectionNotFound(collection.split_path)
+
+    # Maps an object class to the method used to fill its missing fields.
+    OBJECTS = {GDCVaultVideo: fill_video}
--- a/modules/gdcvault/browser.py
+++ b/modules/gdcvault/browser.py
@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+#from .pages.index import IndexPage
+from .pages import VideoPage
+from .video import GDCVaultVideo
+
+
+__all__ = ['GDCVaultBrowser']
+
+
+class GDCVaultBrowser(BaseBrowser):
+    """Browser for gdcvault.com; for now it only handles a talk's play page."""
+    DOMAIN = 'gdcvault.com'
+    ENCODING = None
+    # Play pages: http://[www.]gdcvault.com/play/<numeric id>, optionally
+    # followed by anything. The dot of "gdcvault.com" is escaped so that it
+    # only matches a literal dot.
+    PAGES = {r'http://[w\.]*gdcvault\.com/play/(?P<id>[\d]+)/?.*': VideoPage,
+            }
+
+    @id2url(GDCVaultVideo.id2url)
+    def get_video(self, url, video=None):
+        """
+        Open a play page and build the video from it.
+
+        :param url: page to open (presumably the id2url decorator turns a
+                    bare id into a URL first; confirm against the decorator)
+        :param video: optional existing video object to fill
+        :returns: the result of the page's get_video()
+        """
+        self.location(url)
+        return self.page.get_video(video)
+
+    # TODO: search and latest listing need an IndexPage, not written yet.
+    # def search_videos(self, pattern, sortby):
+    #     return None
+    #     self.location(self.buildurl('http://gdcvault.com/en/search%s' % sortby, query=pattern.encode('utf-8')))
+    #     assert self.is_on_page(IndexPage)
+    #     return self.page.iter_videos()
+
+    # def latest_videos(self):
+    #     self.home()
+    #     assert self.is_on_page(IndexPage)
+    #     return self.page.iter_videos()
--- a/modules/gdcvault/favicon.png
+++ b/modules/gdcvault/favicon.png
--- a/modules/gdcvault/favicon.xcf
+++ b/modules/gdcvault/favicon.xcf
--- a/modules/gdcvault/pages.py
+++ b/modules/gdcvault/pages.py
@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.mech import ClientForm
+ControlNotFoundError = ClientForm.ControlNotFoundError
+
+from weboob.tools.browser import BasePage
+
+import re
+import datetime
+from dateutil.parser import parse as parse_dt
+
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.browser import BrokenPageError
+
+from .video import GDCVaultVideo
+
+#import lxml.etree
+
+
+
+
+__all__ = ['VideoPage']
+
+class VideoPage(BasePage):
+    """Play page of a single GDC Vault talk."""
+
+    def get_video(self, video=None):
+        """
+        Fill a video from this page and from the player's XML config file.
+
+        The title comes from the page's <title>; the stream URL, date and
+        duration come from the config file referenced by the player iframe.
+        Only the speaker stream is exposed as the video URL for now.
+
+        :param video: existing video object to fill; when None a new
+                      GDCVaultVideo is created from the id matched in the URL
+        :returns: the filled video, or None when the player iframe URL is
+                  not in the expected form
+        :raises BrokenPageError: when the config file lacks the host or the
+                                 speaker stream
+        """
+        if video is None:
+            video = GDCVaultVideo(self.group_dict['id'])
+
+        # the config file has it too, but in CDATA
+        obj = self.parser.select(self.document.getroot(), 'title')
+        if len(obj) > 0 and obj[0].text:
+            title = obj[0].text.strip()
+            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
+            if m:
+                title = m.group(1)
+            video.title = unicode(title)
+        # else: the title is left unset and handled by set_empty_fields()
+        # below, instead of failing on an unbound local variable.
+
+        # get the config file for the rest
+        obj = self.parser.select(self.document.getroot(), 'iframe', 1)
+        if obj is None:
+            return None
+        # an iframe without src simply fails the match below
+        iframe_url = obj.attrib.get('src', '')
+        m = re.match(r'(http:.*)player.html\?.*xmlURL=([^&]+)\&token=([^&]+)', iframe_url)
+        if not m:
+            return None
+        # the xmlURL parameter is appended to the player's base URL
+        config_url = m.group(1) + m.group(2)
+
+        config = self.browser.get_document(self.browser.openurl(config_url))
+        root = config.getroot()
+
+        obj = self.parser.select(root, 'akamaihost', 1)
+        host = obj.text
+        if host is None:
+            raise BrokenPageError('Missing tag in xml config file')
+
+        # Each page can carry two feeds; an empty tag is skipped rather than
+        # concatenated (which raised TypeError).
+        videos = {}
+        for name, tag in (('speaker', 'speakervideo'), ('slides', 'slidevideo')):
+            obj = self.parser.select(root, tag, 1)
+            if obj.text:
+                videos[name] = 'rtmp://' + host + '/' + obj.text
+        if 'speaker' not in videos:
+            raise BrokenPageError('Missing tag in xml config file')
+
+        obj = self.parser.select(root, 'date', 1)
+        if obj.text:
+            video.date = parse_dt(obj.text)
+
+        obj = self.parser.select(root, 'duration', 1)
+        m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text or '')
+        if m:
+            video.duration = datetime.timedelta(hours=int(m.group(1)),
+                                                minutes=int(m.group(2)),
+                                                seconds=int(m.group(3)))
+
+        #TODO: speaker as CDATA (the 'speaker' tag of the config file) -> video.author
+
+        #XXX only the speaker feed is exposed; the slides feed is ignored
+        video.url = unicode(videos['speaker'])
+        #self.set_details(video)
+
+        video.set_empty_fields(NotAvailable)
+        return video
+
+    def set_details(self, v):
+        """
+        Set the date and duration of a video from the HTML page.
+
+        Currently unused: the only call, in get_video(), is commented out.
+        NOTE(review): the 'meta[name=available]' and 'span.ep_*' selectors
+        look inherited from another module; confirm they match GDC Vault
+        pages before enabling this.
+
+        :param v: video object to update
+        :raises BrokenPageError: when a date string cannot be parsed
+        """
+        obj = self.parser.select(self.document.getroot(), 'meta[name=available]', 1)
+        if obj is not None:
+            value = obj.attrib['content']
+            # format: DD-MM-YYYY HH:MM
+            m = re.match(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
+            if not m:
+                raise BrokenPageError('Unable to parse datetime: %r' % value)
+            day, month, year, hour, minute = [int(g) for g in m.groups()]
+            v.date = datetime.datetime(year=year,
+                                       month=month,
+                                       day=day,
+                                       hour=hour,
+                                       minute=minute)
+
+        obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle', 1)
+        if obj is not None:
+            span = self.parser.select(obj, 'span.ep_date', 1)
+            value = span.text
+            # format: HH:MM / HH:MM - DD-MM-YYYY (begin time, end time, day)
+            m = re.match(r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value)
+            if not m:
+                raise BrokenPageError('Unable to parse datetime: %r' % value)
+            bhour, bminute, ehour, eminute, day, month, year = [int(g) for g in m.groups()]
+
+            start = datetime.datetime(year=year,
+                                      month=month,
+                                      day=day,
+                                      hour=bhour,
+                                      minute=bminute)
+            end = datetime.datetime(year=year,
+                                    month=month,
+                                    day=day,
+                                    hour=ehour,
+                                    minute=eminute)
+
+            v.duration = end - start
--- a/modules/gdcvault/test.py
+++ b/modules/gdcvault/test.py
@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+#from weboob.capabilities.video import BaseVideo
+
+
+class GDCVaultTest(BackendTest):
+    """
+    Test case for the gdcvault backend.
+
+    No test is active yet: both tests below are commented out, matching the
+    search and 'latest' listing that are still disabled in the browser.
+    """
+    BACKEND = 'gdcvault'
+
+    # NOTE(review): the disabled tests below assert that v.url starts with
+    # 'http://', but VideoPage.get_video() builds 'rtmp://' URLs; adjust the
+    # assertions before enabling them.
+
+    # def test_search(self):
+    #     l = list(self.backend.search_videos('linux'))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+    #     self.backend.browser.openurl(v.url)
+
+    # def test_latest(self):
+    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
--- a/modules/gdcvault/video.py
+++ b/modules/gdcvault/video.py
@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+import re
+
+__all__ = ['GDCVaultVideo']
+
+
+class GDCVaultVideo(BaseVideo):
+    """Video object for GDC Vault talks, with the extension preset to 'flv'."""
+
+    def __init__(self, *args, **kwargs):
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = u'flv'
+
+    @classmethod
+    def id2url(cls, _id):
+        """
+        Return the play-page URL for a video id.
+
+        An id is the numeric page id, optionally suffixed with '#speaker' or
+        '#slides' (an attempt to enlarge the id namespace to differentiate
+        videos from the same page). The suffix is already part of _id, so it
+        must not be appended again: '123#speaker' maps to
+        'http://www.gdcvault.com/play/123#speaker', not '...#speaker#speaker'.
+
+        :param _id: video id, with or without a '#speaker'/'#slides' suffix
+        :returns: unicode URL of the play page
+        """
+        return u'http://www.gdcvault.com/play/%s' % _id
+