diff --git a/modules/gdcvault/__init__.py b/modules/gdcvault/__init__.py new file mode 100644 index 00000000..c6833aff --- /dev/null +++ b/modules/gdcvault/__init__.py @@ -0,0 +1,3 @@ +from .backend import GDCVaultBackend + +__all__ = ['GDCVaultBackend'] diff --git a/modules/gdcvault/backend.py b/modules/gdcvault/backend.py new file mode 100644 index 00000000..f031d0e3 --- /dev/null +++ b/modules/gdcvault/backend.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement

from weboob.capabilities.video import ICapVideo, BaseVideo
from weboob.tools.backend import BaseBackend
from weboob.capabilities.collection import ICapCollection, CollectionNotFound

from .browser import GDCVaultBrowser
from .video import GDCVaultVideo


__all__ = ['GDCVaultBackend']


class GDCVaultBackend(BaseBackend, ICapVideo, ICapCollection):
    """Video backend for the GDC Vault website.

    Video lookup and field filling are delegated to GDCVaultBrowser; the
    backend also advertises a single ``latest`` collection.
    """

    NAME = 'gdcvault'
    MAINTAINER = u'François Revol'
    EMAIL = 'revol@free.fr'
    VERSION = '0.d'
    DESCRIPTION = 'Game Developers Conferences Vault video streaming website'
    LICENSE = 'AGPLv3+'
    BROWSER = GDCVaultBrowser

    def get_video(self, _id):
        """Return whatever the browser's get_video() yields for ``_id``."""
        with self.browser:
            return self.browser.get_video(_id)

    # Sort keys indexed by the ``sortby`` argument of search_videos();
    # unused as long as the search below stays disabled.
    SORTBY = ['relevance', 'rating', 'views', 'time']

    # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
    #     with self.browser:
    #         return self.browser.search_videos(pattern, self.SORTBY[sortby])

    def fill_video(self, video, fields):
        """Load the requested ``fields`` of ``video`` and return the result.

        Any request other than "thumbnail only" reloads the whole video
        page. Thumbnail data is downloaded only when ``'thumbnail'`` is
        requested and the video has a thumbnail.

        :param video: video object to complete
        :param fields: sequence of field names to fill
        :returns: the value returned by the browser when a reload happened
                  (possibly None), otherwise ``video`` itself
        """
        # Normalise to a list first: a tuple such as ('thumbnail',) never
        # equals the list literal and would force a needless page reload.
        if list(fields) != ['thumbnail']:
            # if we don't want only the thumbnail, we probably want also every fields
            with self.browser:
                video = self.browser.get_video(GDCVaultVideo.id2url(video.id), video)

        # The reload above may have produced None; do not dereference it.
        if 'thumbnail' in fields and video is not None and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    def iter_resources(self, objs, split_path):
        """Yield the resources found under ``split_path``.

        Nothing is yielded unless BaseVideo is among ``objs``. At the root
        level the ``latest`` collection is yielded; inside ``latest`` the
        videos returned by the browser are yielded.
        """
        if BaseVideo in objs:
            collection = self.get_collection(objs, split_path)
            if collection.path_level == 0:
                yield self.get_collection(objs, [u'latest'])
            if collection.split_path == [u'latest']:
                # NOTE(review): latest_videos() is commented out in
                # browser.py of this patch, so this call cannot succeed
                # until the browser implements it.
                for video in self.browser.latest_videos():
                    yield video

    def validate_collection(self, objs, collection):
        """Accept the root and the ``latest`` video collection only.

        :raises CollectionNotFound: for any other path
        """
        if collection.path_level == 0:
            return
        if BaseVideo in objs and collection.split_path == [u'latest']:
            collection.title = u'Latest GDCVault videos'
            return
        raise CollectionNotFound(collection.split_path)

    # Maps object classes to the method used to fill them.
    OBJECTS = {GDCVaultVideo: fill_video}
# -*- coding: utf-8 -*-

# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url

#from .pages.index import IndexPage
from .pages import VideoPage
from .video import GDCVaultVideo


__all__ = ['GDCVaultBrowser']


class GDCVaultBrowser(BaseBrowser):
    """Browser for gdcvault.com; only the video play page is mapped."""

    DOMAIN = 'gdcvault.com'
    ENCODING = None
    # The named group must be called ``id``: VideoPage.get_video() reads
    # self.group_dict['id'] to build a new video. The dot of the domain is
    # escaped so it only matches a literal '.'.
    PAGES = {
        r'http://[w\.]*gdcvault\.com/play/(?P<id>[\d]+)/?.*': VideoPage,
    }

    # NOTE(review): the decorator presumably turns a bare id into a URL
    # through GDCVaultVideo.id2url before the call - confirm against
    # weboob.tools.browser.decorators.
    @id2url(GDCVaultVideo.id2url)
    def get_video(self, url, video=None):
        """Open ``url`` and return the video parsed by the resulting page.

        :param url: address of the play page
        :param video: optional existing video object handed to the page
                      so that it is filled in place of a new one
        """
        self.location(url)
        return self.page.get_video(video)

    # Not implemented yet (there is no IndexPage in this module):
    #
    # def search_videos(self, pattern, sortby):
    #     return None
    #     self.location(self.buildurl('http://gdcvault.com/en/search%s' % sortby, query=pattern.encode('utf-8')))
    #     assert self.is_on_page(IndexPage)
    #     return self.page.iter_videos()

    # def latest_videos(self):
    #     self.home()
    #     assert self.is_on_page(IndexPage)
    #     return self.page.iter_videos()
differ diff --git a/modules/gdcvault/favicon.xcf b/modules/gdcvault/favicon.xcf new file mode 100644 index 00000000..32695aaa Binary files /dev/null and b/modules/gdcvault/favicon.xcf differ diff --git a/modules/gdcvault/pages.py b/modules/gdcvault/pages.py new file mode 100644 index 00000000..73ac80a7 --- /dev/null +++ b/modules/gdcvault/pages.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.mech import ClientForm
ControlNotFoundError = ClientForm.ControlNotFoundError

from weboob.tools.browser import BasePage

import re
import datetime
from dateutil.parser import parse as parse_dt

from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BrokenPageError

from .video import GDCVaultVideo

#import lxml.etree


__all__ = ['VideoPage']


class VideoPage(BasePage):
    """Play page of a single talk; parsed into a GDCVaultVideo."""

    def get_video(self, video=None):
        """Fill and return a video from this page and its player config file.

        The title is read from the HTML ``<title>``. The stream URLs, date
        and duration are read from the XML config file referenced by the
        player iframe.

        :param video: existing object to fill; when None a GDCVaultVideo is
                      created from the ``id`` group of the page URL
        :returns: the filled video, or None when no iframe is selected or
                  its ``src`` does not have the expected player form
        :raises BrokenPageError: when the ``akamaihost`` element of the
                                 config file has no text
        """
        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        page_root = self.document.getroot()

        # the config file has it too, but in CDATA
        titles = self.parser.select(page_root, 'title')
        if len(titles) > 0:
            title = titles[0].text.strip()
            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
            if m:
                # keep only what follows the "GDC Vault - " prefix
                title = m.group(1)
            video.title = unicode(title)

        # get the config file for the rest
        iframe = self.parser.select(page_root, 'iframe', 1)
        if iframe is None:
            return None
        iframe_url = iframe.attrib['src']
        m = re.match(r'(http:.*)player.html\?.*xmlURL=([^&]+)\&token=([^&]+)', iframe_url)
        if not m:
            return None
        # location of the player + value of its xmlURL parameter
        config_url = m.group(1) + m.group(2)

        config = self.browser.get_document(self.browser.openurl(config_url))
        config_root = config.getroot()

        host = self.parser.select(config_root, 'akamaihost', 1).text
        if host is None:
            raise BrokenPageError('Missing tag in xml config file')

        # one RTMP stream per recorded view of the talk
        videos = {}
        for key, tag in (('speaker', 'speakervideo'), ('slides', 'slidevideo')):
            stream = self.parser.select(config_root, tag, 1)
            videos[key] = 'rtmp://' + host + '/' + stream.text

        date = self.parser.select(config_root, 'date', 1)
        video.date = parse_dt(date.text)

        duration = self.parser.select(config_root, 'duration', 1)
        m = re.match(r'(\d\d):(\d\d):(\d\d)', duration.text)
        if m:
            hours, minutes, seconds = [int(part) for part in m.groups()]
            video.duration = datetime.timedelta(hours=hours,
                                                minutes=minutes,
                                                seconds=seconds)

        # TODO: speaker as CDATA (would give video.author); the element is
        # selected but its content is not used yet.
        self.parser.select(config_root, 'speaker', 1)

        # XXX only the speaker stream is exposed, the slides stream
        # collected above is not attached to the video
        video.url = unicode(videos['speaker'])

        video.set_empty_fields(NotAvailable)
        return video

    def set_details(self, v):
        """Set ``v.date`` and ``v.duration`` from markup embedded in the page.

        The date comes from ``meta[name=available]`` (``DD-MM-YYYY HH:MM``).
        The duration is the difference between the two times of
        ``span.ep_subtitle span.ep_date``
        (``HH:MM / HH:MM - DD-MM-YYYY``).

        Not called by get_video(); the call there is disabled.

        :raises BrokenPageError: when a value does not match its format
        """
        page_root = self.document.getroot()

        obj = self.parser.select(page_root, 'meta[name=available]', 1)
        if obj is not None:
            value = obj.attrib['content']
            m = re.match(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            day, month, year, hour, minute = [int(part) for part in m.groups()]
            v.date = datetime.datetime(year=year,
                                       month=month,
                                       day=day,
                                       hour=hour,
                                       minute=minute)

        obj = self.parser.select(page_root, 'span.ep_subtitle', 1)
        if obj is not None:
            span = self.parser.select(obj, 'span.ep_date', 1)
            value = span.text
            m = re.match(r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            bhour, bminute, ehour, eminute, day, month, year = \
                [int(part) for part in m.groups()]

            start = datetime.datetime(year=year,
                                      month=month,
                                      day=day,
                                      hour=bhour,
                                      minute=bminute)
            end = datetime.datetime(year=year,
                                    month=month,
                                    day=day,
                                    hour=ehour,
                                    minute=eminute)

            v.duration = end - start
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.tools.test import BackendTest
#from weboob.capabilities.video import BaseVideo


class GDCVaultTest(BackendTest):
    """Test case bound to the ``gdcvault`` backend; no test is enabled yet."""

    BACKEND = 'gdcvault'

    # The tests below are disabled.
    # NOTE(review): they assert that v.url starts with 'http://', while
    # pages.py in this patch builds 'rtmp://' stream URLs - revisit the
    # assertions before enabling them.

    # def test_search(self):
    #     l = list(self.backend.search_videos('linux'))
    #     self.assertTrue(len(l) > 0)
    #     v = l[0]
    #     self.backend.fillobj(v, ('url',))
    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
    #     self.backend.browser.openurl(v.url)

    # def test_latest(self):
    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
    #     self.assertTrue(len(l) > 0)
    #     v = l[0]
    #     self.backend.fillobj(v, ('url',))
    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))


# --- header of modules/gdcvault/video.py (next file of this patch) ---
# -*- coding: utf-8 -*-

# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.capabilities.video import BaseVideo

import re

__all__ = ['GDCVaultVideo']


class GDCVaultVideo(BaseVideo):
    """BaseVideo whose ``ext`` is preset to ``flv``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to BaseVideo and set the file extension."""
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = u'flv'

    @classmethod
    def id2url(cls, _id):
        """Return the play-page URL for ``_id``.

        :param _id: numeric id, optionally followed by ``#speaker`` or
                    ``#slides``
        :returns: ``http://www.gdcvault.com/play/<number>`` with the
                  ``#speaker`` / ``#slides`` fragment kept when present
        """
        # attempt to enlarge the id namespace to differentiate
        # videos from the same page
        #
        # Capture the number and the fragment separately: formatting the
        # whole id into a template that already ends with the fragment
        # produced URLs such as ".../play/123#speaker#speaker".
        m = re.match(r'(\d+)#(speaker|slides)', _id)
        if m:
            return u'http://www.gdcvault.com/play/%s#%s' % m.groups()
        return u'http://www.gdcvault.com/play/%s' % _id