Add a video module for gdcvault.com
For now it only fetches the speaker video, but each page can have both a speaker and a slides video feed.
TODO: search
Signed-off-by: François Revol <revol@free.fr>
Signed-off-by: Romain Bignon <romain@symlink.me>
This commit is contained in:
parent
bb9a62b566
commit
d24ca46ef6
8 changed files with 382 additions and 0 deletions
159
modules/gdcvault/pages.py
Normal file
159
modules/gdcvault/pages.py
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
# Copyright(C) 2012 François Revol
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.tools.mech import ClientForm
|
||||
# Module-level alias for ClientForm's ControlNotFoundError.
ControlNotFoundError = ClientForm.ControlNotFoundError
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
|
||||
import re
|
||||
import datetime
|
||||
from dateutil.parser import parse as parse_dt
|
||||
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.tools.browser import BrokenPageError
|
||||
|
||||
from .video import GDCVaultVideo
|
||||
|
||||
#import lxml.etree
|
||||
|
||||
|
||||
|
||||
|
||||
# Names exported by a star-import of this module: only the page class.
__all__ = ['VideoPage']
|
||||
|
||||
class VideoPage(BasePage):
    """Page describing a single GDC Vault video.

    The HTML page itself only provides the title; the stream location,
    date and duration come from an XML config file whose URL is embedded
    in the player iframe of the page.
    """

    def get_video(self, video=None):
        """Fill a video object from this page and its XML player config.

        :param video: object to fill; when None, a new GDCVaultVideo is
                      created from ``self.group_dict['id']``.
        :returns: the filled video, or None when the page has no player
                  iframe whose URL matches the expected pattern.
        :raises BrokenPageError: when the XML config lacks the host or
                                 the speaker video path.
        """
        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        root = self.document.getroot()

        # the config file has it too, but in CDATA
        titles = self.parser.select(root, 'title')
        # An empty <title/> has text None: skip it instead of crashing.
        if len(titles) > 0 and titles[0].text:
            title = titles[0].text.strip()
            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
            if m:
                title = m.group(1)
            video.title = unicode(title)

        # get the config file for the rest
        iframe = self.parser.select(root, 'iframe', 1)
        if iframe is None:
            return None
        iframe_url = iframe.attrib.get('src', '')
        m = re.match(r'(http:.*)player.html\?.*xmlURL=([^&]+)\&token=([^&]+)',
                     iframe_url)
        if not m:
            return None
        # The config path (xmlURL) is appended to the player's base URL.
        config_url = m.group(1) + m.group(2)

        config = self.browser.get_document(self.browser.openurl(config_url))
        config_root = config.getroot()

        host = self.parser.select(config_root, 'akamaihost', 1).text
        if host is None:
            raise BrokenPageError('Missing tag in xml config file')

        videos = {}

        speaker_path = self.parser.select(config_root, 'speakervideo', 1).text
        if speaker_path is None:
            # Without it there is no stream URL to return at all.
            raise BrokenPageError('Missing tag in xml config file')
        videos['speaker'] = 'rtmp://' + host + '/' + speaker_path

        # The slides feed is collected but not exposed yet, so an empty tag
        # is tolerated rather than treated as an error.
        slides_path = self.parser.select(config_root, 'slidevideo', 1).text
        if slides_path is not None:
            videos['slides'] = 'rtmp://' + host + '/' + slides_path

        video.date = parse_dt(self.parser.select(config_root, 'date', 1).text)

        duration = self.parser.select(config_root, 'duration', 1).text
        m = re.match(r'(\d\d):(\d\d):(\d\d)', duration or '')
        if m:
            hours, minutes, seconds = [int(g) for g in m.groups()]
            video.duration = datetime.timedelta(hours=hours,
                                                minutes=minutes,
                                                seconds=seconds)

        # TODO: speaker as CDATA -- the element is selected but the author
        # is not extracted from it yet.
        obj = self.parser.select(config_root, 'speaker', 1)

        # XXX: only the speaker feed is returned for now.
        video.url = unicode(videos['speaker'])

        video.set_empty_fields(NotAvailable)
        return video

    def set_details(self, v):
        """Set date and duration on ``v`` from the HTML page itself.

        Reads the ``meta[name=available]`` content (``DD-MM-YYYY HH:MM``)
        into ``v.date`` and the ``span.ep_date`` text found inside
        ``span.ep_subtitle`` (``HH:MM / HH:MM - DD-MM-YYYY``) into
        ``v.duration`` as end time minus start time.  get_video() does not
        currently call this method.

        :param v: video object to update in place.
        :raises BrokenPageError: when either value does not match its
                                 expected format.
        """
        root = self.document.getroot()

        obj = self.parser.select(root, 'meta[name=available]', 1)
        if obj is not None:
            value = obj.attrib['content']
            m = re.match(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            day, month, year, hour, minute = [int(g) for g in m.groups()]
            v.date = datetime.datetime(year=year,
                                       month=month,
                                       day=day,
                                       hour=hour,
                                       minute=minute)

        obj = self.parser.select(root, 'span.ep_subtitle', 1)
        if obj is not None:
            span = self.parser.select(obj, 'span.ep_date', 1)
            value = span.text
            m = re.match(r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            bhour, bminute, ehour, eminute, day, month, year = \
                [int(g) for g in m.groups()]

            # Begin and end share the same calendar day in the source text.
            start = datetime.datetime(year=year,
                                      month=month,
                                      day=day,
                                      hour=bhour,
                                      minute=bminute)
            end = datetime.datetime(year=year,
                                    month=month,
                                    day=day,
                                    hour=ehour,
                                    minute=eminute)

            v.duration = end - start
|
||||
Loading…
Add table
Add a link
Reference in a new issue