# -*- coding: utf-8 -*- # Copyright(C) 2010-2011 Romain Bignon # Copyright(C) 2012 François Revol # # This file is part of weboob. # # weboob is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # weboob is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . from weboob.tools.mech import ClientForm ControlNotFoundError = ClientForm.ControlNotFoundError from weboob.tools.browser import BasePage import re import datetime from dateutil.parser import parse as parse_dt from weboob.capabilities.base import NotAvailable from weboob.tools.browser import BrokenPageError from .video import GDCVaultVideo #import lxml.etree __all__ = ['VideoPage'] class VideoPage(BasePage): def get_video(self, video=None): if video is None: video = GDCVaultVideo(self.group_dict['id']) # the config file has it too, but in CDATA obj = self.parser.select(self.document.getroot(), 'title') if len(obj) > 0: title = obj[0].text.strip() m = re.match('GDC Vault\s+-\s+(.*)', title) if m: title = m.group(1) video.title = unicode(title) # get the config file for the rest obj = self.parser.select(self.document.getroot(), 'iframe', 1) if obj is None: return None iframe_url = obj.attrib['src'] m = re.match('(http:.*)player.html\?.*xmlURL=([^&]+)\&token=([^&]+)', iframe_url) if not m: return None config_url = m.group(1) + m.group(2) #config = self.browser.openurl(config_url).read() config = self.browser.get_document(self.browser.openurl(config_url)) obj = self.parser.select(config.getroot(), 'akamaihost', 1) host = obj.text if host is None: raise BrokenPageError('Missing tag in xml config file') videos = {} obj = self.parser.select(config.getroot(), 'speakervideo', 1) videos['speaker'] = 'rtmp://' + host + '/' + obj.text obj = self.parser.select(config.getroot(), 'slidevideo', 1) videos['slides'] = 'rtmp://' + host + '/' + obj.text #print videos obj = self.parser.select(config.getroot(), 'date', 1) video.date = parse_dt(obj.text) obj = self.parser.select(config.getroot(), 'duration', 1) m = re.match('(\d\d):(\d\d):(\d\d)', obj.text) if m: video.duration = datetime.timedelta(hours = int(m.group(1)), minutes = int(m.group(2)), seconds = int(m.group(3))) obj = self.parser.select(config.getroot(), 'speaker', 1) #print obj.text_content() #TODO: speaker as CDATA #video.author = u'European Parliament' #XXX video.url = unicode(videos['speaker']) #self.set_details(video) video.set_empty_fields(NotAvailable) return video obj = self.parser.select(self.document.getroot(), 'title') if len(obj) < 1: return None title = obj[0].text.strip() m = re.match('GDC Vault\s+-\s+(.*)', title) if m: title = m.group(1) def set_details(self, v): obj = self.parser.select(self.document.getroot(), 'meta[name=available]', 1) if obj is not None: value = obj.attrib['content'] m = re.match('(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value) if not m: raise BrokenPageError('Unable to parse datetime: %r' % value) day = m.group(1) month = m.group(2) year = m.group(3) hour = m.group(4) minute = m.group(5) v.date = datetime.datetime(year=int(year), month=int(month), day=int(day), hour=int(hour), minute=int(minute)) obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle', 1) if obj is not None: span = self.parser.select(obj, 'span.ep_date', 1) value = span.text m = re.match('(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value) if not m: raise BrokenPageError('Unable to parse datetime: %r' % value) bhour = m.group(1) bminute = m.group(2) ehour = m.group(3) eminute = m.group(4) day = m.group(5) month = m.group(6) year = m.group(7) start = datetime.datetime(year=int(year), month=int(month), day=int(day), hour=int(bhour), minute=int(bminute)) end = datetime.datetime(year=int(year), month=int(month), day=int(day), hour=int(ehour), minute=int(eminute)) v.duration = end - start