# -*- coding: utf-8 -*- # Copyright(C) 2010-2011 Romain Bignon # Copyright(C) 2012 François Revol # # This file is part of weboob. # # weboob is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # weboob is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . from weboob.tools.mech import ClientForm ControlNotFoundError = ClientForm.ControlNotFoundError from weboob.tools.browser import BasePage import re import datetime from weboob.capabilities.base import NotAvailable from weboob.tools.browser import BrokenPageError from .video import EuroparlVideo __all__ = ['VideoPage'] class VideoPage(BasePage): def get_video(self, video=None): if video is None: video = EuroparlVideo(self.group_dict['id']) video.title = unicode(self.get_title()) video.url = unicode(self.get_url()) self.set_details(video) video.set_empty_fields(NotAvailable) return video def get_url(self): # search for # TODO: plenaries can be downloaded as mp4... obj = self.parser.select(self.document.getroot(), 'input#codeUrl', 1) if obj is None: return None return obj.attrib['value'] def get_title(self): obj = self.parser.select(self.document.getroot(), 'h1#player_subjectTitle') if len(obj) < 1: obj = self.parser.select(self.document.getroot(), 'title') if len(obj) < 1: return None title = obj[0].text.strip() obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle') if len(obj) < 1: return title for span in self.parser.select(obj[0], 'span.ep_acronym, span.ep_theme'): if span.text_content(): title += ' ' + span.text_content().strip() return title def set_details(self, v): v.author = u'European Parliament' obj = self.parser.select(self.document.getroot(), 'meta[name=available]', 1) if obj is not None: value = obj.attrib['content'] print value m = re.match('(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value) if not m: raise BrokenPageError('Unable to parse datetime: %r' % value) day = m.group(1) month = m.group(2) year = m.group(3) hour = m.group(4) minute = m.group(5) v.date = datetime.datetime(year=int(year), month=int(month), day=int(day), hour=int(hour), minute=int(minute)) obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle', 1) if obj is not None: span = self.parser.select(obj, 'span.ep_date', 1) value = span.text m = re.match('(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value) if not m: raise BrokenPageError('Unable to parse datetime: %r' % value) bhour = m.group(1) bminute = m.group(2) ehour = m.group(3) eminute = m.group(4) day = m.group(5) month = m.group(6) year = m.group(7) start = datetime.datetime(year=int(year), month=int(month), day=int(day), hour=int(bhour), minute=int(bminute)) end = datetime.datetime(year=int(year), month=int(month), day=int(day), hour=int(ehour), minute=int(eminute)) v.duration = end - start