Add module for Europarl videos
We currently support committees and other events. TODO: support plenary sessions. TODO: support latest and search. Signed-off-by: François Revol <revol@free.fr> Signed-off-by: Romain Bignon <romain@symlink.me>
This commit is contained in:
parent
e04bd9c8eb
commit
a5a5489872
8 changed files with 354 additions and 0 deletions
3
modules/europarl/__init__.py
Normal file
3
modules/europarl/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
from .backend import EuroparlBackend
|
||||||
|
|
||||||
|
__all__ = ['EuroparlBackend']
|
||||||
82
modules/europarl/backend.py
Normal file
82
modules/europarl/backend.py
Normal file
|
|
@ -0,0 +1,82 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010-2011 Romain Bignon
|
||||||
|
# Copyright(C) 2012 François Revol
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
from weboob.capabilities.video import ICapVideo, BaseVideo
|
||||||
|
from weboob.tools.backend import BaseBackend
|
||||||
|
from weboob.capabilities.collection import ICapCollection, CollectionNotFound
|
||||||
|
|
||||||
|
from .browser import EuroparlBrowser
|
||||||
|
from .video import EuroparlVideo
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['EuroparlBackend']
|
||||||
|
|
||||||
|
|
||||||
|
class EuroparlBackend(BaseBackend, ICapVideo, ICapCollection):
    """Weboob backend for the Europarl video streaming website.

    Videos are fetched through ``EuroparlBrowser``; the backend also
    declares a single ``latest`` collection for video objects.
    """

    NAME = 'europarl'
    MAINTAINER = u'François Revol'
    EMAIL = 'revol@free.fr'
    VERSION = '0.d'
    DESCRIPTION = 'Europarl parliamentary video streaming website'
    LICENSE = 'AGPLv3+'
    BROWSER = EuroparlBrowser

    # Sort criteria names; indexed by ``sortby`` in the disabled search below.
    SORTBY = ['relevance', 'rating', 'views', 'time']

    def get_video(self, _id):
        """Return the video that the browser resolves for ``_id``."""
        with self.browser:
            return self.browser.get_video(_id)

    # TODO: search is not supported yet.
    # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
    #     with self.browser:
    #         return self.browser.search_videos(pattern, self.SORTBY[sortby])

    def fill_video(self, video, fields):
        """Complete ``video`` for the requested ``fields`` and return it.

        Unless the request is exactly ``['thumbnail']``, the whole video
        page is fetched again.  The thumbnail data is downloaded only when
        'thumbnail' is requested and the video has a thumbnail.
        """
        if fields != ['thumbnail']:
            # Anything other than a thumbnail-only request: reload every
            # field from the video page.
            page_url = EuroparlVideo.id2url(video.id)
            with self.browser:
                video = self.browser.get_video(page_url, video)

        if 'thumbnail' in fields and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    def iter_resources(self, objs, split_path):
        """Yield the resources found under ``split_path``.

        Nothing is yielded unless ``BaseVideo`` is among ``objs``.  At the
        root level the ``latest`` collection is yielded; inside ``latest``
        the videos returned by the browser are yielded.
        """
        if BaseVideo not in objs:
            return

        collection = self.get_collection(objs, split_path)
        if collection.path_level == 0:
            yield self.get_collection(objs, [u'latest'])
        if collection.split_path == [u'latest']:
            # NOTE(review): ``latest_videos`` only appears as commented-out
            # code in browser.py of this commit -- confirm the browser
            # really provides it before relying on this branch.
            for video in self.browser.latest_videos():
                yield video

    def validate_collection(self, objs, collection):
        """Accept the root and the ``latest`` video collection only.

        Raises ``CollectionNotFound`` for any other path.
        """
        if collection.path_level == 0:
            return

        is_latest = BaseVideo in objs and collection.split_path == [u'latest']
        if not is_latest:
            raise CollectionNotFound(collection.split_path)
        collection.title = u'Latest Europarl videos'

    # Maps an object class to the method used to fill it.
    OBJECTS = {EuroparlVideo: fill_video}
|
||||||
57
modules/europarl/browser.py
Normal file
57
modules/europarl/browser.py
Normal file
|
|
@ -0,0 +1,57 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010-2011 Romain Bignon
|
||||||
|
# Copyright(C) 2012 François Revol
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
from weboob.tools.browser.decorators import id2url
|
||||||
|
|
||||||
|
#from .pages.index import IndexPage
|
||||||
|
from .pages import VideoPage
|
||||||
|
from .video import EuroparlVideo
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['EuroparlBrowser']
|
||||||
|
|
||||||
|
|
||||||
|
class EuroparlBrowser(BaseBrowser):
    """Browser for the Europarl live video pages.

    Only the "committees" and "other-events" video URLs are mapped to a
    page class; plenary sessions are still to be done.
    """

    DOMAIN = 'europarl.europa.eu'
    ENCODING = None

    # URL pattern -> page class.  Each pattern captures the language code
    # as ``lang`` and the value of the ``event`` query parameter as ``id``.
    PAGES = {
        r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/committees/video\?.*event=(?P<id>[^&]+).*': VideoPage,
        r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/other-events/video\?.*event=(?P<id>[^&]+).*': VideoPage,
        # TODO: plenaries
        # r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/plenary/video\?.*date=(?P<id>[^&]+).*': VideoPage
        # r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/plenary/video\?.*debate=(?P<id>[^&]+).*': VideoPage
    }

    @id2url(EuroparlVideo.id2url)
    def get_video(self, url, video=None):
        """Open ``url`` and return the video built by the resulting page.

        ``video``, when given, is handed to the page's ``get_video``.
        """
        self.location(url)
        current_page = self.page
        return current_page.get_video(video)
|
||||||
|
|
||||||
|
# def search_videos(self, pattern, sortby):
|
||||||
|
# return None
|
||||||
|
# self.location(self.buildurl('http://europarltv.europa.eu/en/search%s' % sortby, query=pattern.encode('utf-8')))
|
||||||
|
# assert self.is_on_page(IndexPage)
|
||||||
|
# return self.page.iter_videos()
|
||||||
|
|
||||||
|
# def latest_videos(self):
|
||||||
|
# self.home()
|
||||||
|
# assert self.is_on_page(IndexPage)
|
||||||
|
# return self.page.iter_videos()
|
||||||
BIN
modules/europarl/favicon.png
Normal file
BIN
modules/europarl/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 526 B |
BIN
modules/europarl/favicon_europarl.xcf
Normal file
BIN
modules/europarl/favicon_europarl.xcf
Normal file
Binary file not shown.
120
modules/europarl/pages.py
Normal file
120
modules/europarl/pages.py
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010-2011 Romain Bignon
|
||||||
|
# Copyright(C) 2012 François Revol
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from weboob.tools.mech import ClientForm
|
||||||
|
ControlNotFoundError = ClientForm.ControlNotFoundError
|
||||||
|
|
||||||
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
|
||||||
|
from weboob.capabilities.base import NotAvailable
|
||||||
|
from weboob.tools.browser import BrokenPageError
|
||||||
|
|
||||||
|
from .video import EuroparlVideo
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['VideoPage']
|
||||||
|
|
||||||
|
class VideoPage(BasePage):
    """Page showing a single Europarl video (committee or other event)."""

    # Content of <meta name="available">: "DD-MM-YYYY HH:MM".
    _AVAILABLE_RE = re.compile(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)')
    # Text of span.ep_date: "HH:MM / HH:MM - DD-MM-YYYY" (begin / end - day).
    _SCHEDULE_RE = re.compile(
        r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)')

    def get_video(self, video=None):
        """Build (or complete) the video described by this page.

        :param video: existing video object to fill; when None a new
            ``EuroparlVideo`` is created from the ``id`` URL group.
        :returns: the filled video; fields still empty afterwards are
            passed through ``set_empty_fields(NotAvailable)``.
        """
        if video is None:
            video = EuroparlVideo(self.group_dict['id'])

        # Only assign values that were actually found: converting None with
        # unicode() would store the literal string u'None'.
        title = self.get_title()
        if title is not None:
            video.title = unicode(title)
        url = self.get_url()
        if url is not None:
            video.url = unicode(url)

        self.set_details(video)

        video.set_empty_fields(NotAvailable)
        return video

    def get_url(self):
        """Return the ``value`` attribute of ``<input id="codeUrl">``.

        Returns None when the parser yields no element.
        """
        # TODO: plenaries can be downloaded as mp4...
        obj = self.parser.select(self.document.getroot(), 'input#codeUrl', 1)
        if obj is None:
            return None
        return obj.attrib['value']

    def get_title(self):
        """Return the video title, or None when the page has none.

        The title comes from ``h1#player_subjectTitle`` (falling back to
        ``<title>``), followed by the acronym and theme found in
        ``span.ep_subtitle`` when present.
        """
        root = self.document.getroot()
        obj = self.parser.select(root, 'h1#player_subjectTitle')
        if len(obj) < 1:
            obj = self.parser.select(root, 'title')
            if len(obj) < 1:
                return None
        # .text is None for an element without leading text; treat as empty.
        title = (obj[0].text or u'').strip()

        obj = self.parser.select(root, 'span.ep_subtitle')
        if len(obj) < 1:
            return title

        for span in self.parser.select(obj[0], 'span.ep_acronym, span.ep_theme'):
            text = span.text_content()
            if text:
                title += ' ' + text.strip()

        return title

    def set_details(self, v):
        """Set author, date and duration on video ``v``.

        :raises BrokenPageError: when a date string found in the page does
            not match the expected format.
        """
        v.author = u'European Parliament'
        root = self.document.getroot()

        obj = self.parser.select(root, 'meta[name=available]', 1)
        if obj is not None:
            value = obj.attrib['content']
            m = self._AVAILABLE_RE.match(value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            day, month, year, hour, minute = [int(g) for g in m.groups()]
            v.date = datetime.datetime(year=year, month=month, day=day,
                                       hour=hour, minute=minute)

        obj = self.parser.select(root, 'span.ep_subtitle', 1)
        if obj is not None:
            span = self.parser.select(obj, 'span.ep_date', 1)
            value = span.text
            # An empty span has text None; report it as a broken page rather
            # than letting re raise a TypeError.
            m = self._SCHEDULE_RE.match(value or u'')
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            bhour, bminute, ehour, eminute, day, month, year = \
                [int(g) for g in m.groups()]

            start = datetime.datetime(year=year, month=month, day=day,
                                      hour=bhour, minute=bminute)
            end = datetime.datetime(year=year, month=month, day=day,
                                    hour=ehour, minute=eminute)
            if end < start:
                # Only one date is given; assume an end time earlier than
                # the start time means the event ran past midnight.
                end += datetime.timedelta(days=1)

            v.duration = end - start
|
||||||
42
modules/europarl/test.py
Normal file
42
modules/europarl/test.py
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010-2011 Romain Bignon
|
||||||
|
# Copyright(C) 2012 François Revol
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.tools.test import BackendTest
|
||||||
|
#from weboob.capabilities.video import BaseVideo
|
||||||
|
|
||||||
|
|
||||||
|
class EuroparlTest(BackendTest):
    """Backend test case for the 'europarl' module.

    No test method is enabled yet: the search and latest tests that follow
    are commented out until those features are implemented.
    """

    BACKEND = 'europarl'
|
||||||
|
|
||||||
|
# def test_search(self):
|
||||||
|
# l = list(self.backend.search_videos('neelie kroes'))
|
||||||
|
# self.assertTrue(len(l) > 0)
|
||||||
|
# v = l[0]
|
||||||
|
# self.backend.fillobj(v, ('url',))
|
||||||
|
# self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
|
||||||
|
# self.backend.browser.openurl(v.url)
|
||||||
|
|
||||||
|
# def test_latest(self):
|
||||||
|
# l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
|
||||||
|
# self.assertTrue(len(l) > 0)
|
||||||
|
# v = l[0]
|
||||||
|
# self.backend.fillobj(v, ('url',))
|
||||||
|
# self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
|
||||||
50
modules/europarl/video.py
Normal file
50
modules/europarl/video.py
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010-2011 Roger Philibert
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.capabilities.video import BaseVideo
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
__all__ = ['EuroparlVideo']
|
||||||
|
|
||||||
|
|
||||||
|
class EuroparlVideo(BaseVideo):
    """Video object for the Europarl module; its extension is set to 'wmv'."""

    def __init__(self, *args, **kwargs):
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = u'wmv'

    @classmethod
    def id2url(cls, _id):
        """Return the page URL matching the video identifier ``_id``.

        The rules are tried in order and the first pattern matching the
        start of ``_id`` wins.  Returns None when no rule matches.
        """
        url_rules = (
            (r'.*-COMMITTEE-.*',
             u'http://www.europarl.europa.eu/ep-live/en/committees/video?event=%s&format=wmv'),
            (r'.*-SPECIAL-.*',
             u'http://www.europarl.europa.eu/ep-live/en/other-events/video?event=%s&format=wmv'),
            # XXX: not yet supported
            (r'\d\d-\d\d-\d\d\d\d',
             u'http://www.europarl.europa.eu/ep-live/en/plenary/video?date=%s'),
            # XXX: not yet supported
            (r'\d+',
             u'http://www.europarl.europa.eu/ep-live/en/plenary/video?debate=%s'),
        )
        for pattern, template in url_rules:
            if re.match(pattern, _id):
                return template % _id
        return None
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue