diff --git a/modules/europarl/__init__.py b/modules/europarl/__init__.py
new file mode 100644
index 00000000..0994ffed
--- /dev/null
+++ b/modules/europarl/__init__.py
@@ -0,0 +1,3 @@
+from .backend import EuroparlBackend
+
+__all__ = ['EuroparlBackend']  # the backend class is the only name exported by the package
diff --git a/modules/europarl/backend.py b/modules/europarl/backend.py
new file mode 100644
index 00000000..971c7d6c
--- /dev/null
+++ b/modules/europarl/backend.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo, BaseVideo
+from weboob.tools.backend import BaseBackend
+from weboob.capabilities.collection import ICapCollection, CollectionNotFound
+
+from .browser import EuroparlBrowser
+from .video import EuroparlVideo
+
+
+__all__ = ['EuroparlBackend']
+
+
+class EuroparlBackend(BaseBackend, ICapVideo, ICapCollection):
+    """Weboob backend for the European Parliament video streaming site.
+
+    Video retrieval is delegated to ``EuroparlBrowser``; the collection
+    interface only knows a single ``latest`` collection.
+    """
+
+    NAME = 'europarl'
+    MAINTAINER = u'François Revol'
+    EMAIL = 'revol@free.fr'
+    VERSION = '0.d'
+    DESCRIPTION = 'Europarl parliamentary video streaming website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = EuroparlBrowser
+
+    def get_video(self, _id):
+        """Return the video designated by ``_id``, as fetched by the browser."""
+        with self.browser:
+            return self.browser.get_video(_id)
+
+    # Sort keys, indexed by the ``sortby`` argument of the (disabled) search below.
+    SORTBY = ['relevance', 'rating', 'views', 'time']
+
+    # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+    #     with self.browser:
+    #         return self.browser.search_videos(pattern, self.SORTBY[sortby])
+
+    def fill_video(self, video, fields):
+        """Load the requested ``fields`` of ``video`` and return the video.
+
+        Unless ``fields`` is exactly ``['thumbnail']``, the video is fetched
+        again from its page. The thumbnail data is downloaded when
+        'thumbnail' is requested and the video has a thumbnail.
+        """
+        needs_page = fields != ['thumbnail']
+        if needs_page:
+            # asking for more than the thumbnail probably means every field is wanted
+            with self.browser:
+                page_url = EuroparlVideo.id2url(video.id)
+                video = self.browser.get_video(page_url, video)
+
+        wants_thumbnail = 'thumbnail' in fields
+        if wants_thumbnail and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    def iter_resources(self, objs, split_path):
+        """Yield the resources found under ``split_path``.
+
+        Nothing is yielded unless ``BaseVideo`` is among ``objs``. The root
+        yields the ``latest`` collection; ``latest`` yields the videos
+        returned by the browser's ``latest_videos()``.
+        """
+        if BaseVideo not in objs:
+            return
+
+        collection = self.get_collection(objs, split_path)
+        if collection.path_level == 0:
+            yield self.get_collection(objs, [u'latest'])
+        if collection.split_path == [u'latest']:
+            # NOTE(review): latest_videos() is still commented out in browser.py,
+            # so this branch presumably fails until it is implemented - confirm.
+            for video in self.browser.latest_videos():
+                yield video
+
+    def validate_collection(self, objs, collection):
+        """Check that ``collection`` exists, setting its title when relevant.
+
+        The root is always accepted. ``latest`` is accepted (and titled) when
+        ``BaseVideo`` is among ``objs``; anything else raises
+        ``CollectionNotFound``.
+        """
+        if collection.path_level == 0:
+            return
+        if BaseVideo not in objs or collection.split_path != [u'latest']:
+            raise CollectionNotFound(collection.split_path)
+        collection.title = u'Latest Europarl videos'
+
+    # Object class -> method used to fill it.
+    OBJECTS = {EuroparlVideo: fill_video}
diff --git a/modules/europarl/browser.py b/modules/europarl/browser.py
new file mode 100644
index 00000000..734bd21a
--- /dev/null
+++ b/modules/europarl/browser.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+#from .pages.index import IndexPage
+from .pages import VideoPage
+from .video import EuroparlVideo
+
+
+__all__ = ['EuroparlBrowser']
+
+
+class EuroparlBrowser(BaseBrowser):
+    """Browser for the video pages of the European Parliament website."""
+
+    DOMAIN = 'europarl.europa.eu'
+    ENCODING = None
+
+    # URL pattern -> page class. VideoPage reads the ``id`` group through
+    # ``group_dict['id']``, so that group name is required.
+    # NOTE(review): ``lang`` is an assumed name for the language path segment;
+    # nothing visible in this module reads it - confirm against upstream.
+    # Every entry ends with a comma so the TODO lines can simply be uncommented.
+    PAGES = {
+        r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/committees/video\?.*event=(?P<id>[^&]+).*': VideoPage,
+        r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/other-events/video\?.*event=(?P<id>[^&]+).*': VideoPage,
+        # TODO: plenaries
+        # r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/plenary/video\?.*date=(?P<id>[^&]+).*': VideoPage,
+        # r'http://[w\.]*europarl\.europa\.eu/ep-live/(?P<lang>\w+)/plenary/video\?.*debate=(?P<id>[^&]+).*': VideoPage,
+    }
+
+    @id2url(EuroparlVideo.id2url)
+    def get_video(self, url, video=None):
+        """Open ``url`` and return the video parsed from the resulting page.
+
+        ``video`` is passed through to the page's ``get_video()``. The
+        ``id2url`` decorator is configured with ``EuroparlVideo.id2url``.
+        """
+        self.location(url)
+        return self.page.get_video(video)
+
+    # def search_videos(self, pattern, sortby):
+    #     return None
+    #     self.location(self.buildurl('http://europarltv.europa.eu/en/search%s' % sortby, query=pattern.encode('utf-8')))
+    #     assert self.is_on_page(IndexPage)
+    #     return self.page.iter_videos()
+
+    # def latest_videos(self):
+    #     self.home()
+    #     assert self.is_on_page(IndexPage)
+    #     return self.page.iter_videos()
diff --git a/modules/europarl/favicon.png b/modules/europarl/favicon.png
new file mode 100644
index 00000000..fffc442d
Binary files /dev/null and b/modules/europarl/favicon.png differ
diff --git a/modules/europarl/favicon_europarl.xcf b/modules/europarl/favicon_europarl.xcf
new file mode 100644
index 00000000..ea730149
Binary files /dev/null and b/modules/europarl/favicon_europarl.xcf differ
diff --git a/modules/europarl/pages.py b/modules/europarl/pages.py
new file mode 100644
index 00000000..19eaf335
--- /dev/null
+++ b/modules/europarl/pages.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.mech import ClientForm
+ControlNotFoundError = ClientForm.ControlNotFoundError
+
+from weboob.tools.browser import BasePage
+
+import re
+import datetime
+
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.browser import BrokenPageError
+
+from .video import EuroparlVideo
+
+
+
+__all__ = ['VideoPage']
+
+class VideoPage(BasePage):
+    """Page showing a single Europarl video."""
+
+    def get_video(self, video=None):
+        """Fill and return a video from the current page.
+
+        A new ``EuroparlVideo`` is built from the ``id`` URL group when
+        ``video`` is None. ``set_empty_fields(NotAvailable)`` is called on
+        the video before it is returned.
+        """
+        if video is None:
+            video = EuroparlVideo(self.group_dict['id'])
+
+        # unicode(None) would store the literal string u'None', so a missing
+        # title or URL is left untouched instead.
+        title = self.get_title()
+        if title is not None:
+            video.title = unicode(title)
+        url = self.get_url()
+        if url is not None:
+            video.url = unicode(url)
+        self.set_details(video)
+
+        video.set_empty_fields(NotAvailable)
+        return video
+
+    def get_url(self):
+        """Return the ``value`` of the ``input#codeUrl`` element, or None."""
+        # TODO: plenaries can be downloaded as mp4...
+        obj = self.parser.select(self.document.getroot(), 'input#codeUrl', 1)
+        if obj is None:
+            return None
+        return obj.attrib['value']
+
+    def get_title(self):
+        """Return the title of the video, or None when the page has none.
+
+        The text of ``h1#player_subjectTitle`` is used, falling back to the
+        page ``<title>``. The acronym and theme spans of the first
+        ``span.ep_subtitle``, when present, are appended.
+        """
+        root = self.document.getroot()
+        obj = self.parser.select(root, 'h1#player_subjectTitle')
+        if len(obj) < 1:
+            obj = self.parser.select(root, 'title')
+        if len(obj) < 1:
+            return None
+        # guard against an element whose .text is None
+        title = (obj[0].text or u'').strip()
+
+        obj = self.parser.select(root, 'span.ep_subtitle')
+        if len(obj) < 1:
+            return title
+
+        for span in self.parser.select(obj[0], 'span.ep_acronym, span.ep_theme'):
+            text = span.text_content()
+            if text:
+                title += ' ' + text.strip()
+
+        return title
+
+    def set_details(self, v):
+        """Set the author, date and duration of ``v`` from the page.
+
+        Raises ``BrokenPageError`` when a date/time string found in the page
+        does not have the expected format.
+        """
+        v.author = u'European Parliament'
+        root = self.document.getroot()
+
+        # <meta name="available" content="DD-MM-YYYY HH:MM">
+        obj = self.parser.select(root, 'meta[name=available]', 1)
+        if obj is not None:
+            value = obj.attrib['content']
+            m = re.match(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
+            if not m:
+                raise BrokenPageError('Unable to parse datetime: %r' % value)
+            day, month, year, hour, minute = [int(g) for g in m.groups()]
+            v.date = datetime.datetime(year=year, month=month, day=day,
+                                       hour=hour, minute=minute)
+
+        # span.ep_date holds "HH:MM / HH:MM - DD-MM-YYYY" (start / end - day)
+        obj = self.parser.select(root, 'span.ep_subtitle', 1)
+        if obj is not None:
+            span = self.parser.select(obj, 'span.ep_date', 1)
+            # a None text is reported as a broken page rather than a TypeError
+            value = span.text or ''
+            m = re.match(r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)', value)
+            if not m:
+                raise BrokenPageError('Unable to parse datetime: %r' % value)
+            bhour, bminute, ehour, eminute, day, month, year = [int(g) for g in m.groups()]
+            start = datetime.datetime(year=year, month=month, day=day,
+                                      hour=bhour, minute=bminute)
+            end = datetime.datetime(year=year, month=month, day=day,
+                                    hour=ehour, minute=eminute)
+            if end < start:
+                # the event ran past midnight: the end time is on the next day
+                end += datetime.timedelta(days=1)
+
+            v.duration = end - start
diff --git a/modules/europarl/test.py b/modules/europarl/test.py
new file mode 100644
index 00000000..aa0c6dec
--- /dev/null
+++ b/modules/europarl/test.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+#from weboob.capabilities.video import BaseVideo
+
+
+class EuroparlTest(BackendTest):
+    """Tests for the europarl backend; every test case is disabled for now."""
+
+    BACKEND = 'europarl'
+
+    # def test_search(self):
+    #     l = list(self.backend.search_videos('neelie kroes'))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+    #     self.backend.browser.openurl(v.url)
+
+    # def test_latest(self):
+    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
diff --git a/modules/europarl/video.py b/modules/europarl/video.py
new file mode 100644
index 00000000..c1cc3907
--- /dev/null
+++ b/modules/europarl/video.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+import re
+
+__all__ = ['EuroparlVideo']
+
+
+class EuroparlVideo(BaseVideo):
+    """Europarl video; every instance gets the ``wmv`` extension."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the base video, then set ``ext`` to ``wmv``."""
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = u'wmv'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the page URL matching the form of ``_id``, or None.
+
+        Patterns are tried in order with ``re.match``, so each one is only
+        anchored at the start of ``_id``.
+        """
+        rules = (
+            (r'.*-COMMITTEE-.*',
+             u'http://www.europarl.europa.eu/ep-live/en/committees/video?event=%s&format=wmv'),
+            (r'.*-SPECIAL-.*',
+             u'http://www.europarl.europa.eu/ep-live/en/other-events/video?event=%s&format=wmv'),
+            # XXX: not yet supported
+            (r'\d\d-\d\d-\d\d\d\d',
+             u'http://www.europarl.europa.eu/ep-live/en/plenary/video?date=%s'),
+            # XXX: not yet supported
+            (r'\d+',
+             u'http://www.europarl.europa.eu/ep-live/en/plenary/video?debate=%s'),
+        )
+        for pattern, template in rules:
+            if re.match(pattern, _id):
+                return template % _id
+        return None
+