From 0dacfa11502dd147f3ba193dbd10daef9de49509 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier <laurent@bachelier.name> Date: Sat, 4 Feb 2012 02:06:36 +0100 Subject: [PATCH] Add support for fetching past emissions URLs Works with France Inter and France Culture --- modules/radiofrance/__init__.py | 1 - modules/radiofrance/backend.py | 26 +++++++++++-- modules/radiofrance/browser.py | 66 ++++++++++++++++++++++++++++++++- modules/radiofrance/test.py | 17 +++++++++ 4 files changed, 104 insertions(+), 6 deletions(-) diff --git a/modules/radiofrance/__init__.py b/modules/radiofrance/__init__.py index 35ed9971..f4f224de 100644 --- a/modules/radiofrance/__init__.py +++ b/modules/radiofrance/__init__.py @@ -22,4 +22,3 @@ from .backend import RadioFranceBackend __all__ = ['RadioFranceBackend'] - diff --git a/modules/radiofrance/backend.py b/modules/radiofrance/backend.py index 6d31755e..ee3885fc 100644 --- a/modules/radiofrance/backend.py +++ b/modules/radiofrance/backend.py @@ -19,18 +19,19 @@ from weboob.capabilities.base import NotLoaded +from weboob.capabilities.video import ICapVideo from weboob.capabilities.radio import ICapRadio, Radio, Stream, Emission from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection from weboob.tools.backend import BaseBackend -from .browser import RadioFranceBrowser +from .browser import RadioFranceBrowser, RadioFranceVideo __all__ = ['RadioFranceBackend'] -class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection): +class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo): NAME = 'radiofrance' MAINTAINER = 'Laurent Bachelier' EMAIL = 'laurent@bachelier.name' @@ -170,4 +171,23 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection): radio.current.artist = artist return radio - OBJECTS = {Radio: fill_radio} + # avoid warning, but TODO + # http://www.franceculture.fr/recherche/key%3DYOURSEARCH%2526type%3Demission + #
http://www.franceinter.fr/recherche/key%3DYOURSEARCH%2526tri%3Dpertinence%2526theme%3Ddefault%2526type%3Demission + def iter_search_results(self, *args, **kwargs): + return [] + + def get_video(self, _id): + with self.browser: + video = self.browser.get_video(_id) + return video + + def fill_video(self, video, fields): + if 'url' in fields: + with self.browser: + video.url = self.browser.get_url(video.id) + + return video + + OBJECTS = {Radio: fill_radio, + RadioFranceVideo: fill_video} diff --git a/modules/radiofrance/browser.py b/modules/radiofrance/browser.py index 5e30fe27..fdd20954 100644 --- a/modules/radiofrance/browser.py +++ b/modules/radiofrance/browser.py @@ -18,9 +18,13 @@ # along with weboob. If not, see <http://www.gnu.org/licenses/>. from weboob.tools.browser import BaseBrowser, BasePage, BrokenPageError +from weboob.capabilities.video import BaseVideo +from weboob.tools.browser.decorators import id2url from StringIO import StringIO from time import time +import re +import urlparse try: import json @@ -28,7 +32,42 @@ except ImportError: import simplejson as json -__all__ = ['RadioFranceBrowser'] +__all__ = ['RadioFranceBrowser', 'RadioFranceVideo'] + + +class RadioFranceVideo(BaseVideo): + RADIOS = ('franceinter', 'franceculture') + + @classmethod + def id2url(cls, _id): + radio_id, replay_id = _id.split('-', 2) + return 'http://www.%s.fr/player/reecouter?play=%s' % \ + (radio_id, replay_id) + + +class PlayerPage(BasePage): + URL = r'^http://www\.(?P%s)\.fr/player/reecouter\?play=(?P\d+)$' \ + % '|'.join(RadioFranceVideo.RADIOS) + MP3_REGEXP = re.compile(r'sites%2Fdefault.+.(?:MP3|mp3)') + + def get_url(self): + radio_id = self.groups[0] + player = self.parser.select(self.document.getroot(), '#rfPlayer embed', 1) + urlparams = urlparse.parse_qs(player.attrib['src']) + return 'http://www.%s.fr/%s' % (radio_id, urlparams['urlAOD'][0]) + + +class ReplayPage(BasePage): + URL = r'^http://www\.(?P%s)\.fr/emission-.+$' \ + % '|'.join(RadioFranceVideo.RADIOS) + + def get_id(self): +
radio_id = self.groups[0] + for node in self.parser.select(self.document.getroot(), 'div.node-rf_diffusion'): + match = re.match(r'^node-(\d+)$', node.attrib.get('id', '')) + if match: + player_id = match.groups()[0] + return (radio_id, player_id) class DataPage(BasePage): @@ -54,6 +93,7 @@ class RssPage(BasePage): class RssAntennaPage(BasePage): ENCODING = 'ISO-8859-1' + def get_track(self): # This information is not always available try: @@ -71,7 +111,9 @@ class RadioFranceBrowser(BaseBrowser): ENCODING = 'UTF-8' PAGES = {r'/playerjs/direct/donneesassociees/html\?guid=$': DataPage, r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS.html': RssPage, - r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage} + r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage, + PlayerPage.URL: PlayerPage, + ReplayPage.URL: ReplayPage} def get_current_playerjs(self, _id): self.location('http://www.%s.fr/playerjs/direct/donneesassociees/html?guid=' % _id) @@ -103,3 +145,23 @@ class RadioFranceBrowser(BaseBrowser): result = self.page.get_track() self.ENCODING = RadioFranceBrowser.ENCODING return result + + @id2url(RadioFranceVideo.id2url) + def get_video(self, url): + radio_id = replay_id = None + match = re.match(PlayerPage.URL, url) + if match: + radio_id, replay_id = match.groups() + elif re.match(ReplayPage.URL, url): + self.location(url) + assert self.is_on_page(ReplayPage) + radio_id, replay_id = self.page.get_id() + if radio_id and replay_id: + _id = '%s-%s' % (radio_id, replay_id) + return RadioFranceVideo(_id) + + @id2url(RadioFranceVideo.id2url) + def get_url(self, url): + self.location(url) + assert self.is_on_page(PlayerPage) + return self.page.get_url() diff --git a/modules/radiofrance/test.py b/modules/radiofrance/test.py index 77893cc9..0b847a24 100644 --- a/modules/radiofrance/test.py +++ b/modules/radiofrance/test.py @@ -27,3 +27,20 @@ class 
RadioFranceTest(BackendTest): def test_get_radios(self): l = list(self.backend.iter_resources([])) self.assertTrue(len(l) > 0) + + def test_get_video(self): + # this should be available up to 24/10/2014 15h00 + urls = ('http://www.franceinter.fr/emission-vivre-avec-les-betes-y-arthus-bertrand-felins-g-tsai-s-envoler-conte-boreal-reha-hutin-30-m', + 'http://www.franceinter.fr/player/reecouter?play=263735', + 'franceinter-263735') + for url in urls: + vid = self.backend.get_video(url) + assert vid.id == urls[-1] + self.backend.fillobj(vid, ['url']) + assert vid.url.lower().endswith('.mp3') + + # france culture (no expiration known) + vid = self.backend.get_video('http://www.franceculture.fr/emission-la-dispute-expositions-paul-strand-youssef-nabil-et-dorothee-smith-2012-02-01') + assert vid.id + self.backend.fillobj(vid, ['url']) + assert vid.url.lower().endswith('.mp3')