Add support for fetching past emissions URLs

Works with France Inter and France Culture
This commit is contained in:
Laurent Bachelier 2012-02-04 02:06:36 +01:00
commit 0dacfa1150
4 changed files with 104 additions and 6 deletions

View file

@ -22,4 +22,3 @@ from .backend import RadioFranceBackend
__all__ = ['RadioFranceBackend']

View file

@ -19,18 +19,19 @@
from weboob.capabilities.base import NotLoaded
from weboob.capabilities.video import ICapVideo
from weboob.capabilities.radio import ICapRadio, Radio, Stream, Emission
from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection
from weboob.tools.backend import BaseBackend
from .browser import RadioFranceBrowser
from .browser import RadioFranceBrowser, RadioFranceVideo
__all__ = ['RadioFranceBackend']
class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection):
class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):
NAME = 'radiofrance'
MAINTAINER = 'Laurent Bachelier'
EMAIL = 'laurent@bachelier.name'
@ -170,4 +171,23 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection):
radio.current.artist = artist
return radio
OBJECTS = {Radio: fill_radio}
# avoid warning, but TODO
# http://www.franceculture.fr/recherche/key%3DYOURSEARCH%2526type%3Demission
# http://www.franceinter.fr/recherche/key%3DYOURSEARCH%2526tri%3Dpertinence%2526theme%3Ddefault%2526type%3Demission
def iter_search_results(self, *args, **kwargs):
return []
def get_video(self, _id):
with self.browser:
video = self.browser.get_video(_id)
return video
def fill_video(self, video, fields):
if 'url' in fields:
with self.browser:
video.url = self.browser.get_url(video.id)
return video
OBJECTS = {Radio: fill_radio,
RadioFranceVideo: fill_video}

View file

@ -18,9 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BasePage, BrokenPageError
from weboob.capabilities.video import BaseVideo
from weboob.tools.browser.decorators import id2url
from StringIO import StringIO
from time import time
import re
import urlparse
try:
import json
@ -28,7 +32,42 @@ except ImportError:
import simplejson as json
__all__ = ['RadioFranceBrowser']
__all__ = ['RadioFranceBrowser', 'RadioFranceVideo']
class RadioFranceVideo(BaseVideo):
RADIOS = ('franceinter', 'franceculture')
@classmethod
def id2url(cls, _id):
radio_id, replay_id = _id.split('-', 2)
return 'http://www.%s.fr/player/reecouter?play=%s' % \
(radio_id, replay_id)
class PlayerPage(BasePage):
URL = r'^http://www\.(?P<radio_id>%s)\.fr/player/reecouter\?play=(?P<replay_id>\d+)$' \
% '|'.join(RadioFranceVideo.RADIOS)
MP3_REGEXP = re.compile(r'sites%2Fdefault.+.(?:MP3|mp3)')
def get_url(self):
radio_id = self.groups[0]
player = self.parser.select(self.document.getroot(), '#rfPlayer embed', 1)
urlparams = urlparse.parse_qs(player.attrib['src'])
return 'http://www.%s.fr/%s' % (radio_id, urlparams['urlAOD'][0])
class ReplayPage(BasePage):
URL = r'^http://www\.(?P<radio_id>%s)\.fr/emission-.+$' \
% '|'.join(RadioFranceVideo.RADIOS)
def get_id(self):
radio_id = self.groups[0]
for node in self.parser.select(self.document.getroot(), 'div.node-rf_diffusion'):
match = re.match(r'^node-(\d+)$', node.attrib.get('id', ''))
if match:
player_id = match.groups()[0]
return (radio_id, player_id)
class DataPage(BasePage):
@ -54,6 +93,7 @@ class RssPage(BasePage):
class RssAntennaPage(BasePage):
ENCODING = 'ISO-8859-1'
def get_track(self):
# This information is not always available
try:
@ -71,7 +111,9 @@ class RadioFranceBrowser(BaseBrowser):
ENCODING = 'UTF-8'
PAGES = {r'/playerjs/direct/donneesassociees/html\?guid=$': DataPage,
r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS.html': RssPage,
r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage}
r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage,
PlayerPage.URL: PlayerPage,
ReplayPage.URL: ReplayPage}
def get_current_playerjs(self, _id):
self.location('http://www.%s.fr/playerjs/direct/donneesassociees/html?guid=' % _id)
@ -103,3 +145,23 @@ class RadioFranceBrowser(BaseBrowser):
result = self.page.get_track()
self.ENCODING = RadioFranceBrowser.ENCODING
return result
@id2url(RadioFranceVideo.id2url)
def get_video(self, url):
radio_id = replay_id = None
match = re.match(PlayerPage.URL, url)
if match:
radio_id, replay_id = match.groups()
elif re.match(ReplayPage.URL, url):
self.location(url)
assert self.is_on_page(ReplayPage)
radio_id, replay_id = self.page.get_id()
if radio_id and replay_id:
_id = '%s-%s' % (radio_id, replay_id)
return RadioFranceVideo(_id)
@id2url(RadioFranceVideo.id2url)
def get_url(self, url):
self.location(url)
assert self.is_on_page(PlayerPage)
return self.page.get_url()

View file

@ -27,3 +27,20 @@ class RadioFranceTest(BackendTest):
def test_get_radios(self):
l = list(self.backend.iter_resources([]))
self.assertTrue(len(l) > 0)
def test_get_video(self):
# this should be available up to 24/10/2014 15h00
urls = ('http://www.franceinter.fr/emission-vivre-avec-les-betes-y-arthus-bertrand-felins-g-tsai-s-envoler-conte-boreal-reha-hutin-30-m',
'http://www.franceinter.fr/player/reecouter?play=263735',
'franceinter-263735')
for url in urls:
vid = self.backend.get_video(url)
assert vid.id == urls[-1]
self.backend.fillobj(vid, ['url'])
assert vid.url.lower().endswith('.mp3')
# france culture (no expiration known)
vid = self.backend.get_video('http://www.franceculture.fr/emission-la-dispute-expositions-paul-strand-youssef-nabil-et-dorothee-smith-2012-02-01')
assert vid.id
self.backend.fillobj(vid, ['url'])
assert vid.url.lower().endswith('.mp3')