Add support for fetching the URLs of past emissions (replays)

Works with France Inter and France Culture
2012-02-04 02:06:36 +01:00 · 2012-02-04 02:06:36 +01:00 · 0dacfa1150
commit 0dacfa1150
parent 53915bf6b5
4 changed files with 104 additions and 6 deletions
--- a/modules/radiofrance/__init__.py
+++ b/modules/radiofrance/__init__.py
@@ -22,4 +22,3 @@ from .backend import RadioFranceBackend


 __all__ = ['RadioFranceBackend']
-
--- a/modules/radiofrance/backend.py
+++ b/modules/radiofrance/backend.py
@@ -19,18 +19,19 @@


 from weboob.capabilities.base import NotLoaded
+from weboob.capabilities.video import ICapVideo
 from weboob.capabilities.radio import ICapRadio, Radio, Stream, Emission
 from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection
 from weboob.tools.backend import BaseBackend


-from .browser import RadioFranceBrowser
+from .browser import RadioFranceBrowser, RadioFranceVideo


 __all__ = ['RadioFranceBackend']


-class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection):
+class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo):
    NAME = 'radiofrance'
    MAINTAINER = 'Laurent Bachelier'
    EMAIL = 'laurent@bachelier.name'
@@ -170,4 +171,23 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection):
                radio.current.artist = artist
        return radio

-    OBJECTS = {Radio: fill_radio}
+    # avoid warning, but TODO
+    # http://www.franceculture.fr/recherche/key%3DYOURSEARCH%2526type%3Demission
+    # http://www.franceinter.fr/recherche/key%3DYOURSEARCH%2526tri%3Dpertinence%2526theme%3Ddefault%2526type%3Demission
+    def iter_search_results(self, *args, **kwargs):
+        return []
+
+    def get_video(self, _id):
+        with self.browser:
+            video = self.browser.get_video(_id)
+        return video
+
+    def fill_video(self, video, fields):
+        if 'url' in fields:
+            with self.browser:
+                video.url = self.browser.get_url(video.id)
+
+        return video
+
+    OBJECTS = {Radio: fill_radio,
+            RadioFranceVideo: fill_video}
--- a/modules/radiofrance/browser.py
+++ b/modules/radiofrance/browser.py
@@ -18,9 +18,13 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

 from weboob.tools.browser import BaseBrowser, BasePage, BrokenPageError
+from weboob.capabilities.video import BaseVideo
+from weboob.tools.browser.decorators import id2url

 from StringIO import StringIO
 from time import time
+import re
+import urlparse

 try:
    import json
@@ -28,7 +32,42 @@ except ImportError:
    import simplejson as json


-__all__ = ['RadioFranceBrowser']
+__all__ = ['RadioFranceBrowser', 'RadioFranceVideo']
+
+
+class RadioFranceVideo(BaseVideo):
+    RADIOS = ('franceinter', 'franceculture')
+
+    @classmethod
+    def id2url(cls, _id):
+        radio_id, replay_id = _id.split('-', 2)
+        return 'http://www.%s.fr/player/reecouter?play=%s' % \
+            (radio_id, replay_id)
+
+
+class PlayerPage(BasePage):
+    URL = r'^http://www\.(?P<radio_id>%s)\.fr/player/reecouter\?play=(?P<replay_id>\d+)$' \
+        % '|'.join(RadioFranceVideo.RADIOS)
+    MP3_REGEXP = re.compile(r'sites%2Fdefault.+.(?:MP3|mp3)')
+
+    def get_url(self):
+        radio_id = self.groups[0]
+        player = self.parser.select(self.document.getroot(), '#rfPlayer embed', 1)
+        urlparams = urlparse.parse_qs(player.attrib['src'])
+        return 'http://www.%s.fr/%s' % (radio_id, urlparams['urlAOD'][0])
+
+
+class ReplayPage(BasePage):
+    URL = r'^http://www\.(?P<radio_id>%s)\.fr/emission-.+$' \
+            % '|'.join(RadioFranceVideo.RADIOS)
+
+    def get_id(self):
+        radio_id = self.groups[0]
+        for node in self.parser.select(self.document.getroot(), 'div.node-rf_diffusion'):
+            match = re.match(r'^node-(\d+)$', node.attrib.get('id', ''))
+            if match:
+                player_id = match.groups()[0]
+        return (radio_id, player_id)


 class DataPage(BasePage):
@@ -54,6 +93,7 @@ class RssPage(BasePage):

 class RssAntennaPage(BasePage):
    ENCODING = 'ISO-8859-1'
+
    def get_track(self):
        # This information is not always available
        try:
@@ -71,7 +111,9 @@ class RadioFranceBrowser(BaseBrowser):
    ENCODING = 'UTF-8'
    PAGES = {r'/playerjs/direct/donneesassociees/html\?guid=$': DataPage,
        r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS.html': RssPage,
-        r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage}
+        r'http://players.tv-radio.com/radiofrance/metadatas/([a-z]+)RSS_a_lantenne.html': RssAntennaPage,
+        PlayerPage.URL: PlayerPage,
+        ReplayPage.URL: ReplayPage}

    def get_current_playerjs(self, _id):
        self.location('http://www.%s.fr/playerjs/direct/donneesassociees/html?guid=' % _id)
@@ -103,3 +145,23 @@ class RadioFranceBrowser(BaseBrowser):
        result = self.page.get_track()
        self.ENCODING = RadioFranceBrowser.ENCODING
        return result
+
+    @id2url(RadioFranceVideo.id2url)
+    def get_video(self, url):
+        radio_id = replay_id = None
+        match = re.match(PlayerPage.URL, url)
+        if match:
+            radio_id, replay_id = match.groups()
+        elif re.match(ReplayPage.URL, url):
+            self.location(url)
+            assert self.is_on_page(ReplayPage)
+            radio_id, replay_id = self.page.get_id()
+        if radio_id and replay_id:
+            _id = '%s-%s' % (radio_id, replay_id)
+            return RadioFranceVideo(_id)
+
+    @id2url(RadioFranceVideo.id2url)
+    def get_url(self, url):
+        self.location(url)
+        assert self.is_on_page(PlayerPage)
+        return self.page.get_url()
--- a/modules/radiofrance/test.py
+++ b/modules/radiofrance/test.py
@@ -27,3 +27,20 @@ class RadioFranceTest(BackendTest):
    def test_get_radios(self):
        l = list(self.backend.iter_resources([]))
        self.assertTrue(len(l) > 0)
+
+    def test_get_video(self):
+        # this should be available up to 24/10/2014 15h00
+        urls = ('http://www.franceinter.fr/emission-vivre-avec-les-betes-y-arthus-bertrand-felins-g-tsai-s-envoler-conte-boreal-reha-hutin-30-m',
+            'http://www.franceinter.fr/player/reecouter?play=263735',
+            'franceinter-263735')
+        for url in urls:
+            vid = self.backend.get_video(url)
+            assert vid.id == urls[-1]
+        self.backend.fillobj(vid, ['url'])
+        assert vid.url.lower().endswith('.mp3')
+
+        # france culture (no expiration known)
+        vid = self.backend.get_video('http://www.franceculture.fr/emission-la-dispute-expositions-paul-strand-youssef-nabil-et-dorothee-smith-2012-02-01')
+        assert vid.id
+        self.backend.fillobj(vid, ['url'])
+        assert vid.url.lower().endswith('.mp3')
				`@ -22,4 +22,3 @@ from .backend import RadioFranceBackend`


				`__all__ = ['RadioFranceBackend']`