[radiofrance] handle podcasts

This commit is contained in:
Bezleputh 2015-07-02 15:33:51 +02:00
commit 19a785a643
4 changed files with 223 additions and 16 deletions

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL from weboob.browser import PagesBrowser, URL
from .pages import PlayerPage, JsonPage from .pages import RadioPage, JsonPage, PodcastPage
__all__ = ['RadioFranceBrowser'] __all__ = ['RadioFranceBrowser']
@ -26,11 +26,12 @@ __all__ = ['RadioFranceBrowser']
class RadioFranceBrowser(PagesBrowser): class RadioFranceBrowser(PagesBrowser):
json_page = URL('sites/default/files/(?P<json_url>.*).json', json_page = URL('sites/default/files/(?P<json_url>.*).json',
'player-json/reecoute/(?P<json_url_fip>.*)', JsonPage) 'player-json/reecoute/(?P<json_url_fip>.*)', JsonPage)
player_page = URL('(?P<player>.*)', PlayerPage) podcast_page = URL('podcast09/rss_(?P<podcast_id>.*)\.xml', PodcastPage)
radio_page = URL('(?P<page>.*)', RadioPage)
def get_radio_url(self, radio, player): def get_radio_url(self, radio, player):
self.BASEURL = 'http://www.%s.fr/' % radio self.BASEURL = 'http://www.%s.fr/' % radio
return self.player_page.go(player=player).get_url() return self.radio_page.go(page=player).get_url()
def get_current(self, radio, json_url): def get_current(self, radio, json_url):
self.BASEURL = 'http://www.%s.fr/' % radio self.BASEURL = 'http://www.%s.fr/' % radio
@ -53,3 +54,24 @@ class RadioFranceBrowser(PagesBrowser):
for item in self.get_selection(radio_url, json_url, radio_id): for item in self.get_selection(radio_url, json_url, radio_id):
if pattern.upper() in item.title.upper(): if pattern.upper() in item.title.upper():
yield item yield item
def get_podcast_emissions(self, radio_url, podcast_url, split_path):
    """List the podcast emissions published by one station.

    The browser is pointed at the station's website, the podcast index page
    is opened and parsing is delegated to the station-specific method of the
    returned page.

    :param radio_url: station name interpolated into ``http://www.<name>.fr/``
    :param podcast_url: path of the podcast index page on that website
    :param split_path: collection path; its first element names the station
        and therefore selects the parser
    :return: the result of the station-specific page parser, or ``None`` when
        the station has no dedicated parser
    """
    # (station id, name of the page method parsing that station's index),
    # checked in order with plain equality.
    station_parsers = (
        ('franceinter', 'get_france_inter_podcast_emissions'),
        ('franceculture', 'get_france_culture_podcast_emissions'),
        ('franceinfo', 'get_france_info_podcast_emissions'),
        ('francemusique', 'get_france_musique_podcast_emissions'),
        ('mouv', 'get_mouv_podcast_emissions'),
    )

    self.BASEURL = 'http://www.%s.fr/' % radio_url

    for station, parser_name in station_parsers:
        if split_path[0] == station:
            # The page is only requested once a parser is known for it.
            index_page = self.radio_page.go(page=podcast_url)
            return getattr(index_page, parser_name)(split_path=split_path)

    return None
def get_podcasts(self, podcast_id):
    """Return the episodes of one podcast feed.

    :param podcast_id: identifier substituted into the ``podcast_page`` URL
    :return: whatever ``iter_podcasts()`` of the fetched page returns
    """
    # Podcast feeds live on a dedicated host, not on the station websites.
    self.BASEURL = 'http://radiofrance-podcast.net/'
    feed_page = self.podcast_page.go(podcast_id=podcast_id)
    return feed_page.iter_podcasts()
def get_france_culture_podcasts_url(self, url):
    """Resolve a France Culture emission page to its podcast feed reference.

    :param url: page path, relative to ``http://www.franceculture.fr/podcast/``
    :return: whatever ``get_france_culture_podcasts_url()`` of the fetched
        page returns
    """
    self.BASEURL = 'http://www.franceculture.fr/podcast/'
    emission_page = self.radio_page.go(page=url)
    return emission_page.get_france_culture_podcasts_url()

View file

@ -48,14 +48,17 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
'franceinter': {u'title': u'France Inter', 'franceinter': {u'title': u'France Inter',
u'player': u'player', u'player': u'player',
u'live': u'lecteur_commun_json/timeline', u'live': u'lecteur_commun_json/timeline',
u'podcast': u'podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'franceculture': {u'title': u'France Culture', 'franceculture': {u'title': u'France Culture',
u'player': u'player', u'player': u'player',
u'live': u'lecteur_commun_json/timeline', u'live': u'lecteur_commun_json/timeline',
u'podcast': u'podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'franceinfo': {u'title': u'France Info', 'franceinfo': {u'title': u'France Info',
u'player': u'player', u'player': u'player',
u'live': u'lecteur_commun_json/timeline', u'live': u'lecteur_commun_json/timeline',
u'podcast': u'programmes-chroniques/podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'fbidf': {u'title': u'France Bleu Île-de-France (Paris)', 'fbidf': {u'title': u'France Bleu Île-de-France (Paris)',
u'player': u'player/france-bleu-107-1', u'player': u'player/france-bleu-107-1',
@ -68,10 +71,12 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
'francemusique': {u'title': u'France Musique', 'francemusique': {u'title': u'France Musique',
u'player': u'player', u'player': u'player',
u'live': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple())), u'live': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple())),
u'podcast': u'emissions',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'mouv': {u'title': u'Le Mouv\'', 'mouv': {u'title': u'Le Mouv\'',
u'player': u'player', u'player': u'player',
u'live': u'lecteur_commun_json/timeline', u'live': u'lecteur_commun_json/timeline',
u'podcast': u'podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'fbalsace': {u'title': u'France Bleu Alsace (Strasbourg)', 'fbalsace': {u'title': u'France Bleu Alsace (Strasbourg)',
u'player': u'player/station/france-bleu-alsace', u'player': u'player/station/france-bleu-alsace',
@ -252,7 +257,13 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
} }
def iter_resources(self, objs, split_path): def iter_resources(self, objs, split_path):
if split_path and split_path[0] == u'francebleu': if len(split_path) == 0:
for _id, item in sorted(self._RADIOS.iteritems()):
if not _id.startswith('fb'):
yield Collection([_id], item['title'])
yield Collection([u'francebleu'], u'France Bleu')
elif split_path[0] == u'francebleu':
if len(split_path) == 1: if len(split_path) == 1:
for _id, item in sorted(self._RADIOS.iteritems()): for _id, item in sorted(self._RADIOS.iteritems()):
if _id.startswith('fb'): if _id.startswith('fb'):
@ -264,18 +275,33 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
for item in self.browser.get_selection('francebleu', selection_url, _id): for item in self.browser.get_selection('francebleu', selection_url, _id):
yield item yield item
break break
elif len(split_path) == 0:
for _id, item in sorted(self._RADIOS.iteritems()):
if not _id.startswith('fb'):
yield Collection([_id], item['title'])
yield Collection([u'francebleu'], u'France Bleu')
elif len(split_path) == 1: elif len(split_path) == 1:
yield Collection([split_path[0], u'selection'], u'Selection')
if 'podcast' in self._RADIOS[split_path[0]]:
yield Collection([split_path[0], u'podcasts'], u'Podcast')
elif len(split_path) == 2 and split_path[1] == 'selection':
for _id, item in sorted(self._RADIOS.iteritems()): for _id, item in sorted(self._RADIOS.iteritems()):
if _id == split_path[0]: if _id == split_path[0]:
selection_url = self._RADIOS[_id]['selection'] selection_url = self._RADIOS[_id]['selection']
for item in self.browser.get_selection(_id, selection_url, _id): for item in self.browser.get_selection(_id, selection_url, _id):
yield item yield item
break break
elif len(split_path) == 2 and split_path[1] == 'podcasts':
for item in self.browser.get_podcast_emissions(split_path[0],
self._RADIOS[split_path[0]]['podcast'],
split_path):
yield item
elif len(split_path) == 3:
podcasts_url = split_path[-1]
if split_path[0] == 'franceculture':
podcasts_url = self.browser.get_france_culture_podcasts_url(split_path[-1])
for item in self.browser.get_podcasts(podcasts_url):
yield item
else: else:
raise CollectionNotFound(split_path) raise CollectionNotFound(split_path)
@ -347,6 +373,12 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
selection_url = self._RADIOS[radio]['selection'] selection_url = self._RADIOS[radio]['selection']
radio_url = radio if not radio.startswith('fb') else 'francebleu' radio_url = radio if not radio.startswith('fb') else 'francebleu'
return self.browser.get_audio(_id, radio_url, selection_url, radio) return self.browser.get_audio(_id, radio_url, selection_url, radio)
elif radio == 'podcast':
m = re.match('audio\.podcast\.(\d*)-.*', _id)
if m:
for item in self.browser.get_podcasts(m.group(1)):
if _id == item.id:
return item
def iter_radios_search(self, pattern): def iter_radios_search(self, pattern):
for key, radio in self._RADIOS.iteritems(): for key, radio in self._RADIOS.iteritems():

View file

@ -17,21 +17,161 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.elements import ItemElement, DictElement, method from weboob.browser.elements import ItemElement, DictElement, ListElement, method
from weboob.browser.pages import HTMLPage, JsonPage from weboob.browser.pages import HTMLPage, JsonPage, XMLPage
from weboob.browser.filters.json import Dict from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import Format, CleanText, Join, Env from weboob.browser.filters.standard import Format, CleanText, Join, Env, Regexp, Duration
from weboob.capabilities.audio import BaseAudio, BaseAudioIdFilter from weboob.capabilities.audio import BaseAudio, BaseAudioIdFilter
from weboob.capabilities.image import BaseImage from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
import time import time
from datetime import timedelta from datetime import timedelta
# NOTE(review): the header line below carries both sides of the diff: the
# removed `PlayerPage(HTMLPage)` header and the added `PodcastPage(XMLPage)`.
# PodcastPage turns the entries of a podcast feed document into BaseAudio objects.
class PlayerPage(HTMLPage): class PodcastPage(XMLPage):
# iter_podcasts builds one BaseAudio per node matched by `item_xpath`.
@method
class iter_podcasts(ListElement):
item_xpath = '//item'
class item(ItemElement):
klass = BaseAudio
# The id is 'podcast.<name>', where <name> is the part of the item's <guid>
# between the podcast09/ prefix and the mp3 suffix, passed through
# BaseAudioIdFilter.
# NOTE(review): the dot before "mp3" is unescaped, so it matches any character.
obj_id = BaseAudioIdFilter(Format('podcast.%s',
Regexp(CleanText('./guid'),
'http://media.radiofrance-podcast.net/podcast09/(.*).mp3')))
obj_title = CleanText('title')
obj_format = u'mp3'
obj_url = CleanText('enclosure/@url')
obj_description = CleanText('description')
# Author and duration live in the iTunes namespace, so they are looked up
# with an explicit namespace mapping on the raw element.
# NOTE(review): `author[0]` / `duration[0]` raise IndexError when the item
# lacks the corresponding itunes element - confirm feeds always provide them.
def obj_author(self):
author = self.el.xpath('itunes:author',
namespaces={'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'})
return CleanText('.')(author[0])
def obj_duration(self):
duration = self.el.xpath('itunes:duration',
namespaces={'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'})
return Duration(CleanText('.'))(duration[0])
# The thumbnail is taken from the document-wide '//image[1]/url' path rather
# than from the item; that URL is used both as the image id and as its url.
def obj_thumbnail(self):
thumbnail = BaseImage(CleanText('//image[1]/url')(self))
thumbnail.url = thumbnail.id
return thumbnail
# RadioPage is the HTML page of a station website. It exposes the player link
# and, per station, a parser listing podcast emissions as Collection objects.
class RadioPage(HTMLPage):
# NOTE(review): the next two lines carry both sides of the diff (unchanged
# context), hence the duplicated text.
# get_url returns the href of the first <a id="player"> in the document.
def get_url(self): def get_url(self):
return CleanText('//a[@id="player"][1]/@href')(self.doc) return CleanText('//a[@id="player"][1]/@href')(self.doc)
# Returns the feed identifier captured from the first "lien-rss" link, i.e. the
# part between 'rss_' and the xml suffix.
# NOTE(review): the dot before "xml" is unescaped in every feed pattern of this
# class, so it matches any character.
def get_france_culture_podcasts_url(self):
return Regexp(CleanText('//a[@class="lien-rss"][1]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self.doc)
# France Culture: one Collection per '//li/h3/a' link whose href contains
# '/podcast/'. The id is the href part after '/podcast/', not a feed id.
@method
class get_france_culture_podcast_emissions(ListElement):
item_xpath = '//li/h3/a'
class item(ItemElement):
klass = Collection
def condition(self):
return u'/podcast/' in CleanText('./@href')(self)
# Appends the emission id to env['split_path'] in place and returns that list.
# presumably env['split_path'] is the `split_path` keyword given by the
# caller - verify against the browser.
def obj_split_path(self):
_id = Regexp(CleanText('./@href'), '/podcast/(.*)')(self)
self.env['split_path'].append(_id)
return self.env['split_path']
obj_id = Regexp(CleanText('./@href'), '/podcast/(.*)')
obj_title = CleanText('.')
# France Info: one Collection per "emission-gdp" div; the id is the feed
# identifier captured from the "ico-rss" link.
# NOTE(review): unlike the parsers below, this one defines no condition(), so
# entries without an RSS link are not filtered out here - confirm intended.
@method
class get_france_info_podcast_emissions(ListElement):
item_xpath = '//div[@class="emission-gdp"]'
ignore_duplicate = True
class item(ItemElement):
klass = Collection
def obj_split_path(self):
_id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
self.env['split_path'].append(_id)
return self.env['split_path']
obj_id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
obj_title = CleanText('./h2/a')
# Le Mouv': one Collection per child div of the "view-content" div. condition()
# requires a non-empty "podcast-rss" href that also yields a feed identifier.
@method
class get_mouv_podcast_emissions(ListElement):
item_xpath = '//div[@class="view-content"]/div'
class item(ItemElement):
klass = Collection
def condition(self):
return CleanText('./div/a[@class="podcast-rss"]/@href')(self) and \
Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
def obj_split_path(self):
_id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
self.env['split_path'].append(_id)
return self.env['split_path']
obj_id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
obj_title = CleanText('./h2')
# France Musique: one Collection per <li> of the "liste-emissions" list, with
# the same RSS-link condition applied to the "ico-rss" link.
@method
class get_france_musique_podcast_emissions(ListElement):
item_xpath = '//div[@class="liste-emissions"]/ul/li'
class item(ItemElement):
klass = Collection
def condition(self):
return CleanText('./div/ul/li/a[@class="ico-rss"]/@href')(self) and\
Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
def obj_split_path(self):
_id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
self.env['split_path'].append(_id)
return self.env['split_path']
obj_id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
obj_title = CleanText('./div/h3')
# France Inter: one Collection per inner div of the "item-list" entries, with
# the same RSS-link condition applied to the "podrss" link.
@method
class get_france_inter_podcast_emissions(ListElement):
item_xpath = '//div[has-class("item-list")]/ul/li/div/div'
ignore_duplicate = True
class item(ItemElement):
klass = Collection
def condition(self):
return CleanText('./div/a[@class="podrss"]/@href')(self) and\
Regexp(CleanText('./div/a[@class="podrss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
def obj_split_path(self):
_id = Regexp(CleanText('./div/a[@class="podrss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
self.env['split_path'].append(_id)
return self.env['split_path']
obj_id = Regexp(CleanText('./div/a[@class="podrss"]/@href'),
'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
obj_title = CleanText('./h2/a')
class JsonPage(JsonPage): class JsonPage(JsonPage):
@method @method

View file

@ -26,7 +26,7 @@ from weboob.capabilities.radio import Radio
class RadioFranceTest(BackendTest): class RadioFranceTest(BackendTest):
MODULE = 'radiofrance' MODULE = 'radiofrance'
def test_get_radios_and_selections(self): def test_ls_radios_and_selections(self):
l = list(self.backend.iter_resources(objs=[Radio], split_path=[])) l = list(self.backend.iter_resources(objs=[Radio], split_path=[]))
self.assertTrue(0 < len(l) < 30) self.assertTrue(0 < len(l) < 30)
@ -36,7 +36,7 @@ class RadioFranceTest(BackendTest):
streams = self.backend.get_radio(name).streams streams = self.backend.get_radio(name).streams
self.assertTrue(len(streams) > 0) self.assertTrue(len(streams) > 0)
l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[name])) l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[name, 'selection']))
self.assertTrue(len(l_sel) > 0) self.assertTrue(len(l_sel) > 0)
self.assertTrue(len(l_sel[0].url) > 0) self.assertTrue(len(l_sel[0].url) > 0)
@ -47,10 +47,23 @@ class RadioFranceTest(BackendTest):
streams = self.backend.get_radio(radio.split_path[-1]).streams streams = self.backend.get_radio(radio.split_path[-1]).streams
self.assertTrue(len(streams) > 0) self.assertTrue(len(streams) > 0)
l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=['francebleu', radio.split_path[-1]])) l_sel = list(self.backend.iter_resources(objs=[BaseAudio],
split_path=['francebleu',
radio.split_path[-1],
'selection']))
if len(l_sel) > 0: if len(l_sel) > 0:
self.assertTrue(len(l_sel[0].url) > 0) self.assertTrue(len(l_sel[0].url) > 0)
def test_podcasts(self):
    """Walk the podcast tree of every radio that declares a podcast page.

    For each such radio: list its emissions, list the episodes of the first
    emission, then fetch the first episode by id and check it has a URL.
    """
    for radio_id, radio in self.backend._RADIOS.iteritems():
        if 'podcast' not in radio:
            continue

        emissions = list(self.backend.iter_resources(objs=[BaseAudio],
                                                     split_path=[radio_id, 'podcasts']))
        self.assertTrue(len(emissions) > 0)

        first_emission = emissions[0]
        episodes = list(self.backend.iter_resources(objs=[BaseAudio],
                                                    split_path=first_emission.split_path))
        self.assertTrue(len(episodes) > 0)

        fetched = self.backend.get_audio(episodes[0].id)
        self.assertTrue(fetched.url)
def test_search_radio(self): def test_search_radio(self):
l = list(self.backend.iter_radios_search('bleu')) l = list(self.backend.iter_radios_search('bleu'))
self.assertTrue(len(l) > 0) self.assertTrue(len(l) > 0)