[radiofrance] handle podcasts

This commit is contained in:
Bezleputh 2015-07-02 15:33:51 +02:00
commit 19a785a643
4 changed files with 223 additions and 16 deletions

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from .pages import PlayerPage, JsonPage
from .pages import RadioPage, JsonPage, PodcastPage
__all__ = ['RadioFranceBrowser']
@ -26,11 +26,12 @@ __all__ = ['RadioFranceBrowser']
class RadioFranceBrowser(PagesBrowser):
json_page = URL('sites/default/files/(?P<json_url>.*).json',
'player-json/reecoute/(?P<json_url_fip>.*)', JsonPage)
player_page = URL('(?P<player>.*)', PlayerPage)
podcast_page = URL('podcast09/rss_(?P<podcast_id>.*)\.xml', PodcastPage)
radio_page = URL('(?P<page>.*)', RadioPage)
# Fetch a page from a station's own web site and return whatever that page's
# get_url() extracts.
#   radio  -- station host label; BASEURL becomes http://www.<radio>.fr/
#   player -- path, relative to BASEURL, of the page to fetch
# NOTE(review): two consecutive return statements follow. As written, the
# second one can never execute. This looks like the old (player_page) and new
# (radio_page) versions of the same line shown side by side -- confirm which
# one is meant to be live before touching this method.
def get_radio_url(self, radio, player):
self.BASEURL = 'http://www.%s.fr/' % radio
return self.player_page.go(player=player).get_url()
return self.radio_page.go(page=player).get_url()
def get_current(self, radio, json_url):
self.BASEURL = 'http://www.%s.fr/' % radio
@ -53,3 +54,24 @@ class RadioFranceBrowser(PagesBrowser):
for item in self.get_selection(radio_url, json_url, radio_id):
if pattern.upper() in item.title.upper():
yield item
def get_podcast_emissions(self, radio_url, podcast_url, split_path):
    """List the podcast emissions published by one station.

    :param radio_url: host label of the station; BASEURL is set to
                      ``http://www.<radio_url>.fr/`` before any request.
    :param podcast_url: path, relative to BASEURL, of the page listing the
                        station's podcasts.
    :param split_path: collection path being browsed. Its first element
                       selects the station-specific parser and the whole
                       list is forwarded to that parser.
    :returns: whatever the station-specific ``RadioPage`` parser returns,
              or an empty list when no parser exists for the station.
    """
    self.BASEURL = 'http://www.%s.fr/' % radio_url

    # Station id -> name of the RadioPage parser that understands the
    # markup of that station's podcast listing.
    parsers = {
        'franceinter': 'get_france_inter_podcast_emissions',
        'franceculture': 'get_france_culture_podcast_emissions',
        'franceinfo': 'get_france_info_podcast_emissions',
        'francemusique': 'get_france_musique_podcast_emissions',
        'mouv': 'get_mouv_podcast_emissions',
    }

    parser_name = parsers.get(split_path[0])
    if parser_name is None:
        # Unsupported station: hand back an empty listing rather than an
        # implicit None, which callers would fail on when iterating.
        # No request is made in this case, as before.
        return []

    page = self.radio_page.go(page=podcast_url)
    return getattr(page, parser_name)(split_path=split_path)
def get_podcasts(self, podcast_id):
    """Fetch one podcast feed and return its ``iter_podcasts()`` result.

    :param podcast_id: value substituted into the ``podcast_page`` URL.
    """
    # The feeds are served from a dedicated host, not from the station
    # sites, so the base URL is switched before the request.
    self.BASEURL = 'http://radiofrance-podcast.net/'
    feed = self.podcast_page.go(podcast_id=podcast_id)
    return feed.iter_podcasts()
def get_france_culture_podcasts_url(self, url):
    """Open a France Culture podcast page and return what its
    ``get_france_culture_podcasts_url()`` extracts.

    :param url: path of the page, relative to the France Culture
                ``/podcast/`` section.
    """
    self.BASEURL = 'http://www.franceculture.fr/podcast/'
    emission_page = self.radio_page.go(page=url)
    return emission_page.get_france_culture_podcasts_url()

View file

@ -48,14 +48,17 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
'franceinter': {u'title': u'France Inter',
u'player': u'player',
u'live': u'lecteur_commun_json/timeline',
u'podcast': u'podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'franceculture': {u'title': u'France Culture',
u'player': u'player',
u'live': u'lecteur_commun_json/timeline',
u'podcast': u'podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'franceinfo': {u'title': u'France Info',
u'player': u'player',
u'live': u'lecteur_commun_json/timeline',
u'podcast': u'programmes-chroniques/podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'fbidf': {u'title': u'France Bleu Île-de-France (Paris)',
u'player': u'player/france-bleu-107-1',
@ -68,10 +71,12 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
'francemusique': {u'title': u'France Musique',
u'player': u'player',
u'live': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple())),
u'podcast': u'emissions',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'mouv': {u'title': u'Le Mouv\'',
u'player': u'player',
u'live': u'lecteur_commun_json/timeline',
u'podcast': u'podcasts',
u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))},
'fbalsace': {u'title': u'France Bleu Alsace (Strasbourg)',
u'player': u'player/station/france-bleu-alsace',
@ -252,7 +257,13 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
}
def iter_resources(self, objs, split_path):
if split_path and split_path[0] == u'francebleu':
if len(split_path) == 0:
for _id, item in sorted(self._RADIOS.iteritems()):
if not _id.startswith('fb'):
yield Collection([_id], item['title'])
yield Collection([u'francebleu'], u'France Bleu')
elif split_path[0] == u'francebleu':
if len(split_path) == 1:
for _id, item in sorted(self._RADIOS.iteritems()):
if _id.startswith('fb'):
@ -264,18 +275,33 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
for item in self.browser.get_selection('francebleu', selection_url, _id):
yield item
break
elif len(split_path) == 0:
for _id, item in sorted(self._RADIOS.iteritems()):
if not _id.startswith('fb'):
yield Collection([_id], item['title'])
yield Collection([u'francebleu'], u'France Bleu')
elif len(split_path) == 1:
yield Collection([split_path[0], u'selection'], u'Selection')
if 'podcast' in self._RADIOS[split_path[0]]:
yield Collection([split_path[0], u'podcasts'], u'Podcast')
elif len(split_path) == 2 and split_path[1] == 'selection':
for _id, item in sorted(self._RADIOS.iteritems()):
if _id == split_path[0]:
selection_url = self._RADIOS[_id]['selection']
for item in self.browser.get_selection(_id, selection_url, _id):
yield item
break
elif len(split_path) == 2 and split_path[1] == 'podcasts':
for item in self.browser.get_podcast_emissions(split_path[0],
self._RADIOS[split_path[0]]['podcast'],
split_path):
yield item
elif len(split_path) == 3:
podcasts_url = split_path[-1]
if split_path[0] == 'franceculture':
podcasts_url = self.browser.get_france_culture_podcasts_url(split_path[-1])
for item in self.browser.get_podcasts(podcasts_url):
yield item
else:
raise CollectionNotFound(split_path)
@ -347,6 +373,12 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio):
selection_url = self._RADIOS[radio]['selection']
radio_url = radio if not radio.startswith('fb') else 'francebleu'
return self.browser.get_audio(_id, radio_url, selection_url, radio)
elif radio == 'podcast':
m = re.match('audio\.podcast\.(\d*)-.*', _id)
if m:
for item in self.browser.get_podcasts(m.group(1)):
if _id == item.id:
return item
def iter_radios_search(self, pattern):
for key, radio in self._RADIOS.iteritems():

View file

@ -17,21 +17,161 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.elements import ItemElement, DictElement, method
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, DictElement, ListElement, method
from weboob.browser.pages import HTMLPage, JsonPage, XMLPage
from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import Format, CleanText, Join, Env
from weboob.browser.filters.standard import Format, CleanText, Join, Env, Regexp, Duration
from weboob.capabilities.audio import BaseAudio, BaseAudioIdFilter
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
import time
from datetime import timedelta
class PlayerPage(HTMLPage):
class PodcastPage(XMLPage):
    """XML podcast feed; each ``<item>`` element becomes one BaseAudio."""

    @method
    class iter_podcasts(ListElement):
        """Iterate over the ``<item>`` elements of the feed."""
        item_xpath = '//item'

        class item(ItemElement):
            klass = BaseAudio

            # Prefix map needed to address the iTunes extension elements.
            _ITUNES_NS = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}

            # The id is built from the part of the guid that follows the
            # media host prefix; dots are escaped so they match literally.
            obj_id = BaseAudioIdFilter(
                Format('podcast.%s',
                       Regexp(CleanText('./guid'),
                              r'http://media\.radiofrance-podcast\.net/podcast09/(.*)\.mp3')))
            obj_title = CleanText('title')
            obj_format = u'mp3'
            obj_url = CleanText('enclosure/@url')
            obj_description = CleanText('description')

            def _itunes(self, tag):
                """Return the first ``itunes:<tag>`` child, or None if absent."""
                found = self.el.xpath('itunes:%s' % tag,
                                      namespaces=self._ITUNES_NS)
                return found[0] if found else None

            def obj_author(self):
                """Text of ``itunes:author``, or NotAvailable when missing."""
                from weboob.capabilities.base import NotAvailable

                node = self._itunes('author')
                # Compare with None explicitly: an element without children
                # is falsy.
                if node is None:
                    return NotAvailable
                return CleanText('.')(node)

            def obj_duration(self):
                """Parsed ``itunes:duration``, or NotAvailable when missing."""
                from weboob.capabilities.base import NotAvailable

                node = self._itunes('duration')
                if node is None:
                    return NotAvailable
                return Duration(CleanText('.'))(node)

            def obj_thumbnail(self):
                """Build a BaseImage from the feed's ``<image>/<url>`` text."""
                thumbnail = BaseImage(CleanText('//image[1]/url')(self))
                # The image URL is used both as id and as url.
                thumbnail.url = thumbnail.id
                return thumbnail
# URL shape of the podcast feed links found on the station pages; the
# captured group is the feed identifier. Dots are escaped so that they only
# match a literal '.'.
_PODCAST_RSS_RE = r'http://radiofrance-podcast\.net/podcast09/rss_(.*)\.xml'


class RadioPage(HTMLPage):
    """HTML page of a station web site (player page or podcast listing).

    Each station lays out its podcast listing differently, hence one
    ``get_<station>_podcast_emissions`` parser per station. Every parser
    yields Collection objects whose ``split_path`` is the ``split_path``
    passed by the caller extended with the emission identifier.
    """

    def get_url(self):
        """Return the href of the ``<a id="player">`` link."""
        return CleanText('//a[@id="player"][1]/@href')(self.doc)

    def get_france_culture_podcasts_url(self):
        """Return the feed id found in the ``a.lien-rss`` link."""
        return Regexp(CleanText('//a[@class="lien-rss"][1]/@href'),
                      _PODCAST_RSS_RE)(self.doc)

    @method
    class get_france_culture_podcast_emissions(ListElement):
        """France Culture: emissions are links whose href contains /podcast/."""
        item_xpath = '//li/h3/a'

        class item(ItemElement):
            klass = Collection

            _href = './@href'
            _id_re = '/podcast/(.*)'

            def condition(self):
                return u'/podcast/' in CleanText(self._href)(self)

            def obj_split_path(self):
                _id = Regexp(CleanText(self._href), self._id_re)(self)
                # Build a new list instead of appending in place, so the
                # path stored in env is never altered by an item.
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_href), _id_re)
            obj_title = CleanText('.')

    @method
    class get_france_info_podcast_emissions(ListElement):
        """France Info: one ``div.emission-gdp`` per emission."""
        item_xpath = '//div[@class="emission-gdp"]'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            _rss_href = './div/div/div/div/ul/li/a[@class="ico-rss"]/@href'

            def condition(self):
                # Skip entries without a recognisable feed link, as the
                # parsers of the other stations do.
                return bool(Regexp(CleanText(self._rss_href),
                                   _PODCAST_RSS_RE, default=None)(self))

            def obj_split_path(self):
                _id = Regexp(CleanText(self._rss_href),
                             _PODCAST_RSS_RE)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_rss_href), _PODCAST_RSS_RE)
            obj_title = CleanText('./h2/a')

    @method
    class get_mouv_podcast_emissions(ListElement):
        """Le Mouv': one child div of ``div.view-content`` per emission."""
        item_xpath = '//div[@class="view-content"]/div'

        class item(ItemElement):
            klass = Collection

            _rss_href = './div/a[@class="podcast-rss"]/@href'

            def condition(self):
                # default=None makes a missing or foreign link evaluate to
                # False instead of raising from inside the condition.
                return bool(Regexp(CleanText(self._rss_href),
                                   _PODCAST_RSS_RE, default=None)(self))

            def obj_split_path(self):
                _id = Regexp(CleanText(self._rss_href),
                             _PODCAST_RSS_RE)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_rss_href), _PODCAST_RSS_RE)
            obj_title = CleanText('./h2')

    @method
    class get_france_musique_podcast_emissions(ListElement):
        """France Musique: one ``li`` of ``div.liste-emissions`` per emission."""
        item_xpath = '//div[@class="liste-emissions"]/ul/li'

        class item(ItemElement):
            klass = Collection

            _rss_href = './div/ul/li/a[@class="ico-rss"]/@href'

            def condition(self):
                return bool(Regexp(CleanText(self._rss_href),
                                   _PODCAST_RSS_RE, default=None)(self))

            def obj_split_path(self):
                _id = Regexp(CleanText(self._rss_href),
                             _PODCAST_RSS_RE)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_rss_href), _PODCAST_RSS_RE)
            obj_title = CleanText('./div/h3')

    @method
    class get_france_inter_podcast_emissions(ListElement):
        """France Inter: emissions sit inside ``item-list`` blocks."""
        item_xpath = '//div[has-class("item-list")]/ul/li/div/div'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            _rss_href = './div/a[@class="podrss"]/@href'

            def condition(self):
                return bool(Regexp(CleanText(self._rss_href),
                                   _PODCAST_RSS_RE, default=None)(self))

            def obj_split_path(self):
                _id = Regexp(CleanText(self._rss_href),
                             _PODCAST_RSS_RE)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_rss_href), _PODCAST_RSS_RE)
            obj_title = CleanText('./h2/a')
class JsonPage(JsonPage):
@method

View file

@ -26,7 +26,7 @@ from weboob.capabilities.radio import Radio
class RadioFranceTest(BackendTest):
MODULE = 'radiofrance'
def test_get_radios_and_selections(self):
def test_ls_radios_and_selections(self):
l = list(self.backend.iter_resources(objs=[Radio], split_path=[]))
self.assertTrue(0 < len(l) < 30)
@ -36,7 +36,7 @@ class RadioFranceTest(BackendTest):
streams = self.backend.get_radio(name).streams
self.assertTrue(len(streams) > 0)
l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[name]))
l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[name, 'selection']))
self.assertTrue(len(l_sel) > 0)
self.assertTrue(len(l_sel[0].url) > 0)
@ -47,10 +47,23 @@ class RadioFranceTest(BackendTest):
streams = self.backend.get_radio(radio.split_path[-1]).streams
self.assertTrue(len(streams) > 0)
l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=['francebleu', radio.split_path[-1]]))
l_sel = list(self.backend.iter_resources(objs=[BaseAudio],
split_path=['francebleu',
radio.split_path[-1],
'selection']))
if len(l_sel) > 0:
self.assertTrue(len(l_sel[0].url) > 0)
def test_podcasts(self):
    """For every station that declares a podcast page, browse its emission
    list, open the first emission, and fetch its first episode by id."""
    for radio_id, radio in self.backend._RADIOS.iteritems():
        if 'podcast' not in radio:
            continue

        emission_list = list(self.backend.iter_resources(objs=[BaseAudio],
                                                         split_path=[radio_id, 'podcasts']))
        self.assertTrue(len(emission_list) > 0)

        first_emission = emission_list[0]
        episode_list = list(self.backend.iter_resources(objs=[BaseAudio],
                                                        split_path=first_emission.split_path))
        self.assertTrue(len(episode_list) > 0)

        # Round-trip through get_audio to check the id can be resolved.
        episode = self.backend.get_audio(episode_list[0].id)
        self.assertTrue(episode.url)
def test_search_radio(self):
l = list(self.backend.iter_radios_search('bleu'))
self.assertTrue(len(l) > 0)