diff --git a/modules/radiofrance/browser.py b/modules/radiofrance/browser.py index 630a8337..69944a50 100644 --- a/modules/radiofrance/browser.py +++ b/modules/radiofrance/browser.py @@ -18,7 +18,7 @@ # along with weboob. If not, see . from weboob.browser import PagesBrowser, URL -from .pages import PlayerPage, JsonPage +from .pages import RadioPage, JsonPage, PodcastPage __all__ = ['RadioFranceBrowser'] @@ -26,11 +26,12 @@ __all__ = ['RadioFranceBrowser'] class RadioFranceBrowser(PagesBrowser): json_page = URL('sites/default/files/(?P.*).json', 'player-json/reecoute/(?P.*)', JsonPage) - player_page = URL('(?P.*)', PlayerPage) + podcast_page = URL('podcast09/rss_(?P.*)\.xml', PodcastPage) + radio_page = URL('(?P.*)', RadioPage) def get_radio_url(self, radio, player): self.BASEURL = 'http://www.%s.fr/' % radio - return self.player_page.go(player=player).get_url() + return self.radio_page.go(page=player).get_url() def get_current(self, radio, json_url): self.BASEURL = 'http://www.%s.fr/' % radio @@ -53,3 +54,24 @@ class RadioFranceBrowser(PagesBrowser): for item in self.get_selection(radio_url, json_url, radio_id): if pattern.upper() in item.title.upper(): yield item + + def get_podcast_emissions(self, radio_url, podcast_url, split_path): + self.BASEURL = 'http://www.%s.fr/' % radio_url + if split_path[0] == 'franceinter': + return self.radio_page.go(page=podcast_url).get_france_inter_podcast_emissions(split_path=split_path) + elif split_path[0] == 'franceculture': + return self.radio_page.go(page=podcast_url).get_france_culture_podcast_emissions(split_path=split_path) + elif split_path[0] == 'franceinfo': + return self.radio_page.go(page=podcast_url).get_france_info_podcast_emissions(split_path=split_path) + elif split_path[0] == 'francemusique': + return self.radio_page.go(page=podcast_url).get_france_musique_podcast_emissions(split_path=split_path) + elif split_path[0] == 'mouv': + return self.radio_page.go(page=podcast_url).get_mouv_podcast_emissions(split_path=split_path) + + def get_podcasts(self, podcast_id): + self.BASEURL = 'http://radiofrance-podcast.net/' + return self.podcast_page.go(podcast_id=podcast_id).iter_podcasts() + + def get_france_culture_podcasts_url(self, url): + self.BASEURL = 'http://www.franceculture.fr/podcast/' + return self.radio_page.go(page=url).get_france_culture_podcasts_url() diff --git a/modules/radiofrance/module.py b/modules/radiofrance/module.py index 1a24f806..abeb2d25 100644 --- a/modules/radiofrance/module.py +++ b/modules/radiofrance/module.py @@ -48,14 +48,17 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio): 'franceinter': {u'title': u'France Inter', u'player': u'player', u'live': u'lecteur_commun_json/timeline', + u'podcast': u'podcasts', u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, 'franceculture': {u'title': u'France Culture', u'player': u'player', u'live': u'lecteur_commun_json/timeline', + u'podcast': u'podcasts', u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, 'franceinfo': {u'title': u'France Info', u'player': u'player', u'live': u'lecteur_commun_json/timeline', + u'podcast': u'programmes-chroniques/podcasts', u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, 'fbidf': {u'title': u'France Bleu Île-de-France (Paris)', u'player': u'player/france-bleu-107-1', @@ -68,10 +71,12 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio): 'francemusique': {u'title': u'France Musique', u'player': u'player', u'live': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple())), + u'podcast': u'emissions', u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, 'mouv': {u'title': u'Le Mouv\'', u'player': u'player', u'live': u'lecteur_commun_json/timeline', + u'podcast': u'podcasts', u'selection': u'lecteur_commun_json/reecoute-%s' % int(time.mktime(datetime.now().replace(hour=14, minute=0, second=0).timetuple()))}, 'fbalsace': {u'title': u'France Bleu Alsace (Strasbourg)', u'player': u'player/station/france-bleu-alsace', @@ -252,7 +257,13 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio): } def iter_resources(self, objs, split_path): - if split_path and split_path[0] == u'francebleu': + if len(split_path) == 0: + for _id, item in sorted(self._RADIOS.iteritems()): + if not _id.startswith('fb'): + yield Collection([_id], item['title']) + yield Collection([u'francebleu'], u'France Bleu') + + elif split_path[0] == u'francebleu': if len(split_path) == 1: for _id, item in sorted(self._RADIOS.iteritems()): if _id.startswith('fb'): @@ -264,18 +275,33 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio): for item in self.browser.get_selection('francebleu', selection_url, _id): yield item break - elif len(split_path) == 0: - for _id, item in sorted(self._RADIOS.iteritems()): - if not _id.startswith('fb'): - yield Collection([_id], item['title']) - yield Collection([u'francebleu'], u'France Bleu') + elif len(split_path) == 1: + yield Collection([split_path[0], u'selection'], u'Selection') + if 'podcast' in self._RADIOS[split_path[0]]: + yield Collection([split_path[0], u'podcasts'], u'Podcast') + + elif len(split_path) == 2 and split_path[1] == 'selection': for _id, item in sorted(self._RADIOS.iteritems()): if _id == split_path[0]: selection_url = self._RADIOS[_id]['selection'] for item in self.browser.get_selection(_id, selection_url, _id): yield item break + + elif len(split_path) == 2 and split_path[1] == 'podcasts': + for item in self.browser.get_podcast_emissions(split_path[0], + self._RADIOS[split_path[0]]['podcast'], + split_path): + yield item + + elif len(split_path) == 3: + podcasts_url = split_path[-1] + if split_path[0] == 'franceculture': + podcasts_url = self.browser.get_france_culture_podcasts_url(split_path[-1]) + for item in self.browser.get_podcasts(podcasts_url): + yield item + else: raise CollectionNotFound(split_path) @@ -347,6 +373,12 @@ class RadioFranceModule(Module, CapRadio, CapCollection, CapAudio): selection_url = self._RADIOS[radio]['selection'] radio_url = radio if not radio.startswith('fb') else 'francebleu' return self.browser.get_audio(_id, radio_url, selection_url, radio) + elif radio == 'podcast': + m = re.match('audio\.podcast\.(\d*)-.*', _id) + if m: + for item in self.browser.get_podcasts(m.group(1)): + if _id == item.id: + return item def iter_radios_search(self, pattern): for key, radio in self._RADIOS.iteritems(): diff --git a/modules/radiofrance/pages.py b/modules/radiofrance/pages.py index fe53ee12..0187f819 100644 --- a/modules/radiofrance/pages.py +++ b/modules/radiofrance/pages.py @@ -17,21 +17,161 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.browser.elements import ItemElement, DictElement, method -from weboob.browser.pages import HTMLPage, JsonPage +from weboob.browser.elements import ItemElement, DictElement, ListElement, method +from weboob.browser.pages import HTMLPage, JsonPage, XMLPage from weboob.browser.filters.json import Dict -from weboob.browser.filters.standard import Format, CleanText, Join, Env +from weboob.browser.filters.standard import Format, CleanText, Join, Env, Regexp, Duration from weboob.capabilities.audio import BaseAudio, BaseAudioIdFilter from weboob.capabilities.image import BaseImage +from weboob.capabilities.collection import Collection import time from datetime import timedelta -class PlayerPage(HTMLPage): +class PodcastPage(XMLPage): + @method + class iter_podcasts(ListElement): + item_xpath = '//item' + + class item(ItemElement): + klass = BaseAudio + + obj_id = BaseAudioIdFilter(Format('podcast.%s', + Regexp(CleanText('./guid'), + 'http://media.radiofrance-podcast.net/podcast09/(.*).mp3'))) + obj_title = CleanText('title') + obj_format = u'mp3' + obj_url = CleanText('enclosure/@url') + obj_description = CleanText('description') + + def obj_author(self): + author = self.el.xpath('itunes:author', + namespaces={'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}) + return CleanText('.')(author[0]) + + def obj_duration(self): + duration = self.el.xpath('itunes:duration', + namespaces={'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}) + return Duration(CleanText('.'))(duration[0]) + + def obj_thumbnail(self): + thumbnail = BaseImage(CleanText('//image[1]/url')(self)) + thumbnail.url = thumbnail.id + return thumbnail + + +class RadioPage(HTMLPage): def get_url(self): return CleanText('//a[@id="player"][1]/@href')(self.doc) + def get_france_culture_podcasts_url(self): + return Regexp(CleanText('//a[@class="lien-rss"][1]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self.doc) + + @method + class get_france_culture_podcast_emissions(ListElement): + item_xpath = '//li/h3/a' + + class item(ItemElement): + klass = Collection + + def condition(self): + return u'/podcast/' in CleanText('./@href')(self) + + def obj_split_path(self): + _id = Regexp(CleanText('./@href'), '/podcast/(.*)')(self) + self.env['split_path'].append(_id) + return self.env['split_path'] + + obj_id = Regexp(CleanText('./@href'), '/podcast/(.*)') + obj_title = CleanText('.') + + @method + class get_france_info_podcast_emissions(ListElement): + item_xpath = '//div[@class="emission-gdp"]' + ignore_duplicate = True + + class item(ItemElement): + klass = Collection + + def obj_split_path(self): + _id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + self.env['split_path'].append(_id) + return self.env['split_path'] + + obj_id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml') + obj_title = CleanText('./h2/a') + + @method + class get_mouv_podcast_emissions(ListElement): + item_xpath = '//div[@class="view-content"]/div' + + class item(ItemElement): + klass = Collection + + def condition(self): + return CleanText('./div/a[@class="podcast-rss"]/@href')(self) and \ + Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + + def obj_split_path(self): + _id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + self.env['split_path'].append(_id) + return self.env['split_path'] + + obj_id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml') + obj_title = CleanText('./h2') + + @method + class get_france_musique_podcast_emissions(ListElement): + item_xpath = '//div[@class="liste-emissions"]/ul/li' + + class item(ItemElement): + klass = Collection + + def condition(self): + return CleanText('./div/ul/li/a[@class="ico-rss"]/@href')(self) and\ + Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + + def obj_split_path(self): + _id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + self.env['split_path'].append(_id) + return self.env['split_path'] + + obj_id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml') + obj_title = CleanText('./div/h3') + + @method + class get_france_inter_podcast_emissions(ListElement): + item_xpath = '//div[has-class("item-list")]/ul/li/div/div' + ignore_duplicate = True + + class item(ItemElement): + klass = Collection + + def condition(self): + return CleanText('./div/a[@class="podrss"]/@href')(self) and\ + Regexp(CleanText('./div/a[@class="podrss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + + def obj_split_path(self): + _id = Regexp(CleanText('./div/a[@class="podrss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self) + self.env['split_path'].append(_id) + return self.env['split_path'] + + obj_id = Regexp(CleanText('./div/a[@class="podrss"]/@href'), + 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml') + obj_title = CleanText('./h2/a') + class JsonPage(JsonPage): @method diff --git a/modules/radiofrance/test.py b/modules/radiofrance/test.py index 713de45a..58e093d3 100644 --- a/modules/radiofrance/test.py +++ b/modules/radiofrance/test.py @@ -26,7 +26,7 @@ from weboob.capabilities.radio import Radio class RadioFranceTest(BackendTest): MODULE = 'radiofrance' - def test_get_radios_and_selections(self): + def test_ls_radios_and_selections(self): l = list(self.backend.iter_resources(objs=[Radio], split_path=[])) self.assertTrue(0 < len(l) < 30) @@ -36,7 +36,7 @@ class RadioFranceTest(BackendTest): streams = self.backend.get_radio(name).streams self.assertTrue(len(streams) > 0) - l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[name])) + l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[name, 'selection'])) self.assertTrue(len(l_sel) > 0) self.assertTrue(len(l_sel[0].url) > 0) @@ -47,10 +47,23 @@ class RadioFranceTest(BackendTest): streams = self.backend.get_radio(radio.split_path[-1]).streams self.assertTrue(len(streams) > 0) - l_sel = list(self.backend.iter_resources(objs=[BaseAudio], split_path=['francebleu', radio.split_path[-1]])) + l_sel = list(self.backend.iter_resources(objs=[BaseAudio], + split_path=['francebleu', + radio.split_path[-1], + 'selection'])) if len(l_sel) > 0: self.assertTrue(len(l_sel[0].url) > 0) + def test_podcasts(self): + for key, item in self.backend._RADIOS.iteritems(): + if 'podcast' in item: + emissions = list(self.backend.iter_resources(objs=[BaseAudio], split_path=[key, 'podcasts'])) + self.assertTrue(len(emissions) > 0) + podcasts = list(self.backend.iter_resources(objs=[BaseAudio], split_path=emissions[0].split_path)) + self.assertTrue(len(podcasts) > 0) + podcast = self.backend.get_audio(podcasts[0].id) + self.assertTrue(podcast.url) + def test_search_radio(self): l = list(self.backend.iter_radios_search('bleu')) self.assertTrue(len(l) > 0)