[radiofrance] handle podcasts

2015-07-02 15:33:51 +02:00 · 2015-07-02 15:33:51 +02:00 · 19a785a643
commit 19a785a643
parent b892aafec3
4 changed files with 223 additions and 16 deletions
--- a/modules/radiofrance/pages.py
+++ b/modules/radiofrance/pages.py
@@ -17,21 +17,161 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-from weboob.browser.elements import ItemElement, DictElement, method
-from weboob.browser.pages import HTMLPage, JsonPage
+from weboob.browser.elements import ItemElement, DictElement, ListElement, method
+from weboob.browser.pages import HTMLPage, JsonPage, XMLPage
 from weboob.browser.filters.json import Dict
-from weboob.browser.filters.standard import Format, CleanText, Join, Env
+from weboob.browser.filters.standard import Format, CleanText, Join, Env, Regexp, Duration
 from weboob.capabilities.audio import BaseAudio, BaseAudioIdFilter
 from weboob.capabilities.image import BaseImage
+from weboob.capabilities.collection import Collection

 import time
 from datetime import timedelta


-class PlayerPage(HTMLPage):
+class PodcastPage(XMLPage):  # XML page whose //item nodes are exposed as BaseAudio objects
+    @method
+    class iter_podcasts(ListElement):  # item_xpath picks the nodes; `item` describes the BaseAudio built from each
+        item_xpath = '//item'
+
+        class item(ItemElement):
+            klass = BaseAudio
+
+            obj_id = BaseAudioIdFilter(Format('podcast.%s',  # id is formatted as 'podcast.<capture>' from the <guid> text
+                                              Regexp(CleanText('./guid'),
+                                                     'http://media.radiofrance-podcast.net/podcast09/(.*).mp3')))  # NOTE(review): the dots are unescaped, so each matches any character
+            obj_title = CleanText('title')
+            obj_format = u'mp3'  # constant value, not read from the feed
+            obj_url = CleanText('enclosure/@url')
+            obj_description = CleanText('description')
+
+            def obj_author(self):  # cleaned text of the item's itunes:author child
+                author = self.el.xpath('itunes:author',
+                                       namespaces={'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'})  # binds the 'itunes' prefix used in the expression
+                return CleanText('.')(author[0])  # NOTE(review): raises IndexError if xpath() returned no node
+
+            def obj_duration(self):  # Duration filter applied to the cleaned text of the itunes:duration child
+                duration = self.el.xpath('itunes:duration',
+                                         namespaces={'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'})  # same namespace binding as obj_author
+                return Duration(CleanText('.'))(duration[0])  # NOTE(review): raises IndexError if xpath() returned no node
+
+            def obj_thumbnail(self):  # BaseImage built from the feed-level image URL
+                thumbnail = BaseImage(CleanText('//image[1]/url')(self))  # '//' searches the whole document, not only this item
+                thumbnail.url = thumbnail.id  # presumably the constructor argument became .id, so id and url hold the same string - confirm
+                return thumbnail
+
+
+class RadioPage(HTMLPage):  # HTML page helpers: player link, an RSS identifier, and Collection lists of podcast emissions
    def get_url(self):
        return CleanText('//a[@id="player"][1]/@href')(self.doc)
+
+    def get_france_culture_podcasts_url(self):  # reads the first a.lien-rss href; presumably returns the captured part between 'rss_' and '.xml' rather than the full URL - confirm
+        return Regexp(CleanText('//a[@class="lien-rss"][1]/@href'),
+                      'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self.doc)  # NOTE(review): the dots are unescaped, so each matches any character
+
+    @method
+    class get_france_culture_podcast_emissions(ListElement):  # one Collection per //li/h3/a link accepted by condition()
+        item_xpath = '//li/h3/a'
+
+        class item(ItemElement):
+            klass = Collection
+
+            def condition(self):  # keep only links whose href contains '/podcast/'
+                return u'/podcast/' in CleanText('./@href')(self)
+
+            def obj_split_path(self):  # appends this emission's id to env['split_path'] and returns that same list
+                _id = Regexp(CleanText('./@href'), '/podcast/(.*)')(self)  # same extraction as obj_id below
+                self.env['split_path'].append(_id)  # NOTE(review): mutates the env list in place; assumes each item has its own env copy - confirm
+                return self.env['split_path']
+
+            obj_id = Regexp(CleanText('./@href'), '/podcast/(.*)')  # everything after '/podcast/' in the href
+            obj_title = CleanText('.')  # the link text
+
+    @method
+    class get_france_info_podcast_emissions(ListElement):  # one Collection per div.emission-gdp; no condition() filter here
+        item_xpath = '//div[@class="emission-gdp"]'
+        ignore_duplicate = True  # presumably makes the list skip repeated ids - confirm against ListElement
+
+        class item(ItemElement):
+            klass = Collection
+
+            def obj_split_path(self):  # appends this emission's id to env['split_path'] and returns that same list
+                _id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'),
+                             'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)  # same extraction as obj_id below
+                self.env['split_path'].append(_id)  # NOTE(review): mutates the env list in place; assumes each item has its own env copy - confirm
+                return self.env['split_path']
+
+            obj_id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'),  # href of the a.ico-rss link nested under the emission div
+                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')  # capture sits between 'rss_' and '.xml'
+            obj_title = CleanText('./h2/a')
+
+    @method
+    class get_mouv_podcast_emissions(ListElement):  # one Collection per child div of div.view-content accepted by condition()
+        item_xpath = '//div[@class="view-content"]/div'
+
+        class item(ItemElement):
+            klass = Collection
+
+            def condition(self):  # truthy when the a.podcast-rss href is non-empty and the Regexp yields a value; NOTE(review): Regexp presumably raises instead of returning a falsy value on no match - confirm
+                return CleanText('./div/a[@class="podcast-rss"]/@href')(self) and \
+                    Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
+                           'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
+
+            def obj_split_path(self):  # appends this emission's id to env['split_path'] and returns that same list
+                _id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
+                             'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)  # same extraction as obj_id below
+                self.env['split_path'].append(_id)  # NOTE(review): mutates the env list in place; assumes each item has its own env copy - confirm
+                return self.env['split_path']
+
+            obj_id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
+                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')  # capture sits between 'rss_' and '.xml'
+            obj_title = CleanText('./h2')
+
+    @method
+    class get_france_musique_podcast_emissions(ListElement):  # one Collection per li of div.liste-emissions accepted by condition()
+        item_xpath = '//div[@class="liste-emissions"]/ul/li'
+
+        class item(ItemElement):
+            klass = Collection
+
+            def condition(self):  # truthy when the a.ico-rss href is non-empty and the Regexp yields a value; NOTE(review): Regexp presumably raises instead of returning a falsy value on no match - confirm
+                return CleanText('./div/ul/li/a[@class="ico-rss"]/@href')(self) and\
+                    Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
+                           'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
+
+            def obj_split_path(self):  # appends this emission's id to env['split_path'] and returns that same list
+                _id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
+                             'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)  # same extraction as obj_id below
+                self.env['split_path'].append(_id)  # NOTE(review): mutates the env list in place; assumes each item has its own env copy - confirm
+                return self.env['split_path']
+
+            obj_id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
+                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')  # capture sits between 'rss_' and '.xml'
+            obj_title = CleanText('./div/h3')
+
+    @method
+    class get_france_inter_podcast_emissions(ListElement):  # one Collection per nested div under div.item-list accepted by condition()
+        item_xpath = '//div[has-class("item-list")]/ul/li/div/div'  # has-class() is not a standard XPath function; presumably provided by the page's XPath setup - confirm
+        ignore_duplicate = True  # presumably makes the list skip repeated ids - confirm against ListElement
+
+        class item(ItemElement):
+            klass = Collection
+
+            def condition(self):  # truthy when the a.podrss href is non-empty and the Regexp yields a value; NOTE(review): Regexp presumably raises instead of returning a falsy value on no match - confirm
+                return CleanText('./div/a[@class="podrss"]/@href')(self) and\
+                    Regexp(CleanText('./div/a[@class="podrss"]/@href'),
+                           'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
+
+            def obj_split_path(self):  # appends this emission's id to env['split_path'] and returns that same list
+                _id = Regexp(CleanText('./div/a[@class="podrss"]/@href'),
+                             'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)  # same extraction as obj_id below
+                self.env['split_path'].append(_id)  # NOTE(review): mutates the env list in place; assumes each item has its own env copy - confirm
+                return self.env['split_path']
+
+            obj_id = Regexp(CleanText('./div/a[@class="podrss"]/@href'),
+                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')  # capture sits between 'rss_' and '.xml'
+            obj_title = CleanText('./h2/a')
+

 class JsonPage(JsonPage):
    @method