[radiofrance] handle podcasts

This commit is contained in:
Bezleputh 2015-07-02 15:33:51 +02:00
commit 19a785a643
4 changed files with 223 additions and 16 deletions

View file

@ -17,21 +17,161 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.elements import ItemElement, DictElement, method
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, DictElement, ListElement, method
from weboob.browser.pages import HTMLPage, JsonPage, XMLPage
from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import Format, CleanText, Join, Env
from weboob.browser.filters.standard import Format, CleanText, Join, Env, Regexp, Duration
from weboob.capabilities.audio import BaseAudio, BaseAudioIdFilter
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
import time
from datetime import timedelta
class PlayerPage(HTMLPage):
# Namespace map required to address the <itunes:*> tags of a podcast RSS feed.
_ITUNES_NAMESPACES = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}


class PodcastPage(XMLPage):
    """RSS feed of one podcast; every ``<item>`` node is exposed as a BaseAudio."""

    @method
    class iter_podcasts(ListElement):
        """Build one BaseAudio per ``<item>`` node found in the feed."""
        item_xpath = '//item'

        class item(ItemElement):
            klass = BaseAudio

            # The part of the <guid> URL captured by the regexp is prefixed
            # with "podcast." and then passed through BaseAudioIdFilter.
            obj_id = BaseAudioIdFilter(Format('podcast.%s',
                                              Regexp(CleanText('./guid'),
                                                     'http://media.radiofrance-podcast.net/podcast09/(.*).mp3')))
            obj_title = CleanText('title')
            obj_format = u'mp3'
            obj_url = CleanText('enclosure/@url')
            obj_description = CleanText('description')

            def _itunes_value(self, tag, value_filter):
                """Apply ``value_filter`` to the first ``<itunes:tag>`` child of the item.

                :param tag: local name of the iTunes tag (e.g. ``'author'``)
                :param value_filter: filter called with the matching node
                :returns: the filtered value, or NotAvailable when the item
                          has no such tag
                """
                # Imported here so the block does not depend on the
                # file-level import list.
                from weboob.capabilities.base import NotAvailable

                nodes = self.el.xpath('itunes:%s' % tag, namespaces=_ITUNES_NAMESPACES)
                if not nodes:
                    # An item lacking the tag must not abort the whole listing
                    # with an IndexError.
                    return NotAvailable
                return value_filter(nodes[0])

            def obj_author(self):
                """Text of ``<itunes:author>``, or NotAvailable if the tag is missing."""
                return self._itunes_value('author', CleanText('.'))

            def obj_duration(self):
                """``<itunes:duration>`` parsed by the Duration filter, or NotAvailable."""
                return self._itunes_value('duration', Duration(CleanText('.')))

            def obj_thumbnail(self):
                """BaseImage built from the first ``<image>/<url>`` of the document."""
                thumbnail = BaseImage(CleanText('//image[1]/url')(self))
                # The value given to the constructor is read back through
                # ``.id`` and reused as the image URL.
                thumbnail.url = thumbnail.id
                return thumbnail
# URL shape shared by the podcast RSS links below; the captured group is used
# as the emission id.
_RSS_FEED_PATTERN = 'http://radiofrance-podcast.net/podcast09/rss_(.*).xml'

# France Culture links to a podcast page instead of an RSS feed.
_FRANCE_CULTURE_PODCAST_PATTERN = '/podcast/(.*)'

# XPath of the RSS link, relative to one listing item, for each station.
_FRANCE_INFO_RSS_HREF = './div/div/div/div/ul/li/a[@class="ico-rss"]/@href'
_MOUV_RSS_HREF = './div/a[@class="podcast-rss"]/@href'
_FRANCE_MUSIQUE_RSS_HREF = './div/ul/li/a[@class="ico-rss"]/@href'
_FRANCE_INTER_RSS_HREF = './div/a[@class="podrss"]/@href'


class RadioPage(HTMLPage):
    """HTML page of a station: player link and per-station podcast listings.

    Every ``get_*_podcast_emissions`` listing yields Collection objects whose
    ``split_path`` is the ``split_path`` found in the element environment
    extended with the emission id.  The extended path is returned as a new
    list: the environment list itself is left untouched, so the result does
    not depend on whether that list is shared between items.
    """

    def get_url(self):
        """Return the ``href`` of the first ``<a id="player">`` link."""
        return CleanText('//a[@id="player"][1]/@href')(self.doc)

    def get_france_culture_podcasts_url(self):
        """Return the feed id captured from the first ``lien-rss`` link."""
        return Regexp(CleanText('//a[@class="lien-rss"][1]/@href'),
                      'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self.doc)

    @method
    class get_france_culture_podcast_emissions(ListElement):
        """List the France Culture emissions that link to a podcast page."""
        item_xpath = '//li/h3/a'

        class item(ItemElement):
            klass = Collection

            def condition(self):
                # Keep only the links that point to a podcast page.
                return u'/podcast/' in CleanText('./@href')(self)

            def obj_split_path(self):
                _id = Regexp(CleanText('./@href'), _FRANCE_CULTURE_PODCAST_PATTERN)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText('./@href'), _FRANCE_CULTURE_PODCAST_PATTERN)
            obj_title = CleanText('.')

    @method
    class get_france_info_podcast_emissions(ListElement):
        """List the France Info emissions from their ``emission-gdp`` blocks."""
        item_xpath = '//div[@class="emission-gdp"]'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def obj_split_path(self):
                _id = Regexp(CleanText(_FRANCE_INFO_RSS_HREF), _RSS_FEED_PATTERN)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_FRANCE_INFO_RSS_HREF), _RSS_FEED_PATTERN)
            obj_title = CleanText('./h2/a')

    @method
    class get_mouv_podcast_emissions(ListElement):
        """List the Mouv' emissions that expose a podcast RSS link."""
        item_xpath = '//div[@class="view-content"]/div'

        class item(ItemElement):
            klass = Collection

            def condition(self):
                # default=None: an item whose link is missing or is not a
                # podcast feed is skipped instead of raising from the filter.
                return Regexp(CleanText(_MOUV_RSS_HREF), _RSS_FEED_PATTERN, default=None)(self)

            def obj_split_path(self):
                _id = Regexp(CleanText(_MOUV_RSS_HREF), _RSS_FEED_PATTERN)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_MOUV_RSS_HREF), _RSS_FEED_PATTERN)
            obj_title = CleanText('./h2')

    @method
    class get_france_musique_podcast_emissions(ListElement):
        """List the France Musique emissions that expose a podcast RSS link."""
        item_xpath = '//div[@class="liste-emissions"]/ul/li'

        class item(ItemElement):
            klass = Collection

            def condition(self):
                # default=None: an item whose link is missing or is not a
                # podcast feed is skipped instead of raising from the filter.
                return Regexp(CleanText(_FRANCE_MUSIQUE_RSS_HREF), _RSS_FEED_PATTERN,
                              default=None)(self)

            def obj_split_path(self):
                _id = Regexp(CleanText(_FRANCE_MUSIQUE_RSS_HREF), _RSS_FEED_PATTERN)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_FRANCE_MUSIQUE_RSS_HREF), _RSS_FEED_PATTERN)
            obj_title = CleanText('./div/h3')

    @method
    class get_france_inter_podcast_emissions(ListElement):
        """List the France Inter emissions that expose a podcast RSS link."""
        item_xpath = '//div[has-class("item-list")]/ul/li/div/div'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def condition(self):
                # default=None: an item whose link is missing or is not a
                # podcast feed is skipped instead of raising from the filter.
                return Regexp(CleanText(_FRANCE_INTER_RSS_HREF), _RSS_FEED_PATTERN,
                              default=None)(self)

            def obj_split_path(self):
                _id = Regexp(CleanText(_FRANCE_INTER_RSS_HREF), _RSS_FEED_PATTERN)(self)
                return list(self.env['split_path']) + [_id]

            obj_id = Regexp(CleanText(_FRANCE_INTER_RSS_HREF), _RSS_FEED_PATTERN)
            obj_title = CleanText('./h2/a')
class JsonPage(JsonPage):
@method