weboob-devel/modules/radiofrance/pages.py

268 lines
11 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2013 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser.elements import ItemElement, DictElement, ListElement, method
from weboob.browser.pages import HTMLPage, JsonPage, XMLPage
from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import Format, CleanText, Join, Env, Regexp, Duration, Time
from weboob.capabilities.audio import BaseAudio
from weboob.tools.capabilities.audio.audio import BaseAudioIdFilter
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
import time
from datetime import timedelta, datetime, date
class PodcastPage(XMLPage):
    """XML podcast feed whose ``<item>`` nodes are described as ``BaseAudio`` objects."""

    @method
    class iter_podcasts(ListElement):
        # Every <item> node of the document is handed to the ``item`` element below.
        item_xpath = '//item'

        class item(ItemElement):
            klass = BaseAudio

            # The id is "podcast." followed by the part of the guid captured
            # by the pattern, passed through BaseAudioIdFilter.
            obj_id = BaseAudioIdFilter(
                Format('podcast.%s',
                       Regexp(CleanText('./guid'),
                              'http://media.radiofrance-podcast.net/podcast09/(.*).mp3')))
            obj_title = CleanText('title')
            obj_format = u'mp3'
            obj_url = CleanText('enclosure/@url')
            obj_description = CleanText('description')

            def obj_author(self):
                """Return the cleaned text of the first ``itunes:author`` child.

                Raises ``IndexError`` when the item has no such child.
                """
                itunes_ns = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}
                author_nodes = self.el.xpath('itunes:author', namespaces=itunes_ns)
                return CleanText('.')(author_nodes[0])

            def obj_duration(self):
                """Return the first ``itunes:duration`` child parsed by ``Duration``.

                Raises ``IndexError`` when the item has no such child.
                """
                itunes_ns = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}
                duration_nodes = self.el.xpath('itunes:duration', namespaces=itunes_ns)
                return Duration(CleanText('.'))(duration_nodes[0])

            def obj_thumbnail(self):
                """Build a ``BaseImage`` from the document's first ``image/url`` text."""
                image_ref = CleanText('//image[1]/url')(self)
                image = BaseImage(image_ref)
                # The image url is taken from the id the BaseImage ended up with.
                image.url = image.id
                return image
class RadioPage(HTMLPage):
    """HTML page of a radio site: stream url, podcast listings and current programme."""

    def get_url(self):
        """Return the stream url found in the page.

        The ``liveUrl`` value embedded in the page scripts is preferred; when
        it is missing or empty, the ``href`` of the first ``a#player`` link is
        returned instead.
        """
        live_url = Regexp(CleanText('//script'), '.*liveUrl: \'(.*)\', timeshiftUrl.*', default=None)(self.doc)
        return live_url or CleanText('//a[@id="player"][1]/@href')(self.doc)

    def get_france_culture_podcasts_url(self):
        """Return the feed identifier captured from the first ``a.lien-rss`` href."""
        rss_id = Regexp(CleanText('//a[@class="lien-rss"][1]/@href'),
                        'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
        return rss_id(self.doc)

    @method
    class get_france_culture_podcast_emissions(ListElement):
        item_xpath = '//li/h3/a'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def condition(self):
                """Keep only links whose href contains ``/podcast/``."""
                href = CleanText('./@href')(self)
                return u'/podcast/' in href

            def obj_split_path(self):
                """Append this emission's id to ``env['split_path']`` and return that list."""
                emission_id = Regexp(CleanText('./@href'), '/podcast/(.*)')(self)
                path = self.env['split_path']
                path.append(emission_id)
                return path

            obj_id = Regexp(CleanText('./@href'), '/podcast/(.*)')
            obj_title = CleanText('.')

    @method
    class get_france_info_podcast_emissions(ListElement):
        item_xpath = '//div[@class="emission-gdp"]'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def obj_split_path(self):
                """Append this emission's id to ``env['split_path']`` and return that list."""
                emission_id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'),
                                     'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
                path = self.env['split_path']
                path.append(emission_id)
                return path

            obj_id = Regexp(CleanText('./div/div/div/div/ul/li/a[@class="ico-rss"]/@href'),
                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
            obj_title = CleanText('./h2/a')

    @method
    class get_mouv_podcast_emissions(ListElement):
        item_xpath = '//div[@class="view-content"]/div'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def condition(self):
                """Require an rss link; the result is the id captured from its href."""
                href = CleanText('./div/a[@class="podcast-rss"]/@href')(self)
                if not href:
                    # No link text: hand back the falsy value itself.
                    return href
                return Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
                              'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)

            def obj_split_path(self):
                """Append this emission's id to ``env['split_path']`` and return that list."""
                emission_id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
                                     'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
                path = self.env['split_path']
                path.append(emission_id)
                return path

            obj_id = Regexp(CleanText('./div/a[@class="podcast-rss"]/@href'),
                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
            obj_title = CleanText('./h2')

    @method
    class get_france_musique_podcast_emissions(ListElement):
        item_xpath = '//div[@class="liste-emissions"]/ul/li'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def condition(self):
                """Require an rss link; the result is the id captured from its href."""
                href = CleanText('./div/ul/li/a[@class="ico-rss"]/@href')(self)
                if not href:
                    # No link text: hand back the falsy value itself.
                    return href
                return Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
                              'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)

            def obj_split_path(self):
                """Append this emission's id to ``env['split_path']`` and return that list."""
                emission_id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
                                     'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
                path = self.env['split_path']
                path.append(emission_id)
                return path

            obj_id = Regexp(CleanText('./div/ul/li/a[@class="ico-rss"]/@href'),
                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
            obj_title = CleanText('./div/h3')

    @method
    class get_france_inter_podcast_emissions(ListElement):
        item_xpath = '//div[has-class("item-list")]/ul/li/div/div'
        ignore_duplicate = True

        class item(ItemElement):
            klass = Collection

            def condition(self):
                """Require an rss link; the result is the id captured from its href."""
                href = CleanText('./div/a[@class="podrss"]/@href')(self)
                if not href:
                    # No link text: hand back the falsy value itself.
                    return href
                return Regexp(CleanText('./div/a[@class="podrss"]/@href'),
                              'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)

            def obj_split_path(self):
                """Append this emission's id to ``env['split_path']`` and return that list."""
                emission_id = Regexp(CleanText('./div/a[@class="podrss"]/@href'),
                                     'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')(self)
                path = self.env['split_path']
                path.append(emission_id)
                return path

            obj_id = Regexp(CleanText('./div/a[@class="podrss"]/@href'),
                            'http://radiofrance-podcast.net/podcast09/rss_(.*).xml')
            obj_title = CleanText('./h2/a')

    def get_current(self):
        """Return ``(person, title)`` for the entry preceding the first future one.

        The ``li.chronique.clear`` entries are walked in document order; each
        entry's time is combined with today's date. As soon as an entry starts
        later than now, the title of the entry seen just before it is returned
        (empty if there was none). The person part is always empty.

        NOTE(review): when no entry starts later than now, ``(u'', u'')`` is
        returned rather than the last title seen -- confirm this is intended.
        """
        now = datetime.now()
        today = date.today()
        previous_title = u''
        for entry in self.doc.xpath('//li[@class="chronique clear"]'):
            # The "à" is stripped from the text before it is parsed as a time.
            starts_at = datetime.combine(
                today,
                Time(CleanText('./div[@class="quand"]',
                               replace=[(u'à', '')]))(entry))
            if starts_at > now:
                return u'', previous_title
            previous_title = CleanText('./h3[@class="titre"]')(entry)
        return u'', u''
class JsonPage(JsonPage):
    """JSON page giving the replayable selection, the current programme and the stream url.

    The class reuses the name of the imported ``JsonPage`` base it derives
    from, so the imported class is shadowed in this module from here on.
    """

    @method
    class get_selection(DictElement):
        # Items are taken from the 'diffusions' entry of the JSON document.
        item_xpath = 'diffusions'
        ignore_duplicate = True

        class item(ItemElement):
            klass = BaseAudio

            def condition(self):
                """Keep only entries that have a truthy 'path_mp3' value."""
                return Dict('path_mp3')(self)

            # The id is "<radio_id>.<nid>", passed through BaseAudioIdFilter.
            obj_id = BaseAudioIdFilter(Format(u'%s.%s', Env('radio_id'), Dict('nid')))
            obj_format = u'mp3'
            obj_title = Format(u'%s : %s',
                               Dict('title_emission'),
                               Dict('title_diff'))
            obj_description = Dict('desc_emission', default=u'')
            obj_author = Join(u', ', Dict('personnes', default=u''))
            obj_url = Dict('path_mp3')

            def obj_thumbnail(self):
                """Return a ``BaseImage`` for 'path_img_emission', or ``None`` if the key is absent."""
                if 'path_img_emission' in self.el:
                    thumbnail = BaseImage(Dict('path_img_emission')(self))
                    thumbnail.url = thumbnail.id
                    return thumbnail

            def obj_duration(self):
                """Return ``fin - debut`` as a ``timedelta``, or ``None`` if either is falsy."""
                fin = Dict('fin')(self)
                debut = Dict('debut')(self)
                if debut and fin:
                    return timedelta(seconds=int(fin) - int(debut))

    @staticmethod
    def _is_on_air(item, now):
        """Tell whether ``now`` lies strictly between the item's 'debut' and 'fin'.

        Both bounds are coerced with ``int()`` so that numeric strings and
        integers are handled the same way. An entry whose bounds cannot be
        converted (``None`` or non-numeric text) is reported as not on air.
        A missing key still raises ``KeyError``.
        """
        try:
            return int(item['debut']) < now < int(item['fin'])
        except (TypeError, ValueError):
            return False

    def get_current(self):
        """Return ``(person, title)`` describing what is on air right now.

        Three document layouts are handled:

        * a 'current' entry: the title is "<emission titre>: <song titre>" and
          the person is the song's 'interpreteMorceau';
        * a 'diffusions' entry: the first item whose 'debut'/'fin' bracket the
          current time gives "<title_emission>: <title_diff>" with an empty person;
        * otherwise the document itself is iterated the same way, the title
          being completed with the first diffusion's 'title' when present and
          the person being the comma-joined 'personnes'.

        ``(u'', u'')`` is returned when nothing matches.
        """
        if 'current' in self.doc:
            current = self.doc['current']
            emission_title = current['emission']['titre']
            song_title = current['song']['titre']
            title = u'%s: %s' % (emission_title, song_title)
            person = current['song']['interpreteMorceau']
            return person, title

        now = int(time.time())

        if 'diffusions' in self.doc:
            for item in self.doc['diffusions']:
                # Same int() coercion as the list layout below and as
                # obj_duration: the raw values may not be integers.
                if self._is_on_air(item, now):
                    title = u'%s: %s' % (item['title_emission'],
                                         item['title_diff'] if 'title_diff' in item else '')
                    return u'', title
            return u'', u''

        for item in self.doc:
            if not self._is_on_air(item, now):
                continue

            emission = u''
            if 'diffusions' in item and item['diffusions'] and 'title' in item['diffusions'][0]:
                emission = item['diffusions'][0]['title']

            title = item['title_emission']
            if emission:
                title = u'%s: %s' % (title, emission)

            person = u''
            if 'personnes' in item and item['personnes'] and item['personnes'][0]:
                person = u','.join(item['personnes'])
            return person, title
        return u'', u''

    def get_fburl(self):
        """Return the url of the first 'live' entry with a bitrate of 128.

        ``None`` is returned when no entry of ``doc['url']`` matches.
        """
        for el in self.doc['url']:
            if el['type'] == 'live' and el['bitrate'] == 128:
                return Dict('url')(el)
        return None