weboob-devel/modules/arte/pages.py
Bezleputh cdc4410170 [arte] adapt to browser2
* use of browser 2
* choose between video versions (i.e. with or without subtitles)
* support of cinema.arte.tv
2015-03-13 00:05:36 +01:00

265 lines
9 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from datetime import timedelta
from weboob.capabilities.image import BaseImage
from weboob.capabilities.base import BaseObject, NotAvailable
from weboob.capabilities.collection import Collection
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import DictElement, ItemElement, ListElement, method
from weboob.browser.filters.standard import Date, Format, Env, CleanText, Field, Regexp, Join
from weboob.browser.filters.json import Dict
from weboob.browser.filters.html import XPath
from .video import ArteVideo, ArteSiteVideo, SITE
class ArteItemElement(ItemElement):
    """Common field mapping for one video entry of an Arte JSON document.

    Each ``obj_*`` attribute reads a key of the JSON element currently being
    parsed (``VID``, ``VTI``, ``VSU``, ``VRT``, ``VDE``, ``VDA``,
    ``videoDurationSeconds`` and ``VTU/IUR``). Subclasses are expected to
    provide ``klass`` and may override individual fields.
    """

    obj_id = Dict('VID')

    def obj_title(self):
        """Return the 'VTI' value, followed by ``: <VSU>`` when 'VSU' is set."""
        title = Dict('VTI')(self)
        # 'VSU' is optional; presumably a subtitle -- TODO confirm.
        subtitle = Dict('VSU', default=None)(self)
        if subtitle:
            return '%s: %s' % (title, subtitle)
        return title

    obj_rating = Dict('VRT', default=NotAvailable)
    obj_rating_max = 10
    obj_description = Dict('VDE', default=NotAvailable)
    obj_date = Date(Dict('VDA'))

    def obj_duration(self):
        """Return 'videoDurationSeconds' as a :class:`datetime.timedelta`.

        Non-numeric values (e.g. the duration delivered as text) are
        converted with ``int()`` first.
        """
        seconds = Dict('videoDurationSeconds')(self)
        # ``basestring`` only exists on Python 2; testing for the numeric
        # types instead gives the same result for JSON values on 2 and 3.
        if not isinstance(seconds, (int, float)):
            seconds = int(seconds)
        return timedelta(seconds=seconds)

    def obj_thumbnail(self):
        """Return a ``BaseImage`` built from 'VTU/IUR', with ``url`` set to its id."""
        image = BaseImage(Dict('VTU/IUR')(self))
        # The image is constructed from the URL, so its id doubles as its url.
        image.url = image.id
        return image
class VideosListPage(HTMLPage):
    """HTML listing page: exposes categories and videos found in the markup."""

    @method
    class iter_arte_concert_categories(ListElement):
        """Yield one ``Collection`` per ``<li>`` of the ``filter-liste`` list."""
        item_xpath = '//ul[@class="filter-liste"]/li'

        class item(ItemElement):
            klass = Collection

            obj_title = CleanText('./a/span')
            # The category id is the data-target attribute without its
            # 'video_box_tab_' prefix.
            obj_id = CleanText('./@data-target', replace=[('video_box_tab_', '')])

            def obj_split_path(self):
                """Return ``[<concert site id>, <category id>]``."""
                category = CleanText('./@data-target',
                                     replace=[('video_box_tab_', '')])(self)
                return [SITE.CONCERT.get('id'), u'%s' % category]

    @method
    class iter_arte_concert_videos(ListElement):
        """Yield the ``<article>`` videos of the tab named by the 'cat' env value."""

        def find_elements(self):
            """Set ``item_xpath`` for the requested tab and yield its matches."""
            self.item_xpath = '//div[@id="video_box_tab_%s"]/article' % Env('cat')(self)
            for el in self.el.xpath(self.item_xpath):
                yield el

        class item(ItemElement):
            klass = ArteSiteVideo

            obj__site = SITE.CONCERT.get('id')
            obj_id = Format('%s.%s', Field('_site'), CleanText('./@about'))
            obj_title = CleanText('div/div[@class="info-article "]/div/h3/a')

            def obj_thumbnail(self):
                """Return a ``BaseImage`` built from the ``data-src`` attribute."""
                image = BaseImage(
                    CleanText('div/div/a/figure/span/span/@data-src')(self))
                image.url = image.id
                return image

    @method
    class iter_arte_cinema_categories(ListElement):
        """Yield one ``Collection`` per ``leaf`` menu entry under the 'cat' env value."""
        item_xpath = '//li[has-class("leaf")]'

        class item(ItemElement):
            klass = Collection

            def condition(self):
                """Keep only links whose href looks like ``/xx/<cat>/...``."""
                # Raw string: ``\w`` must reach the regex engine untouched
                # (a plain literal makes it an invalid string escape).
                pattern = r'^(/\w{2}/%s/.*)' % self.env['cat']
                return Regexp(CleanText('./a/@href'), pattern, default=None)(self)

            obj_title = CleanText('./a')
            obj_id = CleanText('./a/@href')

            def obj_split_path(self):
                """Return the cinema site id followed by the href path segments.

                The leading ``/xx/`` part of the href is dropped before
                splitting on ``/``.
                """
                path = Regexp(CleanText('./a/@href'), r'/\w{2}/(.*)')(self)
                return [SITE.CINEMA.get('id')] + path.split('/')

    def get_arte_cinema_menu(self):
        """Return the href of every ``leaf`` menu link whose href starts with ``/``."""
        return self.doc.xpath('//li[has-class("leaf")]/a[starts-with(@href,"/")]/@href')

    @method
    class get_arte_cinema_videos(ListElement):
        """Yield an ``ArteSiteVideo`` for each qualifying ``<article>``."""
        item_xpath = '//article'

        class item(ItemElement):
            klass = ArteSiteVideo

            def condition(self):
                """Accept articles with exactly one secondary block and no nested article."""
                return (len(XPath('.//div[@class="article-secondary "]')(self)) == 1 and
                        len(XPath('.//article')(self)) == 0)

            obj__site = SITE.CINEMA.get('id')
            obj_id = Format('%s.%s', Field('_site'), CleanText('./@about'))
            obj_title = Join(u' - ',
                             './/div[@class="article-secondary "]/div/div')

            def obj_thumbnail(self):
                """Return a ``BaseImage`` built from the ``<noscript>`` image source."""
                image = BaseImage(
                    CleanText('.//div[@class="article-primary "]/div[has-class("field-thumbnail")]/span/noscript/img/@src')(self))
                image.url = image.id
                return image
class VideoPage(HTMLPage):
    """HTML page that embeds the video player container."""

    def get_json_url(self):
        """Return the ``arte_vp_url`` attribute of the first ``video-container`` div.

        No fallback is applied: the lookup fails if the page has no such div
        or if the div lacks the attribute.
        """
        containers = self.doc.xpath('//div[@class="video-container"]')
        player = containers[0]
        return player.attrib['arte_vp_url']
class ArteJsonPage(JsonPage):
    """JSON document page: stream URL selection and video/program parsing."""

    def get_video_url(self, quality, format, version, language_version):
        """Pick a stream from ``videoJsonPlayer/VSR`` and return its URL.

        Candidate stream names are tried in this order, each through
        :meth:`find_url`:

        1. ``<format>_<quality>_<version>``
        2. ``<format>_<quality>_<language_version>``
        3. ``<quality>_<language_version>``

        If none of them yields a stream, the first listed stream is used.

        :param quality: quality token expected in the stream name
        :param format: format token expected in the stream name
        :param version: preferred version token
        :param language_version: fallback version token
        :return: the stream's ``streamer`` value concatenated with its
                 ``url`` when a streamer is present, otherwise its ``url``;
                 ``NotAvailable`` when the document lists no stream
        """
        streams = Dict('videoJsonPlayer/VSR')(self.doc)
        if not streams:
            # Without this guard the stream name below would never be set.
            return NotAvailable

        # Materialise the names so the ``urls[0]`` fallback also works where
        # keys() returns a non-indexable view.
        urls = list(streams.keys())

        found = None
        candidates = (
            (format, quality, version),
            # Fall back to the default language version.
            (format, quality, language_version),
            # Last resort: quality and default language version only.
            (quality, language_version),
        )
        for parts in candidates:
            found = self.find_url('_'.join(parts), urls, version, quality)
            if found:
                break
        if not found:
            found = urls[0]

        streamer = Dict('videoJsonPlayer/VSR/%s/streamer' % (found), default=None)(self.doc)
        url = Dict('videoJsonPlayer/VSR/%s/url' % (found))(self.doc)
        if streamer:
            return '%s%s' % (streamer, url)
        return url

    def find_url(self, key, urls, version, quality):
        """Return the best stream name from ``urls``, or ``None``.

        Preference order: a name containing ``key``; else a name containing
        both ``version`` and ``quality``; else a name containing ``version``.

        :param key: substring describing the wanted stream
        :param urls: available stream names
        :param version: version token to fall back on
        :param quality: quality token used to refine the version fallback
        """
        self.logger.debug('available urls: %s' % urls)
        self.logger.debug('search url matching : %s' % key)

        # Best case: the key is matching
        matching = [s for s in urls if key in s]
        self.logger.debug('best case matching: %s' % matching)
        if matching:
            return matching[0]

        # Second case: is the version available
        matching = [s for s in urls if version in s]
        self.logger.debug('is version available: %s' % matching)
        if matching:
            # Do the quality + version match
            matching_quality = [s for s in matching if quality in s]
            self.logger.debug('does quality + version match: %s' % matching_quality)
            if matching_quality:
                # Return the quality-filtered hit, not the unfiltered one.
                return matching_quality[0]

            # Only the version matches
            return matching[0]

        return None

    @method
    class iter_videos(DictElement):
        """Yield an ``ArteVideo`` for each entry under ``videoList``."""
        item_xpath = 'videoList'

        class item(ArteItemElement):
            klass = ArteVideo

    @method
    class iter_programs(DictElement):
        """Yield a ``Collection`` for each entry under ``configClusterList``."""
        item_xpath = 'configClusterList'

        class item(ItemElement):
            klass = Collection

            # The JSON key holding the title is taken from the 'title' env value.
            obj_title = Dict(CleanText(Env('title')))
            obj_id = Dict('clusterId')

            def obj_split_path(self):
                """Return ``[<program site id>, <clusterId>]``."""
                return [SITE.PROGRAM.get('id'), Dict('clusterId')(self)]

    @method
    class get_video(ArteItemElement):
        """Build an ``ArteVideo`` from the ``videoJsonPlayer`` sub-document."""

        def __init__(self, *args, **kwargs):
            # NOTE(review): super() is anchored on ArteItemElement rather
            # than on this class, presumably because @method rebinds this
            # class's own name -- confirm.
            super(ArteItemElement, self).__init__(*args, **kwargs)
            self.el = self.el.get('videoJsonPlayer')

        klass = ArteVideo

    @method
    class get_arte_concert_video(ArteItemElement):
        """Build a concert ``ArteSiteVideo`` from the ``videoJsonPlayer`` sub-document."""

        def __init__(self, *args, **kwargs):
            super(ArteItemElement, self).__init__(*args, **kwargs)
            self.el = self.el.get('videoJsonPlayer')

        klass = ArteSiteVideo
        obj__site = SITE.CONCERT.get('id')
        # The id is the site id plus the part of 'VTR' after the concert host.
        obj_id = Format('%s.%s', Field('_site'), Regexp(Dict('VTR'), 'http://concert.arte.tv(.*)'))

    @method
    class get_arte_cinema_video(ArteItemElement):
        """Build a cinema ``ArteSiteVideo`` from the ``videoJsonPlayer`` sub-document."""

        def __init__(self, *args, **kwargs):
            super(ArteItemElement, self).__init__(*args, **kwargs)
            self.el = self.el.get('videoJsonPlayer')

        klass = ArteSiteVideo
        obj__site = SITE.CINEMA.get('id')
        # Overrides the base mapping: the date is read from 'VRA' here.
        obj_date = Date(Dict('VRA'))

    @method
    class get_program_video(ArteItemElement):
        """Build an ``ArteVideo`` from ``abstractProgram/VDO`` when it is present."""

        def __init__(self, *args, **kwargs):
            super(ArteItemElement, self).__init__(*args, **kwargs)
            # Without a 'VDO' entry the element is left as the whole document.
            if 'VDO' in self.el['abstractProgram'].keys():
                self.el = self.el['abstractProgram']['VDO']

        klass = ArteVideo

    @method
    class iter_program_videos(DictElement):
        """Yield a ``BaseObject`` per broadcast that has a non-empty 'VDS'."""
        item_xpath = 'clusterWrapper/broadcasts'
        ignore_duplicate = True

        class item(ItemElement):
            klass = BaseObject

            def condition(self):
                """Accept only broadcasts carrying a non-empty 'VDS' entry."""
                return 'VDS' in self.el.keys() and len(self.el['VDS']) > 0

            obj_id = Dict('programId')