diff --git a/modules/arte/browser.py b/modules/arte/browser.py index b991ff0e..432501f7 100644 --- a/modules/arte/browser.py +++ b/modules/arte/browser.py @@ -18,56 +18,70 @@ # along with weboob. If not, see . import re -import datetime -import time -import urllib +from weboob.capabilities.collection import Collection +from weboob.capabilities.base import UserError from weboob.capabilities import NotAvailable -from weboob.capabilities.image import BaseImage -from weboob.tools.json import json as simplejson -from weboob.deprecated.browser import Browser -from weboob.deprecated.browser.decorators import id2url -from .pages import ArteLivePage, ArteLiveVideoPage -from .video import ArteVideo, ArteLiveVideo +from weboob.browser import PagesBrowser, URL +from .pages import VideosListPage, VideoPage, ArteJsonPage +from .video import VERSION_VIDEO, LANG, QUALITY, FORMATS, SITE + __all__ = ['ArteBrowser'] -class ArteBrowser(Browser): - DOMAIN = u'videos.arte.tv' - ENCODING = None - PAGES = {r'http://concert.arte.tv/\w+': ArteLivePage, - r'http://concert.arte.tv/(?P.+)': ArteLiveVideoPage, - } +class ArteBrowser(PagesBrowser): + BASEURL = 'http://arte.tv/' - LIVE_LANG = {'F': 'fr', - 'D': 'de' - } + webservice = URL('papi/tvguide/(?P.*)/(?P.*)/(?P.*).json', + 'http://(?P<__site>.*).arte.tv/(?P<_lang>\w{2})/player/(?P<_id>.*)', + 'https://api.arte.tv/api/player/v1/config/(?P<__lang>\w{2})/(?P.*)\?vector=(?P<___site>.*)', + ArteJsonPage) + videos_list = URL('http://(?P.*).arte.tv/(?P\w{2})/?(?P.*?)', VideosListPage) + video_page = URL('http://(?P<_site>.*).arte.tv/(?P.+)', VideoPage) - API_URL = 'http://arte.tv/papi/tvguide' - - def __init__(self, lang, quality, order, *args, **kwargs): - self.lang = lang - self.quality = quality + def __init__(self, lang, quality, order, format, version, *args, **kwargs): self.order = order - Browser.__init__(self, *args, **kwargs) + self.lang = (value for key, value in LANG.items if key == lang).next() + self.version = (value for key, value in VERSION_VIDEO.items + if self.lang.get('label') in value.keys() and version == key).next() + self.quality = (value for key, value in QUALITY.items if key == quality).next() + self.format = format - @id2url(ArteVideo.id2url) - def get_video(self, url, video=None): - response = self.openurl('%s/ALL.json' % url) - result = simplejson.loads(response.read(), self.ENCODING) + if self.lang.get('label') not in self.version.keys(): + raise UserError('%s is not available for %s' % (self.lang.get('label'), version)) - if video is None: - video = self.create_video(result['video']) - try: - video.url = self.get_m3u8_link(result['video']['VSR'][0]['VUR']) - video.ext = u'm3u8' - except: - video.url, video.ext = NotAvailable, NotAvailable + PagesBrowser.__init__(self, *args, **kwargs) + def search_videos(self, pattern): + class_name = 'videos/plus7' + method_name = 'search' + parameters = '/'.join([self.lang.get('webservice'), 'L1', pattern.encode('utf-8'), 'ALL', 'ALL', '-1', + self.order, '10', '0']) + return self.webservice.go(class_name=class_name, method_name=method_name, parameters=parameters).iter_videos() + + def get_video(self, id, video=None): + class_name = 'videos' + method_name = 'stream/player' + parameters = '/'.join([self.lang.get('webservice'), id, 'ALL', 'ALL']) + video = self.webservice.go(class_name=class_name, + method_name=method_name, + parameters=parameters).get_video(obj=video) + video.ext, video.url = self.get_url() return video + def get_url(self): + url = self.page.get_video_url(self.quality, self.format, self.version.get(self.lang.get('label')), + self.lang.get('version')) + if format == FORMATS.HLS: + ext = u'm3u8' + url = self.get_m3u8_link(url) + else: + ext = u'mp4' + url = url + return ext, url + def get_m3u8_link(self, url): r = self.openurl(url) baseurl = url.rpartition('/')[0] @@ -84,189 +98,93 @@ class ArteBrowser(Browser): return links_by_quality[0] return NotAvailable - @id2url(ArteLiveVideo.id2url) - def get_live_video(self, url, video=None): - self.location(url) - assert self.is_on_page(ArteLiveVideoPage) - json_url, video = self.page.get_video(video) - return self.fill_live_video(video, json_url) - - def fill_live_video(self, video, json_url): - response = self.openurl(json_url) - result = simplejson.loads(response.read(), self.ENCODING) - - quality = None - if 'VTI' in result['videoJsonPlayer']: - video.title = u'%s' % result['videoJsonPlayer']['VTI'] - - if 'VSR' in result['videoJsonPlayer']: - for item in result['videoJsonPlayer']['VSR']: - if self.quality[0] in item: - quality = item - break - - if not quality: - url = result['videoJsonPlayer']['VSR'][0]['url'] - ext = result['videoJsonPlayer']['VSR'][0]['mediaType'] - else: - url = result['videoJsonPlayer']['VSR'][quality]['url'] - ext = result['videoJsonPlayer']['VSR'][quality]['mediaType'] - - video.url = u'%s' % url - video.ext = u'%s' % ext - if 'VDA' in result['videoJsonPlayer']: - date_string = result['videoJsonPlayer']['VDA'][:-6] - - try: - video.date = datetime.datetime.strptime(date_string, '%d/%m/%Y %H:%M:%S') - except TypeError: - video.date = datetime.datetime(*(time.strptime(date_string, '%d/%m/%Y %H:%M:%S')[0:6])) - - if 'VDU' in result['videoJsonPlayer'].keys(): - video.duration = int(result['videoJsonPlayer']['VDU']) - - if 'IUR' in result['videoJsonPlayer']['VTU'].keys(): - video.thumbnail = BaseImage(result['videoJsonPlayer']['VTU']['IUR']) - video.thumbnail.url = video.thumbnail.id - return video - - def home(self): - self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang) - def get_video_from_program_id(self, _id): class_name = 'epg' method_name = 'program' - level = 'L2' - url = self.API_URL \ - + '/' + class_name \ - + '/' + method_name \ - + '/' + self.lang \ - + '/' + level \ - + '/' + _id \ - + '.json' - - response = self.openurl(url) - result = simplejson.loads(response.read(), self.ENCODING) - if 'VDO' in result['abstractProgram'].keys(): - video = self.create_video(result['abstractProgram']['VDO']) - return self.get_video(video.id, video) - - def search_videos(self, pattern): - class_name = 'videos/plus7' - method_name = 'search' - level = 'L1' - cluster = 'ALL' - channel = 'ALL' - limit = '10' - offset = '0' - - url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset, pattern) - response = self.openurl(url) - result = simplejson.loads(response.read(), self.ENCODING) - return self.create_video_from_plus7(result['videoList']) - - def create_video_from_plus7(self, result): - for item in result: - yield self.create_video(item) - - def create_video(self, item): - video = ArteVideo(item['VID']) - if 'VSU' in item: - video.title = u'%s : %s' % (item['VTI'], item['VSU']) - else: - video.title = u'%s' % (item['VTI']) - video.rating = int(item['VRT']) - - if 'programImage' in item: - url = u'%s' % item['programImage'] - video.thumbnail = BaseImage(url) - video.thumbnail.url = video.thumbnail.id - - video.duration = datetime.timedelta(seconds=int(item['videoDurationSeconds'])) - video.set_empty_fields(NotAvailable, ('url',)) - if 'VDE' in item: - video.description = u'%s' % item['VDE'] - if 'VDA' in item: - m = re.match('(\d{2})\s(\d{2})\s(\d{4})(.*?)', item['VDA']) - if m: - dd = int(m.group(1)) - mm = int(m.group(2)) - yyyy = int(m.group(3)) - video.date = datetime.date(yyyy, mm, dd) - return video - - def create_url_plus7(self, class_name, method_name, level, cluster, channel, limit, offset, pattern=None): - url = self.API_URL \ - + '/' + class_name \ - + '/' + method_name \ - + '/' + self.lang \ - + '/' + level - - if pattern: - url += '/' + urllib.quote(pattern.encode('utf-8')) - - url += '/' + channel \ - + '/' + cluster \ - + '/' + '-1' \ - + '/' + self.order \ - + '/' + limit \ - + '/' + offset \ - + '.json' - - return url - - def get_arte_programs(self): - class_name = 'epg' - method_name = 'clusters' - url = self.API_URL \ - + '/' + class_name \ - + '/' + method_name \ - + '/' + self.lang \ - + '/0/ALL.json' - - response = self.openurl(url) - result = simplejson.loads(response.read(), self.ENCODING) - return result['configClusterList'] - - def program_videos(self, program): - class_name = 'epg' - method_name = 'cluster' - - url = self.API_URL \ - + '/' + class_name \ - + '/' + method_name \ - + '/' + self.lang \ - + '/' + program \ - + '.json' - - response = self.openurl(url) - result = simplejson.loads(response.read(), self.ENCODING) - for item in result['clusterWrapper']['broadcasts']: - if 'VDS' in item.keys() and len(item['VDS']) > 0: - video = self.get_video_from_program_id(item['programId']) - if video: - yield video + parameters = '/'.join([self.lang.get('webservice'), 'L2', _id]) + video = self.webservice.go(class_name=class_name, method_name=method_name, + parameters=parameters).get_program_video() + return self.get_video(video.id, video) def latest_videos(self): class_name = 'videos' method_name = 'plus7' - level = 'L1' - cluster = 'ALL' - channel = 'ALL' - limit = '10' - offset = '0' + parameters = '/'.join([self.lang.get('webservice'), 'L1', 'ALL', 'ALL', '-1', self.order, '10', '0']) + return self.webservice.go(class_name=class_name, method_name=method_name, parameters=parameters).iter_videos() - url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset) - response = self.openurl(url) - result = simplejson.loads(response.read(), self.ENCODING) - return self.create_video_from_plus7(result['videoList']) + def get_arte_programs(self): + class_name = 'epg' + method_name = 'clusters' + parameters = '/'.join([self.lang.get('webservice'), '0', 'ALL']) + return self.webservice.go(class_name=class_name, method_name=method_name, + parameters=parameters).iter_programs(title=self.lang.get('title')) - def get_arte_live_categories(self): - self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang]) - assert self.is_on_page(ArteLivePage) - return self.page.iter_resources() + def get_arte_program_videos(self, program): + class_name = 'epg' + method_name = 'cluster' + parameters = '/'.join([self.lang.get('webservice'), program[-1]]) + available_videos = self.webservice.go(class_name=class_name, method_name=method_name, + parameters=parameters).iter_program_videos() + for item in available_videos: + yield self.get_video_from_program_id(item.id) - def live_videos(self, cat): - self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang]) - assert self.is_on_page(ArteLivePage) - return self.page.iter_videos(cat, lang=self.LIVE_LANG[self.lang]) + def get_arte_concert_categories(self): + return self.videos_list.go(site=SITE.CONCERT.get('id'), lang=self.lang.get('site'), + cat='').iter_arte_concert_categories() + + def get_arte_concert_videos(self, cat): + return self.videos_list.go(site=SITE.CONCERT.get('id'), lang=self.lang.get('site'), + cat='').iter_arte_concert_videos(cat=cat[-1]) + + def get_arte_concert_video(self, id, video=None): + json_url = self.video_page.go(_site=SITE.CONCERT.get('id'), id=id).get_json_url() + m = re.search('http://(?P<__site>.*).arte.tv/(?P<_lang>\w{2})/player/(?P<_id>.*)', json_url) + if m: + video = self.webservice.go(__site=m.group('__site'), _lang=m.group('_lang'), + _id=m.group('_id')).get_arte_concert_video(obj=video) + video.ext, video.url = self.get_url() + return video + + def get_arte_cinema_categories(self, cat=[]): + menu = self.videos_list.go(site=SITE.CINEMA.get('id'), lang=self.lang.get('site'), + cat='').get_arte_cinema_menu() + + menuSplit = map(lambda x: x.split("/")[2:], menu) + + result = {} + for record in menuSplit: + here = result + for item in record[:-1]: + if item not in here: + here[item] = {} + here = here[item] + if "end" not in here: + here["end"] = [] + here["end"].append(record[-1]) + + cat = cat if not cat else cat[1:] + + for el in cat: + result = result.get(el) + + if "end" in result.keys(): + return self.page.iter_arte_cinema_categories(cat='/'.join(cat)) + else: + categories = [] + for item in result.keys(): + categories.append(Collection([SITE.CINEMA.get('id'), unicode(item)], unicode(item))) + return categories + + def get_arte_cinema_videos(self, cat): + return self.videos_list.go(site=SITE.CINEMA.get('id'), lang=self.lang.get('site'), + cat='/%s' % '/'.join(cat[1:])).get_arte_cinema_videos() + + def get_arte_cinema_video(self, id, video=None): + json_url = self.video_page.go(_site=SITE.CINEMA.get('id'), id=id).get_json_url() + m = re.search('https://api.arte.tv/api/player/v1/config/(\w{2})/(.*)\?vector=(.*)\&.*', json_url) + if m: + video = self.webservice.go(__lang=m.group(1), + vid=m.group(2), ___site=m.group(3)).get_arte_cinema_video(obj=video) + video.ext, video.url = self.get_url() + video.id = id + return video diff --git a/modules/arte/module.py b/modules/arte/module.py index 2e094d30..b692c5b7 100644 --- a/modules/arte/module.py +++ b/modules/arte/module.py @@ -19,14 +19,14 @@ import re - +from weboob.tools.ordereddict import OrderedDict from weboob.capabilities.video import CapVideo, BaseVideo from weboob.capabilities.collection import CapCollection, CollectionNotFound, Collection from weboob.tools.backend import Module, BackendConfig from weboob.tools.value import Value from .browser import ArteBrowser -from .video import ArteVideo, ArteLiveVideo +from .video import ArteVideo, ArteSiteVideo, VERSION_VIDEO, FORMATS, LANG, QUALITY, SITE __all__ = ['ArteModule'] @@ -46,26 +46,25 @@ class ArteModule(Module, CapVideo, CapCollection): 'LAST_CHANCE': 'Last chance' } - CONFIG = BackendConfig(Value('lang', label='Lang of videos', - choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'), - Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'), - Value('quality', label='Quality of videos', choices=['hd', 'sd', 'md', 'ed'], default='hd')) + versions_choice = OrderedDict([(k, u'%s' % (v.get('label'))) for k, v in VERSION_VIDEO.items]) + format_choice = OrderedDict([(k, u'%s' % (v)) for k, v in FORMATS.items]) + lang_choice = OrderedDict([(k, u'%s' % (v.get('label'))) for k, v in LANG.items]) + quality_choice = [u'%s' % (k) for k, v in QUALITY.items] - TRANSLATION = {'fr': 'F', - 'en': 'F', - 'de': 'D', - 'hd': ['HQ', -1], - 'md': ['MQ', 2], - 'sd': ['SQ', 0], - 'ed': ['EQ', 1] - } + CONFIG = BackendConfig(Value('lang', label='Lang of videos', choices=lang_choice, default=LANG.FRENCH), + Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'), + Value('quality', label='Quality of videos', choices=quality_choice, default=QUALITY.HD), + Value('format', label='Format of videos', choices=format_choice, default=FORMATS.HTTP_MP4), + Value('version', label='Version of videos', choices=versions_choice)) BROWSER = ArteBrowser def create_default_browser(self): - return self.create_browser(lang=self.TRANSLATION[self.config['lang'].get()], - quality=self.TRANSLATION[self.config['quality'].get()], - order=self.config['order'].get()) + return self.create_browser(lang=self.config['lang'].get(), + quality=self.config['quality'].get(), + order=self.config['order'].get(), + format=self.config['format'].get(), + version=self.config['version'].get()) def parse_id(self, _id): m = re.match('^(\w+)\.(.*)', _id) @@ -74,96 +73,81 @@ class ArteModule(Module, CapVideo, CapCollection): m = re.match('https?://www.arte.tv/guide/\w+/(?P.+)/(.*)', _id) if m: - return 'program', m.group(1) + return SITE.PROGRAM.get('id'), m.group(1) - m = re.match('https?://concert.arte.tv/(\w+)/(.*)', _id) + m = re.match('https?://(%s).arte.tv/(\w+)/(.*)' % ('|'.join(value.get('id') for value in SITE.values)), _id) if m: - return 'live', '/%s/%s' % (m.group(1), m.group(2)) + return m.group(1), '/%s/%s' % (m.group(2), m.group(3)) return 'videos', _id def get_video(self, _id): - with self.browser: - site, _id = self.parse_id(_id) + site, _id = self.parse_id(_id) - if site == 'live': - return self.browser.get_live_video(_id) + if site in [value.get('id') for value in SITE.values]: + _site = (value for value in SITE.values if value.get('id') == site).next() + return getattr(self.browser, _site.get('video'))(_id) - elif site == 'program': - return self.browser.get_video_from_program_id(_id) - - else: - return self.browser.get_video(_id) + else: + return self.browser.get_video(_id) def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False): - with self.browser: - return self.browser.search_videos(pattern) + return self.browser.search_videos(pattern) - def fill_video(self, video, fields): + def fill_arte_video(self, video, fields): if fields != ['thumbnail']: - # if we don't want only the thumbnail, we probably want also every fields - with self.browser: - site, _id = self.parse_id(video.id) + video = self.browser.get_video(video.id, video) - if isinstance(video, ArteVideo): - video = self.browser.get_video(_id, video) - if isinstance(video, ArteLiveVideo): - video = self.browser.get_live_video(_id, video) if 'thumbnail' in fields and video and video.thumbnail: - with self.browser: - video.thumbnail.data = self.browser.readurl(video.thumbnail.url) + video.thumbnail.data = self.browser.open(video.thumbnail.url).content + + return video + + def fill_site_video(self, video, fields): + if fields != ['thumbnail']: + for site in SITE.values: + m = re.match('%s\.(.*)' % site.get('id'), video.id) + if m: + video = getattr(self.browser, site.get('video'))(m.group(1), video) + break + + if 'thumbnail' in fields and video and video.thumbnail: + video.thumbnail.data = self.browser.open(video.thumbnail.url).content return video def iter_resources(self, objs, split_path): - with self.browser: - if BaseVideo in objs: - collection = self.get_collection(objs, split_path) - if collection.path_level == 0: - yield Collection([u'arte-latest'], u'Latest Arte videos') - yield Collection([u'arte-live'], u'Arte Web Live videos') - yield Collection([u'arte-program'], u'Arte Programs') - if collection.path_level == 1: - if collection.split_path == [u'arte-latest']: - for video in self.browser.latest_videos(): - yield video - if collection.split_path == [u'arte-live']: - for categorie in self.browser.get_arte_live_categories(): - yield categorie - if collection.split_path == [u'arte-program']: - for item in self.browser.get_arte_programs(): - lang = self.TRANSLATION[self.config['lang'].get()] + if BaseVideo in objs: + collection = self.get_collection(objs, split_path) + if collection.path_level == 0: + yield Collection([u'arte-latest'], u'Latest Arte videos') + for site in SITE.values: + yield Collection([site.get('id')], site.get('label')) + if collection.path_level == 1: + if collection.split_path == [u'arte-latest']: + for video in self.browser.latest_videos(): + yield video + else: + for site in SITE.values: + if collection.split_path[0] == site.get('id') and collection.path_level in site.keys(): + for item in getattr(self.browser, site.get(collection.path_level))(): + yield item - if lang == 'F': - title = 'titleFR' - elif lang == 'D': - title = 'titleDE' - else: - title = 'name' - - name = item['clusterId'] - if title in item.keys(): - name = item[title] - - yield Collection([u'arte-program', item['clusterId']], u'%s' % name) - if collection.path_level == 2: - if collection.split_path[0] == u'arte-live': - for video in self.browser.live_videos(collection.basename): - yield video - if collection.split_path[0] == u'arte-program': - for video in self.browser.program_videos(collection.split_path[1]): - yield video + if collection.path_level >= 2: + for site in SITE.values: + if collection.split_path[0] == site.get('id') and collection.path_level in site.keys(): + for item in getattr(self.browser, site.get(collection.path_level))(collection.split_path): + yield item def validate_collection(self, objs, collection): if collection.path_level == 0: return if BaseVideo in objs and (collection.split_path == [u'arte-latest'] or - collection.split_path == [u'arte-live'] or - collection.split_path == [u'arte-program']): + collection.split_path[0] in [value.get('id') for value in SITE.values]): return - if BaseVideo in objs and collection.path_level == 2 and (collection.split_path[0] == u'arte-live' or - collection.split_path[0] == u'arte-program'): + if BaseVideo in objs and collection.path_level >= 2 and\ + collection.split_path[0] in [value.get('id') for value in SITE.values]: return raise CollectionNotFound(collection.split_path) - OBJECTS = {ArteVideo: fill_video, ArteLiveVideo: fill_video} + OBJECTS = {ArteVideo: fill_arte_video, ArteSiteVideo: fill_site_video} diff --git a/modules/arte/pages.py b/modules/arte/pages.py index cf653ddb..eb815f32 100644 --- a/modules/arte/pages.py +++ b/modules/arte/pages.py @@ -17,60 +17,249 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . +from datetime import timedelta -from weboob.deprecated.browser import Page -from weboob.tools.html import html2text -from weboob.capabilities import NotAvailable from weboob.capabilities.image import BaseImage +from weboob.capabilities.base import BaseObject, NotAvailable from weboob.capabilities.collection import Collection -from .video import ArteLiveVideo + +from weboob.browser.pages import HTMLPage, JsonPage +from weboob.browser.elements import DictElement, ItemElement, ListElement, method +from weboob.browser.filters.standard import Date, Format, Env, CleanText, Field, Regexp, Join +from weboob.browser.filters.json import Dict +from weboob.browser.filters.html import XPath -class ArteLiveVideoPage(Page): - def get_video(self, video=None): - if not video: - video = ArteLiveVideo('/%s' % self.group_dict['id']) - - div = self.document.xpath('//div[@class="bloc-presentation"]')[0] - - description = self.parser.select(div, - 'div[@class="field field-name-body field-type-text-with-summary field-label-hidden bloc-rte"]', - 1, - method='xpath') - video.description = html2text(self.parser.tostring(description)) - - json_url = self.document.xpath('//div[@class="video-container"]')[0].attrib['arte_vp_url'] - return json_url, video +from .video import ArteVideo, ArteSiteVideo, SITE -class ArteLivePage(Page): - def iter_resources(self): - items = list() - for el in self.document.xpath('//ul[@class="filter-liste"]/li'): - _id = el.attrib['data-target'].replace('video_box_tab_', '') - text = self.parser.select(el, 'a/span', 1, method='xpath').text - item = Collection([u'arte-live', u'%s' % _id], u'%s' % (text)) - items.append(item) - return items +class ArteItemElement(ItemElement): - def iter_videos(self, cat, lang='fr'): - articles = self.document.xpath('//div[@id="video_box_tab_%s"]/article' % cat) - videos = list() - for article in articles: - _id = article.attrib['about'] - title = self.parser.select(article, - 'div/div[@class="info-article "]/div/h3/a', - 1, - method='xpath').text - thumbnail = self.parser.select(article, - 'div/div/a/figure/span/span', - 1, - method='xpath').attrib['data-src'] + obj_id = Dict('VID') - video = ArteLiveVideo(_id) - video.title = u'%s' % title - video.thumbnail = BaseImage(thumbnail) - video.thumbnail.url = video.thumbnail.id - video.set_empty_fields(NotAvailable, ('url',)) - videos.append(video) - return videos + def obj_title(self): + vti = Dict('VTI')(self) + vtu = Dict('VSU', default=None)(self) + if not vtu: + return vti + + return '%s: %s' % (vti, vtu) + + obj_rating = Dict('VRT', default=NotAvailable) + obj_rating_max = 10 + obj_description = Dict('VDE', default=NotAvailable) + obj_date = Date(Dict('VDA')) + + def obj_duration(self): + seconds = Dict('videoDurationSeconds')(self) + if isinstance(seconds, basestring): + seconds = int(seconds) + return timedelta(seconds=seconds) + + def obj_thumbnail(self): + url = Dict('VTU/IUR')(self) + thumbnail = BaseImage(url) + thumbnail.url = thumbnail.id + return thumbnail + + +class VideosListPage(HTMLPage): + + @method + class iter_arte_concert_categories(ListElement): + item_xpath = '//ul[@class="filter-liste"]/li' + + class item(ItemElement): + klass = Collection + + obj_title = CleanText('./a/span') + obj_id = CleanText('./@data-target', replace=[('video_box_tab_', '')]) + + def obj_split_path(self): + _id = CleanText('./@data-target', replace=[('video_box_tab_', '')])(self) + return [SITE.CONCERT.get('id'), u'%s' % _id] + + @method + class iter_arte_concert_videos(ListElement): + + def find_elements(self): + self.item_xpath = '//div[@id="video_box_tab_%s"]/article' % Env('cat')(self) + for el in self.el.xpath(self.item_xpath): + yield el + + class item(ItemElement): + klass = ArteSiteVideo + + obj__site = SITE.CONCERT.get('id') + obj_id = Format('%s.%s', Field('_site'), CleanText('./@about')) + obj_title = CleanText('div/div[@class="info-article "]/div/h3/a') + + def obj_thumbnail(self): + url = CleanText('div/div/a/figure/span/span/@data-src')(self) + thumbnail = BaseImage(url) + thumbnail.url = thumbnail.id + return thumbnail + + @method + class iter_arte_cinema_categories(ListElement): + item_xpath = '//li[has-class("leaf")]' + + class item(ItemElement): + klass = Collection + + def condition(self): + return Regexp(CleanText('./a/@href'), '^(/\w{2}/%s/.*)' % self.env['cat'], default=None)(self) + + obj_title = CleanText('./a') + obj_id = CleanText('./a/@href') + + def obj_split_path(self): + _id = Regexp(CleanText('./a/@href'), '/\w{2}/(.*)')(self) + return [SITE.CINEMA.get('id')] + _id.split('/') + + def get_arte_cinema_menu(self): + return self.doc.xpath('//li[has-class("leaf")]/a[starts-with(@href,"/")]/@href') + + @method + class get_arte_cinema_videos(ListElement): + item_xpath = '//article' + + class item(ItemElement): + klass = ArteSiteVideo + + def condition(self): + return len(XPath('.//div[@class="article-secondary "]')(self)) == 1 and\ + len(XPath('.//article')(self)) == 0 + + obj__site = SITE.CINEMA.get('id') + obj_id = Format('%s.%s', Field('_site'), CleanText('./@about')) + obj_title = Join(u' - ', + './/div[@class="article-secondary "]/div/div') + + def obj_thumbnail(self): + url = CleanText('.//div[@class="article-primary "]/div[has-class("field-thumbnail")]/span/noscript/img/@src')(self) + thumbnail = BaseImage(url) + thumbnail.url = thumbnail.id + return thumbnail + + +class VideoPage(HTMLPage): + def get_json_url(self): + return self.doc.xpath('//div[@class="video-container"]')[0].attrib['arte_vp_url'] + + +class ArteJsonPage(JsonPage): + + def get_video_url(self, quality, format, version, language_version): + urls = Dict('videoJsonPlayer/VSR')(self.doc).keys() + if urls: + key = '_'.join([format, quality, version]) + found = self.find_url(key, urls, version, quality) + if not found: + # We use the default language version + key = '_'.join([format, quality, language_version]) + found = self.find_url(key, urls, version, quality) + if not found: + # We only keep the quality + key = '_'.join([quality, language_version]) + found = self.find_url(key, urls, version, quality) + if not found: + found = urls[0] + streamer = Dict('videoJsonPlayer/VSR/%s/streamer' % (found), default=None)(self.doc) + url = Dict('videoJsonPlayer/VSR/%s/url' % (found))(self.doc) + if streamer: + return '%s%s' % (streamer, url) + return url + + def find_url(self, key, urls, version, quality): + self.logger.debug('available urls: %s' % urls) + self.logger.debug('search url matching : %s' % key) + # Best Case: key is mathing + matching = [s for s in urls if key in s] + self.logger.debug('best case matching: %s' % matching) + if matching: + return matching[0] + + # Second Case: is the version available + matching = [s for s in urls if version in s] + self.logger.debug('is version available: %s' % matching) + if matching: + # Do the quality + version match + matching_quality = [s for s in matching if quality in s] + self.logger.debug('does quality + version match: %s' % matching_quality) + if matching_quality: + return matching[0] + + # Only format + version mathes + return matching[0] + + @method + class iter_videos(DictElement): + item_xpath = 'videoList' + + class item(ArteItemElement): + klass = ArteVideo + + @method + class iter_programs(DictElement): + item_xpath = 'configClusterList' + + class item(ItemElement): + klass = Collection + + obj_title = Dict(CleanText(Env('title'))) + obj_id = Dict('clusterId') + + def obj_split_path(self): + return [SITE.PROGRAM.get('id'), Dict('clusterId')(self)] + + @method + class get_video(ArteItemElement): + def __init__(self, *args, **kwargs): + super(ArteItemElement, self).__init__(*args, **kwargs) + self.el = self.el.get('videoJsonPlayer') + + klass = ArteVideo + + @method + class get_arte_concert_video(ArteItemElement): + def __init__(self, *args, **kwargs): + super(ArteItemElement, self).__init__(*args, **kwargs) + self.el = self.el.get('videoJsonPlayer') + + klass = ArteSiteVideo + obj__site = SITE.CONCERT.get('id') + obj_id = Format('%s.%s', Field('_site'), Regexp(Dict('VTR'), 'http://concert.arte.tv(.*)')) + + @method + class get_arte_cinema_video(ArteItemElement): + def __init__(self, *args, **kwargs): + super(ArteItemElement, self).__init__(*args, **kwargs) + self.el = self.el.get('videoJsonPlayer') + + klass = ArteSiteVideo + + obj__site = SITE.CINEMA.get('id') + obj_date = Date(Dict('VRA')) + + @method + class get_program_video(ArteItemElement): + def __init__(self, *args, **kwargs): + super(ArteItemElement, self).__init__(*args, **kwargs) + if 'VDO' in self.el['abstractProgram'].keys(): + self.el = self.el['abstractProgram']['VDO'] + + klass = ArteVideo + + @method + class iter_program_videos(DictElement): + item_xpath = 'clusterWrapper/broadcasts' + ignore_duplicate = True + + class item(ItemElement): + klass = BaseObject + + def condition(self): + return 'VDS' in self.el.keys() and len(self.el['VDS']) > 0 + + obj_id = Dict('programId') diff --git a/modules/arte/test.py b/modules/arte/test.py index 4e699071..1db292c0 100644 --- a/modules/arte/test.py +++ b/modules/arte/test.py @@ -20,6 +20,7 @@ from weboob.tools.test import BackendTest from weboob.capabilities.video import BaseVideo +from .video import SITE class ArteTest(BackendTest): @@ -32,14 +33,23 @@ class ArteTest(BackendTest): self.backend.fillobj(v, ('url',)) self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id)) - def test_live(self): - l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live'])) - assert len(l1) - l2 = list(self.backend.iter_resources([BaseVideo], l1[0].split_path)) - assert len(l2) - v = l2[0] - self.backend.fillobj(v, ('url',)) - self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id)) + def test_sites(self): + for site in SITE.values: + + if site.get('id') == SITE.PROGRAM.get('id'): + continue + + l1 = list(self.backend.iter_resources([BaseVideo], [site.get('id')])) + assert len(l1) + l1 = l1[0] + + while not isinstance(l1, BaseVideo): + l1 = list(self.backend.iter_resources([BaseVideo], l1.split_path)) + assert len(l1) + l1 = l1[0] + + self.backend.fillobj(l1, ('url',)) + self.assertTrue(l1.url, 'URL for video "%s" not found' % (l1.id)) def test_latest(self): l = list(self.backend.iter_resources([BaseVideo], [u'arte-latest'])) @@ -49,7 +59,7 @@ class ArteTest(BackendTest): self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id)) def test_program(self): - l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-program'])) + l1 = list(self.backend.iter_resources([BaseVideo], [u'program'])) assert len(l1) # some categories may contain no available videos (during summer period for example) for l in l1: diff --git a/modules/arte/video.py b/modules/arte/video.py index 987122bf..8dff04c4 100644 --- a/modules/arte/video.py +++ b/modules/arte/video.py @@ -17,21 +17,36 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . - +from weboob.capabilities.base import enum from weboob.capabilities.video import BaseVideo +FORMATS = enum(HTTP_MP4=u'HBBTV', HLS=u'M3U8', RTMP=u'RTMP', HLS_MOBILE=u'MOBILE') + +LANG = enum(FRENCH={u'label': u'French', u'webservice': u'F', u'site': u'fr', u'version': u'1', u'title': u'titleFR'}, + GERMAN={u'label': u'German', u'webservice': u'D', u'site': u'de', u'version': u'1', u'title': u'titleDE'}) + +SITE = enum(PROGRAM={u'id': u'program', u'label': u'Arte Programs', 1: 'get_arte_programs', + 2: 'get_arte_program_videos', u'video': 'get_video_from_program_id'}, + CONCERT={u'id': u'concert', u'label': u'Arte Concert videos', 1: 'get_arte_concert_categories', + 2: 'get_arte_concert_videos', 'video': 'get_arte_concert_video'}, + CINEMA={u'id': u'cinema', u'label': u'Arte Cinema', 1: 'get_arte_cinema_categories', + 2: 'get_arte_cinema_categories', 3: 'get_arte_cinema_videos', 'video': 'get_arte_cinema_video'}) + +QUALITY = enum(HD=u'SQ', MD=u'EQ', SD=u'MQ', LD=u'LQ') + +VERSION_VIDEO = enum(VOSTA={u'label': u'Original version subtitled (German)', LANG.GERMAN.get('label'): u'3'}, + VOSTF={u'label': u'Original version subtitled (French)', LANG.FRENCH.get('label'): u'3'}, + VASTA={u'label': u'Translated version (German)', + LANG.GERMAN.get('label'): u'1', LANG.FRENCH.get('label'): u'2'}, + VFSTF={u'label': u'Translated version (French)', + LANG.FRENCH.get('label'): u'1', LANG.GERMAN.get('label'): u'2'}, + VASTMA={u'label': u'Deaf version (German)', LANG.GERMAN.get('label'): u'8'}, + VFSTMF={u'label': u'Deaf version (French)', LANG.FRENCH.get('label'): u'8'}) + class ArteVideo(BaseVideo): - @classmethod - def id2url(cls, _id): - lang = _id[-1:] - return 'http://arte.tv/papi/tvguide/videos/stream/%s/%s/M3U8' % (lang, _id) + pass -class ArteLiveVideo(BaseVideo): - def __init__(self, _id, *args, **kwargs): - BaseVideo.__init__(self, 'live.%s' % _id, *args, **kwargs) - - @classmethod - def id2url(cls, _id): - return 'http://concert.arte.tv%s' % _id +class ArteSiteVideo(BaseVideo): + pass