From d61d3ba6a43fbe85fb1cfe746cc07018172acebb Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Thu, 6 Feb 2014 19:27:47 +0100 Subject: [PATCH] [arte] fix : arte-live site changes --- modules/arte/backend.py | 11 ++-- modules/arte/browser.py | 54 ++++++++++++---- modules/arte/collection.py | 27 -------- modules/arte/pages.py | 128 +++++++++++-------------------------- modules/arte/test.py | 2 +- modules/arte/video.py | 2 +- 6 files changed, 88 insertions(+), 136 deletions(-) delete mode 100644 modules/arte/collection.py diff --git a/modules/arte/backend.py b/modules/arte/backend.py index 5d9e4e35..3ff2af3b 100644 --- a/modules/arte/backend.py +++ b/modules/arte/backend.py @@ -27,7 +27,7 @@ from weboob.tools.value import Value from .browser import ArteBrowser from .video import ArteVideo, ArteLiveVideo -from .collection import ArteLiveCollection + __all__ = ['ArteBackend'] @@ -76,9 +76,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection): if m: return 'program', m.group(1) - m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id) + m = re.match('https?://concert.arte.tv/(\w+)/(.*)', _id) if m: - return 'live_url', _id + return 'live', '/%s/%s' % (m.group(1), m.group(2)) return 'videos', _id @@ -89,9 +89,6 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection): if site == 'live': return self.browser.get_live_video(_id) - elif site == 'live_url': - return self.browser.get_live_from_url(_id) - elif site == 'program': return self.browser.get_video_from_program_id(_id) @@ -134,7 +131,7 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection): yield categorie if collection.path_level == 2: if collection.split_path[0] == u'arte-live': - for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])): + for video in self.browser.live_videos(collection.basename): yield video def validate_collection(self, objs, collection): diff --git a/modules/arte/browser.py b/modules/arte/browser.py index 2a365f33..d3abe58c 100644 --- a/modules/arte/browser.py +++ b/modules/arte/browser.py @@ -27,7 +27,7 @@ from weboob.tools.json import json as simplejson from weboob.tools.browser import BaseBrowser from weboob.tools.browser.decorators import id2url -from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage +from .pages import ArteLivePage, ArteLiveVideoPage from .video import ArteVideo, ArteLiveVideo __all__ = ['ArteBrowser'] @@ -36,14 +36,14 @@ __all__ = ['ArteBrowser'] class ArteBrowser(BaseBrowser): DOMAIN = u'videos.arte.tv' ENCODING = None - PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage, - r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage, - r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P.+).xml': ArteLiveVideoPage, - } + PAGES = {r'http://concert.arte.tv/\w+': ArteLivePage, + r'http://concert.arte.tv/(?P.+)': ArteLiveVideoPage, + } LIVE_LANG = {'F': 'fr', 'D': 'de' } + API_URL = 'http://arte.tv/papi/tvguide' def __init__(self, lang, quality, order, *args, **kwargs): @@ -85,7 +85,39 @@ class ArteBrowser(BaseBrowser): def get_live_video(self, url, video=None): self.location(url) assert self.is_on_page(ArteLiveVideoPage) - return self.page.get_video(video, self.lang, self.quality) + json_url, video = self.page.get_video(video) + return self.fill_live_video(video, json_url) + + def fill_live_video(self, video, json_url): + + response = self.openurl(json_url) + result = simplejson.loads(response.read(), self.ENCODING) + + quality = None + if 'VSR' in result['videoJsonPlayer']: + for item in result['videoJsonPlayer']['VSR']: + if self.quality in item: + quality = item + break + + if not quality: + url = result['videoJsonPlayer']['VSR'][0]['url'] + ext = result['videoJsonPlayer']['VSR'][0]['mediaType'] + else: + url = result['videoJsonPlayer']['VSR'][quality]['url'] + ext = result['videoJsonPlayer']['VSR'][quality]['mediaType'] + + video.url = u'%s' % url + video.ext = u'%s' % ext + video.date = datetime.datetime.strptime(result['videoJsonPlayer']['VDA'][:-6], '%d/%m/%Y %H:%M:%S') + + if 'VDU' in result['videoJsonPlayer'].keys(): + video.duration = int(result['videoJsonPlayer']['VDU']) + + if 'IUR' in result['videoJsonPlayer']['VTU'].keys(): + video.thumbnail = BaseImage(result['videoJsonPlayer']['VTU']['IUR']) + video.thumbnail.url = video.thumbnail.id + return video def home(self): self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang) @@ -182,11 +214,11 @@ class ArteBrowser(BaseBrowser): return self.create_video_from_plus7(result['videoList']) def get_arte_live_categories(self): - self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang]) + self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang]) assert self.is_on_page(ArteLivePage) return self.page.iter_resources() - def live_videos(self, url): - self.location(url) - assert self.is_on_page(ArteLiveCategorieVideoPage) - return self.page.iter_videos(self.LIVE_LANG[self.lang]) + def live_videos(self, cat): + self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang]) + assert self.is_on_page(ArteLivePage) + return self.page.iter_videos(cat, lang=self.LIVE_LANG[self.lang]) diff --git a/modules/arte/collection.py b/modules/arte/collection.py deleted file mode 100644 index 9b55d2b2..00000000 --- a/modules/arte/collection.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2010-2011 Christophe Benz -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - -from weboob.capabilities.collection import Collection - -__all__ = ['ArteLiveCollection'] - -class ArteLiveCollection(Collection): - @classmethod - def id2url(cls, _id, lang): - return 'http://liveweb.arte.tv/%s/cat/%s/' % (lang, _id) diff --git a/modules/arte/pages.py b/modules/arte/pages.py index ff58b71b..c6621002 100644 --- a/modules/arte/pages.py +++ b/modules/arte/pages.py @@ -17,112 +17,62 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -import re -import HTMLParser from weboob.tools.browser import BasePage +from weboob.tools.misc import html2text from weboob.capabilities import NotAvailable from weboob.capabilities.image import BaseImage - +from weboob.capabilities.collection import Collection from .video import ArteLiveVideo -from .collection import ArteLiveCollection -__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage'] +__all__ = ['ArteLivePage', 'ArteLiveVideoPage'] class ArteLiveVideoPage(BasePage): - def get_video(self, video=None, lang='fr', quality='hd'): + def get_video(self, video=None): if not video: video = ArteLiveVideo(self.group_dict['id']) - urls = {} - for url in self.document.xpath('//video')[0].getchildren(): - if url.tag.startswith('url'): - urls[url.tag[-2:]] = url.text + div = self.document.xpath('//div[@class="bloc-presentation"]')[0] - if quality in urls: - video.url = u'%s' % urls[quality] - else: - video.url = u'%s' % urls.popitem()[1] - return video + description = self.parser.select(div, + 'div[@class="field field-name-body field-type-text-with-summary field-label-hidden bloc-rte"]', + 1, + method='xpath') + video.description = html2text(self.parser.tostring(description)) - -class ArteLiveCategorieVideoPage(BasePage): - def iter_videos(self, lang='fr'): - videos = list() - xml_url = (self.document.xpath('//link')[0]).attrib['href'] - datas = self.browser.readurl(xml_url) - re_items = re.compile("(.*?)", re.DOTALL) - items = re.findall(re_items, datas) - for item in items: - parsed_element = self.get_element(item, lang) - if parsed_element: - video = ArteLiveVideo(parsed_element['ID']) - video.title = parsed_element['title'] - video.description = parsed_element['pitch'] - video.author = parsed_element['author'] - if parsed_element['pict']: - video.thumbnail = BaseImage(parsed_element['pict']) - video.thumbnail.url = video.thumbnail.id - video.set_empty_fields(NotAvailable, ('url',)) - videos.append(video) - return videos - - def get_element(self, chain, lang): - ele = {} - tt = re.compile("(?<=)(.*?)(?=)", re.DOTALL) - lk = re.compile("(?<=)(http://liveweb.arte.tv/{0}/video/.*?)" - "(?=)".format(lang), re.DOTALL) - dt = re.compile("(?<=)(.*?)(?=)", re.DOTALL) - pt = re.compile("(?<=)(.*?)(?=)", re.DOTALL) - at = re.compile("(?<=)(.*?)(?=)", re.DOTALL) - en = re.compile("", re.DOTALL) - pix = re.compile("(?<=