Review notes (text before the first "diff --git" line is not part of the patch):

* This patch was reconstructed from a whitespace-collapsed copy.  Indentation,
  the position of blank context lines and the wrapping of the first CONFIG
  Value(...) entry were inferred from the hunk line counts -- TODO confirm
  against the repository before applying.
* "<...>" spans had been stripped from the text.  Regex named groups were
  restored as (?P<id>...) and the licence URL as <http://www.gnu.org/licenses/>;
  the group name is only confirmed for the video page pattern
  (self.group_dict['id']) and is assumed for the other patterns.
* Content changes made during review:
  - backend.py: TRANSLATION key 'eq' renamed to 'ed' so that it matches the
    'ed' entry offered by the 'quality' config choices (the old key made
    create_default_browser raise KeyError for quality=ed).
  - pages.py: ArteLivePage now builds its collections under u'arte-live', the
    root name that backend.iter_resources/validate_collection check for.
  Because of these changes the post-image blob ids on the "index" lines of
  backend.py and pages.py no longer describe the resulting files.
* Open issue, NOT fixed here: backend.get_video still calls
  self.browser.get_live_from_url() for 'live_url' ids, but this patch removes
  that method (and ArteLivePlayerPage) from browser.py/pages.py.

diff --git a/modules/arte/backend.py b/modules/arte/backend.py
index 36cbe3af..720f0bd7 100644
--- a/modules/arte/backend.py
+++ b/modules/arte/backend.py
@@ -31,6 +31,7 @@ from .collection import ArteLiveCollection
 
 __all__ = ['ArteBackend']
 
+
 class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
     NAME = 'arte'
     MAINTAINER = u'Romain Bignon'
@@ -38,22 +39,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
     VERSION = '0.h'
     DESCRIPTION = 'Arte French and German TV'
     LICENSE = 'AGPLv3+'
+
+    order = {'AIRDATE_DESC': 'Date',
+             'VIEWS': 'Views',
+             'ALPHA': 'Alphabetic',
+             'LAST_CHANCE': 'Last chance'
+             }
+
     CONFIG = BackendConfig(Value('lang', label='Lang of videos',
                                  choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'),
-                           Value('quality', label='Quality of videos', choices=['hd', 'sd'], default='hd'))
+                           Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'),
+                           Value('quality', label='Quality of videos', choices=['hd', 'sd', 'md', 'ed'], default='hd'))
+
+    TRANSLATION = {'fr': 'F',
+                   'en': 'F',
+                   'de': 'D',
+                   'hd': 'HQ',
+                   'md': 'MQ',
+                   'sd': 'SQ',
+                   'ed': 'EQ'
+                   }
+
     BROWSER = ArteBrowser
 
     def create_default_browser(self):
-        return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get())
+        return self.create_browser(lang=self.TRANSLATION[self.config['lang'].get()],
+                                   quality=self.TRANSLATION[self.config['quality'].get()],
+                                   order=self.config['order'].get())
 
     def parse_id(self, _id):
         m = re.match('^(\w+)\.(.*)', _id)
         if m:
             return m.groups()
 
-        m = re.match('https?://videos.arte.tv/\w+/videos/(?P<id>.+)\.html', _id)
+        m = re.match('https?://www.arte.tv/guide/\w+/(?P<id>.+)/(.*)', _id)
         if m:
-            return 'videos', m.group(1)
+            return 'program', m.group(1)
 
         m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id)
         if m:
@@ -71,6 +92,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
             elif site == 'live_url':
                 return self.browser.get_live_from_url(_id)
 
+            elif site == 'program':
+                return self.browser.get_video_from_program_id(_id)
+
             else:
                 return self.browser.get_video(_id)
 
@@ -84,9 +108,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
             with self.browser:
                 site, _id = self.parse_id(video.id)
 
-                if isinstance(video,ArteVideo):
+                if isinstance(video, ArteVideo):
                     video = self.browser.get_video(_id, video)
-                if isinstance(video,ArteLiveVideo):
+                if isinstance(video, ArteLiveVideo):
                     video = self.browser.get_live_video(_id, video)
         if 'thumbnail' in fields and video and video.thumbnail:
             with self.browser:
@@ -99,26 +123,26 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
         if BaseVideo in objs:
             collection = self.get_collection(objs, split_path)
             if collection.path_level == 0:
-                yield Collection([u'latest'],u'Latest Arte videos')
-                yield Collection([u'live'],u'Arte Web Live videos')
+                yield Collection([u'arte-latest'], u'Latest Arte videos')
+                yield Collection([u'arte-live'], u'Arte Web Live videos')
             if collection.path_level == 1:
-                if collection.split_path == [u'latest']:
+                if collection.split_path == [u'arte-latest']:
                     for video in self.browser.latest_videos():
                         yield video
-                if collection.split_path == [u'live']:
+                if collection.split_path == [u'arte-live']:
                     for categorie in self.browser.get_arte_live_categories():
                         yield categorie
             if collection.path_level == 2:
-                if collection.split_path[0] == u'live':
-                    for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.lang)):
+                if collection.split_path[0] == u'arte-live':
+                    for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])):
                         yield video
 
     def validate_collection(self, objs, collection):
         if collection.path_level == 0:
             return
-        if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ):
+        if BaseVideo in objs and (collection.split_path == [u'arte-latest'] or collection.split_path == [u'arte-live']):
             return
-        if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' :
+        if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'arte-live':
             return
         raise CollectionNotFound(collection.split_path)
 
diff --git a/modules/arte/browser.py b/modules/arte/browser.py
index 4847bd92..e43c690a 100644
--- a/modules/arte/browser.py
+++ b/modules/arte/browser.py
@@ -17,40 +17,53 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
+import re
+import datetime
+import urllib
 
+from weboob.capabilities import NotAvailable
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.tools.json import json as simplejson
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url
 
-from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage, ArteLivePlayerPage
+from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
 from .video import ArteVideo, ArteLiveVideo
 
-
 __all__ = ['ArteBrowser']
 
 
 class ArteBrowser(BaseBrowser):
     DOMAIN = u'videos.arte.tv'
     ENCODING = None
-    PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage,
-             r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage,
-             r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage,
-             r'http://liveweb.arte.tv/\w+' : ArteLivePage,
-             r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage,
-             r'http://liveweb.arte.tv/\w+/video/.*': ArteLivePlayerPage,
-             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml' : ArteLiveVideoPage,
-            }
+    PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage,
+             r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage,
+             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage,
+             }
 
-    SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'}
+    LIVE_LANG = {'F': 'fr',
+                 'D': 'de'
+                 }
+    API_URL = 'http://arte.tv/papi/tvguide'
 
-    def __init__(self, lang, quality, *args, **kwargs):
+    def __init__(self, lang, quality, order, *args, **kwargs):
         self.lang = lang
         self.quality = quality
+        self.order = order
         BaseBrowser.__init__(self, *args, **kwargs)
 
     @id2url(ArteVideo.id2url)
     def get_video(self, url, video=None):
-        self.location(url)
-        return self.page.get_video(video, self.lang, self.quality)
+        _url = url \
+            + '/' + self.quality \
+            + '.json'
+
+        response = self.openurl(_url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        if video is None:
+            video = ArteVideo(result['video']['VID'])
+        video.url = u'%s' % result['video']['VSR'][0]['VUR']
+        return video
 
     @id2url(ArteLiveVideo.id2url)
     def get_live_video(self, url, video=None):
@@ -61,29 +74,100 @@ class ArteBrowser(BaseBrowser):
     def home(self):
         self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)
 
+    def get_video_from_program_id(self, _id):
+        class_name = 'epg'
+        method_name = 'program'
+        level = 'L2'
+        url = self.API_URL \
+            + '/' + class_name \
+            + '/' + method_name \
+            + '/' + self.lang \
+            + '/' + level \
+            + '/' + _id \
+            + '.json'
+
+        response = self.openurl(url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        video = self.create_video(result['abstractProgram']['VDO'])
+        return self.get_video(video.id, video)
+
     def search_videos(self, pattern):
-        self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8')))
-        assert self.is_on_page(IndexPage)
-        return self.page.iter_videos()
+        class_name = 'videos/plus7'
+        method_name = 'search'
+        level = 'L1'
+        cluster = 'ALL'
+        channel = 'ALL'
+        limit = '10'
+        offset = '0'
+
+        url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset, pattern)
+        response = self.openurl(url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        return self.create_video_from_plus7(result['videoList'])
+
+    def create_video_from_plus7(self, result):
+        for item in result:
+            yield self.create_video(item)
+
+    def create_video(self, item):
+        video = ArteVideo(item['VID'])
+        if 'VSU' in item:
+            video.title = u'%s : %s' % (item['VTI'], item['VSU'])
+        else:
+            video.title = u'%s' % (item['VTI'])
+        video.rating = int(item['VRT'])
+        video.thumbnail = Thumbnail(u'%s' % item['programImage'])
+        video.duration = datetime.timedelta(seconds=int(item['videoDurationSeconds']))
+        video.set_empty_fields(NotAvailable, ('url',))
+        video.description = u'%s' % item['VDE']
+        m = re.match('(\d{2})\s(\d{2})\s(\d{4})(.*?)', item['VDA'])
+        if m:
+            dd = int(m.group(1))
+            mm = int(m.group(2))
+            yyyy = int(m.group(3))
+            video.date = datetime.date(yyyy, mm, dd)
+        return video
+
+    def create_url_plus7(self, class_name, method_name, level, cluster, channel, limit, offset, pattern=None):
+        url = self.API_URL \
+            + '/' + class_name \
+            + '/' + method_name \
+            + '/' + self.lang \
+            + '/' + level
+
+        if pattern:
+            url += '/' + urllib.quote(pattern)
+
+        url += '/' + channel \
+            + '/' + cluster \
+            + '/' + '-1' \
+            + '/' + self.order \
+            + '/' + limit \
+            + '/' + offset \
+            + '.json'
+
+        return url
 
     def latest_videos(self):
-        self.home()
-        assert self.is_on_page(IndexPage)
-        return self.page.iter_videos()
+        class_name = 'videos'
+        method_name = 'plus7'
+        level = 'L1'
+        cluster = 'ALL'
+        channel = 'ALL'
+        limit = '10'
+        offset = '0'
+
+        url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset)
+        response = self.openurl(url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        return self.create_video_from_plus7(result['videoList'])
 
     def get_arte_live_categories(self):
-        self.location('http://liveweb.arte.tv/%s' %self.lang)
+        self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang])
         assert self.is_on_page(ArteLivePage)
         return self.page.iter_resources()
 
     def live_videos(self, url):
         self.location(url)
         assert self.is_on_page(ArteLiveCategorieVideoPage)
-        return self.page.iter_videos(self.lang)
-
-    def get_live_from_url(self, url):
-        self.location(url)
-        assert self.is_on_page(ArteLivePlayerPage)
-        _id = self.page.retrieve_id()
-        if _id:
-            return self.get_live_video(_id)
+        return self.page.iter_videos(self.LIVE_LANG[self.lang])
diff --git a/modules/arte/pages.py b/modules/arte/pages.py
index 7d1d6dba..b25c93ec 100644
--- a/modules/arte/pages.py
+++ b/modules/arte/pages.py
@@ -18,19 +18,17 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 
 
-import datetime
 import re
-import urllib
 import HTMLParser
 
-from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.tools.browser import BasePage
 from weboob.tools.capabilities.thumbnail import Thumbnail
 from weboob.capabilities import NotAvailable
 
-from .video import ArteVideo, ArteLiveVideo
+from .video import ArteLiveVideo
 from .collection import ArteLiveCollection
 
-__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
+__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
 
 
 class ArteLiveVideoPage(BasePage):
@@ -44,9 +42,9 @@ class ArteLiveVideoPage(BasePage):
             urls[url.tag[-2:]] = url.text
 
         if quality in urls:
-            video.url = urls[quality]
+            video.url = u'%s' % urls[quality]
         else:
-            video.url = urls.popitem()[1]
+            video.url = u'%s' % urls.popitem()[1]
 
         return video
 
@@ -127,98 +125,3 @@ class ArteLivePage(BasePage):
-                item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text))
+                item = ArteLiveCollection([u'arte-live', u'%s' % _id], u'%s' % (el.find('a').text))
                 items.append(item)
         return items
-
-
-class IndexPage(BasePage):
-    def iter_videos(self):
-        videos = self.document.getroot().cssselect("div[class=video]")
-        for div in videos:
-            title = div.find('h2').find('a').text
-            m = re.match(r'/(fr|de|en)/videos/(.*)\.html', div.find('h2').find('a').attrib['href'])
-            _id = ''
-            if m:
-                _id = m.group(2)
-            rating = rating_max = 0
-            rates = self.parser.select(div, 'div[class=rateContainer]', 1)
-            for r in rates.findall('div'):
-                if 'star-rating-on' in r.attrib['class']:
-                    rating += 1
-                rating_max += 1
-
-            video = ArteVideo(_id)
-            video.title = unicode(title)
-            video.rating = rating
-            video.rating_max = rating_max
-
-            thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
-            video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])
-
-            try:
-                parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
-                if len(parts) == 2:
-                    hours = 0
-                    minutes, seconds = parts
-                elif len(parts) == 3:
-                    hours, minutes, seconds = parts
-                else:
-                    raise BrokenPageError('Unable to parse duration %r' % parts)
-            except BrokenPageError:
-                pass
-            else:
-                video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
-
-            video.set_empty_fields(NotAvailable, ('url',))
-
-            yield video
-
-
-class VideoPage(BasePage):
-    def get_video(self, video=None, lang='fr', quality='hd'):
-        if not video:
-            video = ArteVideo(self.group_dict['id'])
-        video.title = unicode(self.get_title())
-        video.url = unicode(self.get_url(lang, quality))
-        video.set_empty_fields(NotAvailable)
-        return video
-
-    def get_title(self):
-        return self.document.getroot().cssselect('h1')[0].text
-
-    def get_url(self, lang, quality):
-        obj = self.parser.select(self.document.getroot(), 'object', 1)
-        movie_url = self.parser.select(obj, 'param[name=movie]', 1)
-        xml_url = urllib.unquote(movie_url.attrib['value'].split('videorefFileUrl=')[-1])
-
-        doc = self.browser.get_document(self.browser.openurl(xml_url))
-        videos_list = self.parser.select(doc.getroot(), 'video')
-        videos = {}
-        for v in videos_list:
-            videos[v.attrib['lang']] = v.attrib['ref']
-
-        if lang in videos:
-            xml_url = videos[lang]
-        else:
-            xml_url = videos.popitem()[1]
-
-        doc = self.browser.get_document(self.browser.openurl(xml_url))
-
-        obj = self.parser.select(doc.getroot(), 'urls', 1)
-        videos_list = self.parser.select(obj, 'url')
-        urls = {}
-        for v in videos_list:
-            urls[v.attrib['quality']] = v.text
-
-        if quality in urls:
-            video_url = urls[quality]
-        else:
-            video_url = urls.popitem()[1]
-
-        return video_url
-
-
-class ArteLivePlayerPage(BasePage):
-    def retrieve_id(self):
-        player_url = self.document.xpath('//div[@class="flash"]/div/object/param')[0].attrib['value']
-        _id = re.match('(.*)&eventId=(\d*)&(.*)', player_url)
-        if _id:
-            return u'%s' % _id.group(2)
diff --git a/modules/arte/test.py b/modules/arte/test.py
index e98b951e..8316fbe8 100644
--- a/modules/arte/test.py
+++ b/modules/arte/test.py
@@ -30,20 +30,20 @@ class ArteTest(BackendTest):
         if len(l) > 0:
             v = l[0]
             self.backend.fillobj(v, ('url',))
-            self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+            self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
 
     def test_live(self):
-        l1 = list(self.backend.iter_resources([BaseVideo], [u'live']))
+        l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
         assert len(l1)
-        l2 = list(self.backend.iter_resources([BaseVideo], [u'live',u'%s'%l1[0]]))
+        l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]]))
         assert len(l2)
         v = l2[0]
         self.backend.fillobj(v, ('url',))
-        self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
 
     def test_latest(self):
-        l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+        l = list(self.backend.iter_resources([BaseVideo], [u'arte-latest']))
         assert len(l)
         v = l[0]
         self.backend.fillobj(v, ('url',))
-        self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
diff --git a/modules/arte/video.py b/modules/arte/video.py
index a67ff6ce..2c044451 100644
--- a/modules/arte/video.py
+++ b/modules/arte/video.py
@@ -21,13 +21,14 @@
 from weboob.capabilities.video import BaseVideo
 
 
-__all__ = ['ArteVideo','ArteLiveVideo']
+__all__ = ['ArteVideo', 'ArteLiveVideo']
 
 
 class ArteVideo(BaseVideo):
     @classmethod
     def id2url(cls, _id):
-        return 'http://videos.arte.tv/fr/videos/%s.html' % _id
+        lang = _id[-1:]
+        return 'http://arte.tv/papi/tvguide/videos/stream/%s/%s/HBBTV' % (lang, _id)
 
 
 class ArteLiveVideo(BaseVideo):