diff --git a/modules/arte/backend.py b/modules/arte/backend.py index 46706f28..169169cf 100644 --- a/modules/arte/backend.py +++ b/modules/arte/backend.py @@ -21,17 +21,16 @@ from __future__ import with_statement from weboob.capabilities.video import ICapVideo, BaseVideo -from weboob.capabilities.collection import ICapCollection, CollectionNotFound +from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection from weboob.tools.backend import BaseBackend, BackendConfig from weboob.tools.value import Value from .browser import ArteBrowser -from .video import ArteVideo - +from .video import ArteVideo, ArteLiveVideo +from .collection import ArteLiveCollection __all__ = ['ArteBackend'] - class ArteBackend(BaseBackend, ICapVideo, ICapCollection): NAME = 'arte' MAINTAINER = u'Romain Bignon' @@ -59,28 +58,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection): if fields != ['thumbnail']: # if we don't want only the thumbnail, we probably want also every fields with self.browser: - video = self.browser.get_video(ArteVideo.id2url(video.id), video) - if 'thumbnail' in fields and video.thumbnail: + if isinstance(video,ArteVideo): + video = self.browser.get_video(ArteVideo.id2url(video.id), video) + if isinstance(video,ArteLiveVideo): + video = self.browser.get_live_video(ArteLiveVideo.id2url(video.id), video) + if 'thumbnail' in fields and video and video.thumbnail: with self.browser: video.thumbnail.data = self.browser.readurl(video.thumbnail.url) return video def iter_resources(self, objs, split_path): - if BaseVideo in objs: - collection = self.get_collection(objs, split_path) - if collection.path_level == 0: - yield self.get_collection(objs, [u'latest']) - if collection.split_path == [u'latest']: - for video in self.browser.latest_videos(): - yield video + with self.browser: + if BaseVideo in objs: + collection = self.get_collection(objs, split_path) + if collection.path_level == 0: + yield Collection([u'latest'],u'Latest Arte videos') + yield Collection([u'live'],u'Arte Web Live videos') + if collection.path_level == 1: + if collection.split_path == [u'latest']: + for video in self.browser.latest_videos(): + yield video + if collection.split_path == [u'live']: + for categorie in self.browser.get_arte_live_categories(): + yield categorie + if collection.path_level == 2: + if collection.split_path[0] == u'live': + for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename)): + yield video def validate_collection(self, objs, collection): if collection.path_level == 0: return - if BaseVideo in objs and collection.split_path == [u'latest']: - collection.title = u'Latest Arte videos' + if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ): + return + if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' : return raise CollectionNotFound(collection.split_path) - OBJECTS = {ArteVideo: fill_video} + OBJECTS = {ArteVideo: fill_video, ArteLiveVideo: fill_video } diff --git a/modules/arte/browser.py b/modules/arte/browser.py index 57938e86..41a82096 100644 --- a/modules/arte/browser.py +++ b/modules/arte/browser.py @@ -21,7 +21,7 @@ from weboob.tools.browser import BaseBrowser from weboob.tools.browser.decorators import id2url -from .pages import IndexPage, VideoPage +from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage from .video import ArteVideo @@ -33,7 +33,10 @@ class ArteBrowser(BaseBrowser): ENCODING = None PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage, r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage, - r'http://videos.arte.tv/\w+/videos/(?P.+)\.html': VideoPage + r'http://videos.arte.tv/\w+/videos/(?P.+)\.html': VideoPage, + r'http://liveweb.arte.tv/\w+' : ArteLivePage, + r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage, + r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P.+).xml' : ArteLiveVideoPage, } SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'} @@ -48,6 +51,11 @@ class ArteBrowser(BaseBrowser): self.location(url) return self.page.get_video(video, self.lang, self.quality) + def get_live_video(self, url, video=None): + self.location(url) + assert self.is_on_page(ArteLiveVideoPage) + return self.page.get_video(url, video, self.lang, self.quality) + def home(self): self.location('http://videos.arte.tv/fr/videos/toutesLesVideos') @@ -60,3 +68,13 @@ class ArteBrowser(BaseBrowser): self.home() assert self.is_on_page(IndexPage) return self.page.iter_videos() + + def get_arte_live_categories(self): + self.location('http://liveweb.arte.tv/%s' %self.lang) + assert self.is_on_page(ArteLivePage) + return self.page.iter_resources() + + def live_videos(self, url): + self.location(url) + assert self.is_on_page(ArteLiveCategorieVideoPage) + return self.page.iter_videos(self.lang) diff --git a/modules/arte/collection.py b/modules/arte/collection.py new file mode 100644 index 00000000..61a44400 --- /dev/null +++ b/modules/arte/collection.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Christophe Benz +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + +from weboob.capabilities.collection import Collection + +__all__ = ['ArteLiveCollection'] + +class ArteLiveCollection(Collection): + @classmethod + def id2url(cls, _id): + return 'http://liveweb.arte.tv/fr/cat/%s/' % _id diff --git a/modules/arte/pages.py b/modules/arte/pages.py index fb58a04d..49416fdf 100644 --- a/modules/arte/pages.py +++ b/modules/arte/pages.py @@ -21,17 +21,114 @@ import datetime import re import urllib +import HTMLParser from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.capabilities import NotAvailable +from .video import ArteVideo, ArteLiveVideo +from .collection import ArteLiveCollection -from .video import ArteVideo +__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage'] +class ArteLiveVideoPage(BasePage): + def get_video(self, url, video=None, lang='fr', quality='hd'): + if not video: + video = ArteVideo(self.group_dict['id']) -__all__ = ['IndexPage', 'VideoPage'] + HD = re.compile("(?<=)(.*)(?=)", re.DOTALL) + SD = re.compile("(?<=)(.*)(?=)", re.DOTALL) + page = self.browser.readurl(url) + urls = {} + try: + urls['hd'] = u'%s' %HD.search(page).group(0).split('?')[0] + except AttributeError: + urls['hd'] = None + try: + urls['sd'] = u'%s' %SD.search(page).group(0).split('?')[0] + except AttributeError: + urls['sd'] = None + video.url = urls[quality] + return video + +class ArteLiveCategorieVideoPage(BasePage): + def iter_videos(self, lang='fr'): + videos = list() + xml_url = (self.document.xpath('//link')[0]).attrib['href'] + datas = self.browser.readurl(xml_url) + re_items = re.compile("(.*?)", re.DOTALL) + items = re.findall(re_items, datas) + for item in items: + parsed_element = self.get_element(item, lang) + if parsed_element: + video = ArteLiveVideo(parsed_element['ID']) + video.title = parsed_element['title'] + video.description = parsed_element['pitch'] + video.author = parsed_element['author'] + if parsed_element['pict']: + video.thumbnail = Thumbnail(parsed_element['pict']) + video.set_empty_fields(NotAvailable, ('url',)) + videos.append(video) + return videos + + def get_element(self, chain, lang): + ele = {} + tt = re.compile("(?<=)(.*?)(?=)", re.DOTALL) + lk = re.compile("(?<=)(http://liveweb.arte.tv/{0}/video/.*?)" + "(?=)".format(lang), re.DOTALL) + dt = re.compile("(?<=)(.*?)(?=)", re.DOTALL) + pt = re.compile("(?<=)(.*?)(?=)", re.DOTALL) + at = re.compile("(?<=)(.*?)(?=)", re.DOTALL) + en = re.compile("", re.DOTALL) + pix = re.compile("(?<=