Ajout de la prise en charge d'Arte Live Web par le module arte

Signed-off-by: Bezleputh <carton_ben@yahoo.fr>
Signed-off-by: Romain Bignon <romain@budget-insight.com>
This commit is contained in:
Bezleputh 2013-05-13 21:37:01 +02:00 committed by Romain Bignon
commit d1fd5de309
6 changed files with 191 additions and 21 deletions

View file

@ -21,17 +21,16 @@
from __future__ import with_statement from __future__ import with_statement
from weboob.capabilities.video import ICapVideo, BaseVideo from weboob.capabilities.video import ICapVideo, BaseVideo
from weboob.capabilities.collection import ICapCollection, CollectionNotFound from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection
from weboob.tools.backend import BaseBackend, BackendConfig from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.value import Value from weboob.tools.value import Value
from .browser import ArteBrowser from .browser import ArteBrowser
from .video import ArteVideo from .video import ArteVideo, ArteLiveVideo
from .collection import ArteLiveCollection
__all__ = ['ArteBackend'] __all__ = ['ArteBackend']
class ArteBackend(BaseBackend, ICapVideo, ICapCollection): class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
NAME = 'arte' NAME = 'arte'
MAINTAINER = u'Romain Bignon' MAINTAINER = u'Romain Bignon'
@ -59,28 +58,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
if fields != ['thumbnail']: if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields # if we don't want only the thumbnail, we probably want also every fields
with self.browser: with self.browser:
video = self.browser.get_video(ArteVideo.id2url(video.id), video) if isinstance(video,ArteVideo):
if 'thumbnail' in fields and video.thumbnail: video = self.browser.get_video(ArteVideo.id2url(video.id), video)
if isinstance(video,ArteLiveVideo):
video = self.browser.get_live_video(ArteLiveVideo.id2url(video.id), video)
if 'thumbnail' in fields and video and video.thumbnail:
with self.browser: with self.browser:
video.thumbnail.data = self.browser.readurl(video.thumbnail.url) video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
return video return video
def iter_resources(self, objs, split_path): def iter_resources(self, objs, split_path):
if BaseVideo in objs: with self.browser:
collection = self.get_collection(objs, split_path) if BaseVideo in objs:
if collection.path_level == 0: collection = self.get_collection(objs, split_path)
yield self.get_collection(objs, [u'latest']) if collection.path_level == 0:
if collection.split_path == [u'latest']: yield Collection([u'latest'],u'Latest Arte videos')
for video in self.browser.latest_videos(): yield Collection([u'live'],u'Arte Web Live videos')
yield video if collection.path_level == 1:
if collection.split_path == [u'latest']:
for video in self.browser.latest_videos():
yield video
if collection.split_path == [u'live']:
for categorie in self.browser.get_arte_live_categories():
yield categorie
if collection.path_level == 2:
if collection.split_path[0] == u'live':
for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename)):
yield video
def validate_collection(self, objs, collection): def validate_collection(self, objs, collection):
if collection.path_level == 0: if collection.path_level == 0:
return return
if BaseVideo in objs and collection.split_path == [u'latest']: if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ):
collection.title = u'Latest Arte videos' return
if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' :
return return
raise CollectionNotFound(collection.split_path) raise CollectionNotFound(collection.split_path)
OBJECTS = {ArteVideo: fill_video} OBJECTS = {ArteVideo: fill_video, ArteLiveVideo: fill_video }

View file

@ -21,7 +21,7 @@
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from .pages import IndexPage, VideoPage from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
from .video import ArteVideo from .video import ArteVideo
@ -33,7 +33,10 @@ class ArteBrowser(BaseBrowser):
ENCODING = None ENCODING = None
PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage, PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage,
r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage, r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage,
r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage,
r'http://liveweb.arte.tv/\w+' : ArteLivePage,
r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage,
r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml' : ArteLiveVideoPage,
} }
SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'} SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'}
@ -48,6 +51,11 @@ class ArteBrowser(BaseBrowser):
self.location(url) self.location(url)
return self.page.get_video(video, self.lang, self.quality) return self.page.get_video(video, self.lang, self.quality)
def get_live_video(self, url, video=None):
    """Load the live event descriptor at *url* and build a video from it.

    The browser is expected to land on an ``ArteLiveVideoPage``. That
    page's ``get_video`` receives the URL, the optional *video* object
    and the browser's ``lang`` and ``quality`` attributes; its result is
    returned unchanged.
    """
    self.location(url)
    assert self.is_on_page(ArteLiveVideoPage)
    live_page = self.page
    return live_page.get_video(url, video, self.lang, self.quality)
def home(self): def home(self):
self.location('http://videos.arte.tv/fr/videos/toutesLesVideos') self.location('http://videos.arte.tv/fr/videos/toutesLesVideos')
@ -60,3 +68,13 @@ class ArteBrowser(BaseBrowser):
self.home() self.home()
assert self.is_on_page(IndexPage) assert self.is_on_page(IndexPage)
return self.page.iter_videos() return self.page.iter_videos()
def get_arte_live_categories(self):
    """Open the Arte Live Web home page and return its categories.

    The home page address is built from the browser's ``lang``
    attribute. The browser is expected to land on an ``ArteLivePage``,
    whose ``iter_resources()`` result is returned as is.
    """
    live_home = 'http://liveweb.arte.tv/%s' % self.lang
    self.location(live_home)
    assert self.is_on_page(ArteLivePage)
    categories = self.page.iter_resources()
    return categories
def live_videos(self, url):
    """Open the category page at *url* and return the videos it lists.

    The browser is expected to land on an
    ``ArteLiveCategorieVideoPage``; its ``iter_videos`` is called with
    the browser's ``lang`` attribute and the result is returned as is.
    """
    self.location(url)
    assert self.is_on_page(ArteLiveCategorieVideoPage)
    category_page = self.page
    return category_page.iter_videos(self.lang)

View file

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.collection import Collection
__all__ = ['ArteLiveCollection']
class ArteLiveCollection(Collection):
    """Collection standing for one Arte Live Web category."""

    @classmethod
    def id2url(cls, _id):
        """Return the (French) Arte Live Web category URL for *_id*."""
        url_template = 'http://liveweb.arte.tv/fr/cat/%s/'
        return url_template % _id

View file

@ -21,17 +21,114 @@
import datetime import datetime
import re import re
import urllib import urllib
import HTMLParser
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable
from .video import ArteVideo, ArteLiveVideo
from .collection import ArteLiveCollection
from .video import ArteVideo __all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
class ArteLiveVideoPage(BasePage):
    """Page wrapping the XML descriptor of a single Arte Live Web event."""

    # One pattern per supported quality. The quantifier is non-greedy so
    # that only the content of the first matching element is captured,
    # even if the descriptor repeats the element.
    _STREAM_PATTERNS = {
        'hd': re.compile("(?<=<urlHd>)(.*?)(?=</urlHd>)", re.DOTALL),
        'sd': re.compile("(?<=<urlSd>)(.*?)(?=</urlSd>)", re.DOTALL),
    }

    def get_video(self, url, video=None, lang='fr', quality='hd'):
        """Fill (or create) a live video with its stream URL.

        :param url: address of the event descriptor; its content is read
            again through ``self.browser.readurl``.
        :param video: video object to fill. When it is missing, an
            ``ArteLiveVideo`` is created from the ``id`` group of the page
            URL, so that the object keeps the live type.
        :param lang: accepted for signature compatibility; not used here.
        :param quality: key of the stream to select, ``'hd'`` or ``'sd'``.
        :returns: the video, whose ``url`` is the content of the selected
            ``<urlHd>``/``<urlSd>`` element without its query string, or
            ``None`` when that element is absent.
        :raises KeyError: if *quality* is neither ``'hd'`` nor ``'sd'``.
        """
        if not video:
            video = ArteLiveVideo(self.group_dict['id'])

        page = self.browser.readurl(url)
        urls = {}
        for name, pattern in self._STREAM_PATTERNS.items():
            found = pattern.search(page)
            if found is None:
                urls[name] = None
            else:
                # Drop everything from the first '?' on.
                urls[name] = u'%s' % found.group(0).split('?')[0]

        video.url = urls[quality]
        return video
class ArteLiveCategorieVideoPage(BasePage):
    """Arte Live Web category page; its videos are read from a linked feed."""

    # Patterns that do not depend on the language are compiled once for
    # the class instead of once per feed item.
    _ITEM_RE = re.compile("(<item>.*?</item>)", re.DOTALL)
    _TITLE_RE = re.compile("(?<=<title>)(.*?)(?=</title>)", re.DOTALL)
    _DATE_RE = re.compile("(?<=<pubDate>)(.*?)(?=</pubDate>)", re.DOTALL)
    _PITCH_RE = re.compile("(?<=<description>)(.*?)(?=</description>)", re.DOTALL)
    _AUTHOR_RE = re.compile("(?<=<author>)(.*?)(?=</author>)", re.DOTALL)
    _EVENT_ID_RE = re.compile("<enclosure.*?/event/.*?/(.*?)-.*?/>", re.DOTALL)
    _PICT_RE = re.compile("(?<=<enclosure url=\")(.*?)(?=\" type=\"image/)", re.DOTALL)

    def iter_videos(self, lang='fr'):
        """Return the list of videos described by the feed linked from the page.

        The feed address is the ``href`` of the first ``<link>`` element of
        the document. Every ``<item>`` chunk of the feed is parsed with
        :meth:`get_element`; chunks it rejects are skipped.

        :param lang: language code forwarded to :meth:`get_element`.
        :returns: a list of ``ArteLiveVideo`` objects with title,
            description, author and (when a picture is found) thumbnail
            set; ``url`` is marked ``NotAvailable``.
        """
        xml_url = self.document.xpath('//link')[0].attrib['href']
        datas = self.browser.readurl(xml_url)

        videos = []
        for item in self._ITEM_RE.findall(datas):
            parsed_element = self.get_element(item, lang)
            if not parsed_element:
                continue

            video = ArteLiveVideo(parsed_element['ID'])
            video.title = parsed_element['title']
            video.description = parsed_element['pitch']
            video.author = parsed_element['author']
            if parsed_element['pict']:
                video.thumbnail = Thumbnail(parsed_element['pict'])
            video.set_empty_fields(NotAvailable, ('url',))
            videos.append(video)
        return videos

    @staticmethod
    def _decoded_match(pattern, chain, default):
        """Return the UTF-8 decoded first match of *pattern*, else *default*."""
        try:
            return pattern.search(chain).group(0).decode('utf-8', 'replace')
        except Exception:
            # Best effort: a missing or undecodable field must not drop the item.
            return default

    def get_element(self, chain, lang):
        """Extract the fields of one feed ``<item>`` chunk.

        :param chain: text of a single ``<item>...</item>`` chunk.
        :param lang: language code inserted into the expected video link.
        :returns: a dict with the keys ``link``, ``ID``, ``title``,
            ``date``, ``pitch``, ``author`` and ``pict``, or ``None`` when
            the chunk has no video link for *lang* or no integer event id.
            The other fields fall back to placeholder values.
        """
        lk = re.compile("(?<=<link>)(http://liveweb.arte.tv/{0}/video/.*?)"
                        "(?=</link>)".format(lang), re.DOTALL)

        ele = {}

        # Mandatory fields: without them the item is not a usable video.
        try:
            ele['link'] = lk.search(chain).group(0)
            ele['ID'] = int(self._EVENT_ID_RE.search(chain).group(1))
        except Exception:
            return None

        ele['title'] = self._decoded_match(self._TITLE_RE, chain, "No title")
        ele['date'] = self._decoded_match(self._DATE_RE, chain, "No date")

        try:
            s = self._PITCH_RE.search(chain).group(0)
            # Unescaped twice, as the original code did -- presumably the
            # feed escapes the entities of the description twice; TODO confirm.
            s = HTMLParser.HTMLParser().unescape(s)
            ele['pitch'] = HTMLParser.HTMLParser().unescape(s)
        except Exception:
            ele['pitch'] = "No description"

        # NOTE(review): the fallback text is kept as is ("Unknow") because
        # it is a runtime value.
        ele['author'] = self._decoded_match(self._AUTHOR_RE, chain, "Unknow")

        try:
            ele['pict'] = self._PICT_RE.search(chain).group(0)
        except Exception:
            ele['pict'] = None

        return ele
class ArteLivePage(BasePage):
    """Arte Live Web landing page, from which the categories are read."""

    def iter_resources(self):
        """Return the category collections linked from the page.

        Every ``<li>`` of the ``categoryArray`` list whose link matches the
        liveweb pattern becomes an ``ArteLiveCollection`` placed under the
        ``live`` path. Its identifier is the second-to-last
        ``/``-separated segment of the link and its title is the link text.
        """
        categories = []
        for entry in self.document.xpath('//ul[@id="categoryArray"]/li'):
            anchor = entry.find('a')
            href = anchor.attrib['href']
            if not re.match(r'http://liveweb.arte.tv/*', href):
                continue

            url = u'%s' % href
            _id = url.split('/')[-2]
            title = u'%s' % (anchor.text)
            categories.append(ArteLiveCollection([u'live', u'%s' % _id], title))
        return categories
class IndexPage(BasePage): class IndexPage(BasePage):
def iter_videos(self): def iter_videos(self):

View file

@ -32,6 +32,15 @@ class ArteTest(BackendTest):
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_live(self):
    """Browse live categories, then one category, then fetch a video URL."""
    l1 = list(self.backend.iter_resources([BaseVideo], [u'live']))
    assert len(l1)
    # Descend into the first category through its own path rather than
    # through the string form of the collection object, which is not
    # guaranteed to be the bare category id.
    # NOTE(review): assumes the collections yielded for [u'live'] expose
    # ``split_path`` (the backend reads that attribute on collections) -- confirm.
    l2 = list(self.backend.iter_resources([BaseVideo], l1[0].split_path))
    assert len(l2)
    v = l2[0]
    self.backend.fillobj(v, ('url',))
    self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_latest(self): def test_latest(self):
l = list(self.backend.iter_resources([BaseVideo], [u'latest'])) l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
assert len(l) assert len(l)

View file

@ -21,10 +21,16 @@
from weboob.capabilities.video import BaseVideo from weboob.capabilities.video import BaseVideo
__all__ = ['ArteVideo'] __all__ = ['ArteVideo','ArteLiveVideo']
class ArteVideo(BaseVideo): class ArteVideo(BaseVideo):
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):
return 'http://videos.arte.tv/fr/videos/%s.html' % _id return 'http://videos.arte.tv/fr/videos/%s.html' % _id
class ArteLiveVideo(BaseVideo):
    """Video coming from Arte Live Web."""

    @classmethod
    def id2url(cls, _id):
        """Return the address of the XML event descriptor for *_id*."""
        url_template = 'http://arte.vo.llnwd.net/o21/liveweb/events/event-%s.xml'
        return url_template % _id