update arte module to use arte api

This commit is contained in:
Bezleputh 2013-09-16 20:48:58 +02:00 committed by Florent
commit a5d5011979
5 changed files with 167 additions and 155 deletions

View file

@ -31,6 +31,7 @@ from .collection import ArteLiveCollection
__all__ = ['ArteBackend'] __all__ = ['ArteBackend']
class ArteBackend(BaseBackend, ICapVideo, ICapCollection): class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
NAME = 'arte' NAME = 'arte'
MAINTAINER = u'Romain Bignon' MAINTAINER = u'Romain Bignon'
@ -38,22 +39,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
VERSION = '0.h' VERSION = '0.h'
DESCRIPTION = 'Arte French and German TV' DESCRIPTION = 'Arte French and German TV'
LICENSE = 'AGPLv3+' LICENSE = 'AGPLv3+'
# Sort orders offered by the 'order' config value: each key is the value
# stored in the config (and handed to the browser as ``order``), each value
# is the human-readable label for that choice.
order = {'AIRDATE_DESC': 'Date',
'VIEWS': 'Views',
'ALPHA': 'Alphabetic',
'LAST_CHANCE': 'Last chance'
}
CONFIG = BackendConfig(Value('lang', label='Lang of videos', CONFIG = BackendConfig(Value('lang', label='Lang of videos',
choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'), choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'),
Value('quality', label='Quality of videos', choices=['hd', 'sd'], default='hd')) Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'),
Value('quality', label='Quality of videos', choices=['hd', 'sd', 'md', 'ed'], default='hd'))
# Translates the user-facing 'lang' and 'quality' config values into the
# codes passed to the browser. Every value selectable in CONFIG must have an
# entry here, otherwise create_default_browser() fails with a KeyError.
# 'ed' is the quality name offered by CONFIG; 'eq' is kept as an alias so
# that any existing lookup using the older spelling keeps working.
TRANSLATION = {'fr': 'F',
               'en': 'F',
               'de': 'D',
               'hd': 'HQ',
               'md': 'MQ',
               'sd': 'SQ',
               'ed': 'EQ',
               'eq': 'EQ',
               }
BROWSER = ArteBrowser BROWSER = ArteBrowser
def create_default_browser(self): def create_default_browser(self):
return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get()) return self.create_browser(lang=self.TRANSLATION[self.config['lang'].get()],
quality=self.TRANSLATION[self.config['quality'].get()],
order=self.config['order'].get())
def parse_id(self, _id): def parse_id(self, _id):
m = re.match('^(\w+)\.(.*)', _id) m = re.match('^(\w+)\.(.*)', _id)
if m: if m:
return m.groups() return m.groups()
m = re.match('https?://videos.arte.tv/\w+/videos/(?P<id>.+)\.html', _id) m = re.match('https?://www.arte.tv/guide/\w+/(?P<id>.+)/(.*)', _id)
if m: if m:
return 'videos', m.group(1) return 'program', m.group(1)
m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id) m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id)
if m: if m:
@ -71,6 +92,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
elif site == 'live_url': elif site == 'live_url':
return self.browser.get_live_from_url(_id) return self.browser.get_live_from_url(_id)
elif site == 'program':
return self.browser.get_video_from_program_id(_id)
else: else:
return self.browser.get_video(_id) return self.browser.get_video(_id)
@ -84,9 +108,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
with self.browser: with self.browser:
site, _id = self.parse_id(video.id) site, _id = self.parse_id(video.id)
if isinstance(video,ArteVideo): if isinstance(video, ArteVideo):
video = self.browser.get_video(_id, video) video = self.browser.get_video(_id, video)
if isinstance(video,ArteLiveVideo): if isinstance(video, ArteLiveVideo):
video = self.browser.get_live_video(_id, video) video = self.browser.get_live_video(_id, video)
if 'thumbnail' in fields and video and video.thumbnail: if 'thumbnail' in fields and video and video.thumbnail:
with self.browser: with self.browser:
@ -99,26 +123,26 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
if BaseVideo in objs: if BaseVideo in objs:
collection = self.get_collection(objs, split_path) collection = self.get_collection(objs, split_path)
if collection.path_level == 0: if collection.path_level == 0:
yield Collection([u'latest'],u'Latest Arte videos') yield Collection([u'arte-latest'], u'Latest Arte videos')
yield Collection([u'live'],u'Arte Web Live videos') yield Collection([u'arte-live'], u'Arte Web Live videos')
if collection.path_level == 1: if collection.path_level == 1:
if collection.split_path == [u'latest']: if collection.split_path == [u'arte-latest']:
for video in self.browser.latest_videos(): for video in self.browser.latest_videos():
yield video yield video
if collection.split_path == [u'live']: if collection.split_path == [u'arte-live']:
for categorie in self.browser.get_arte_live_categories(): for categorie in self.browser.get_arte_live_categories():
yield categorie yield categorie
if collection.path_level == 2: if collection.path_level == 2:
if collection.split_path[0] == u'live': if collection.split_path[0] == u'arte-live':
for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.lang)): for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])):
yield video yield video
def validate_collection(self, objs, collection): def validate_collection(self, objs, collection):
if collection.path_level == 0: if collection.path_level == 0:
return return
if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ): if BaseVideo in objs and (collection.split_path == [u'arte-latest'] or collection.split_path == [u'arte-live']):
return return
if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' : if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'arte-live':
return return
raise CollectionNotFound(collection.split_path) raise CollectionNotFound(collection.split_path)

View file

@ -17,40 +17,53 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
import urllib
from weboob.capabilities import NotAvailable
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.tools.json import json as simplejson
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage, ArteLivePlayerPage from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
from .video import ArteVideo, ArteLiveVideo from .video import ArteVideo, ArteLiveVideo
__all__ = ['ArteBrowser'] __all__ = ['ArteBrowser']
class ArteBrowser(BaseBrowser): class ArteBrowser(BaseBrowser):
DOMAIN = u'videos.arte.tv' DOMAIN = u'videos.arte.tv'
ENCODING = None ENCODING = None
PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage, PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage,
r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage, r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage,
r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage, r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage,
r'http://liveweb.arte.tv/\w+' : ArteLivePage,
r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage,
r'http://liveweb.arte.tv/\w+/video/.*': ArteLivePlayerPage,
r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml' : ArteLiveVideoPage,
} }
SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'} LIVE_LANG = {'F': 'fr',
'D': 'de'
}
API_URL = 'http://arte.tv/papi/tvguide'
def __init__(self, lang, quality, *args, **kwargs): def __init__(self, lang, quality, order, *args, **kwargs):
self.lang = lang self.lang = lang
self.quality = quality self.quality = quality
self.order = order
BaseBrowser.__init__(self, *args, **kwargs) BaseBrowser.__init__(self, *args, **kwargs)
@id2url(ArteVideo.id2url) @id2url(ArteVideo.id2url)
def get_video(self, url, video=None): def get_video(self, url, video=None):
self.location(url) _url = url \
return self.page.get_video(video, self.lang, self.quality) + '/' + self.quality \
+ '.json'
response = self.openurl(_url)
result = simplejson.loads(response.read(), self.ENCODING)
if video is None:
video = ArteVideo(result['video']['VID'])
video.url = u'%s' % result['video']['VSR'][0]['VUR']
return video
@id2url(ArteLiveVideo.id2url) @id2url(ArteLiveVideo.id2url)
def get_live_video(self, url, video=None): def get_live_video(self, url, video=None):
@ -61,29 +74,100 @@ class ArteBrowser(BaseBrowser):
def home(self): def home(self):
self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang) self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)
def get_video_from_program_id(self, _id):
    """Resolve a program id into a video object with its stream url.

    Downloads ``<API_URL>/epg/program/<lang>/L2/<_id>.json``, builds a video
    from the ``abstractProgram.VDO`` record of the answer and then completes
    it through :meth:`get_video`.

    :param _id: program identifier, used as the last path segment
    :returns: whatever :meth:`get_video` returns for the built video
    """
    segments = [self.API_URL, 'epg', 'program', self.lang, 'L2', _id]
    program_url = '/'.join(segments) + '.json'

    payload = self.openurl(program_url).read()
    program = simplejson.loads(payload, self.ENCODING)

    video = self.create_video(program['abstractProgram']['VDO'])
    return self.get_video(video.id, video)
def search_videos(self, pattern): def search_videos(self, pattern):
self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8'))) class_name = 'videos/plus7'
assert self.is_on_page(IndexPage) method_name = 'search'
return self.page.iter_videos() level = 'L1'
cluster = 'ALL'
channel = 'ALL'
limit = '10'
offset = '0'
url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset, pattern)
response = self.openurl(url)
result = simplejson.loads(response.read(), self.ENCODING)
return self.create_video_from_plus7(result['videoList'])
def create_video_from_plus7(self, result):
    """Lazily convert API video records into video objects.

    :param result: iterable of records (the callers pass the ``videoList``
        entry of a decoded API answer)
    :returns: a generator yielding ``self.create_video(record)`` for each
        record, in order
    """
    for record in result:
        yield self.create_video(record)
def create_video(self, item):
    """Build an ArteVideo from one video record of the API.

    Keys read from ``item``: ``VID`` (id), ``VTI`` (title), optional ``VSU``
    (appended to the title), ``VRT`` (rating), ``programImage`` (thumbnail
    url), ``videoDurationSeconds``, ``VDE`` (description) and ``VDA`` (date).

    :param item: mapping describing one video
    :returns: the populated ArteVideo; its ``url`` is left untouched here
    :raises KeyError: if a mandatory key is missing from ``item``
    """
    video = ArteVideo(item['VID'])

    if 'VSU' in item:
        video.title = u'%s : %s' % (item['VTI'], item['VSU'])
    else:
        video.title = u'%s' % (item['VTI'])

    video.rating = int(item['VRT'])
    video.thumbnail = Thumbnail(u'%s' % item['programImage'])
    video.duration = datetime.timedelta(seconds=int(item['videoDurationSeconds']))
    # NOTE(review): presumably flags the fields not set so far (except 'url')
    # as NotAvailable -- confirm against set_empty_fields(). It is kept
    # before the description/date assignments, as in the original order.
    video.set_empty_fields(NotAvailable, ('url',))

    video.description = u'%s' % item['VDE']

    # Raw string: '\d' and '\s' are invalid escapes in a plain literal.
    # The date is only set when VDA starts with "DD MM YYYY" separated by
    # single whitespace characters.
    m = re.match(r'(\d{2})\s(\d{2})\s(\d{4})(.*?)', item['VDA'])
    if m:
        day, month, year = [int(part) for part in m.groups()[:3]]
        video.date = datetime.date(year, month, day)

    return video
def create_url_plus7(self, class_name, method_name, level, cluster, channel, limit, offset, pattern=None):
    """Build the url of a "plus7" API request.

    The result has the form
    ``<API_URL>/<class_name>/<method_name>/<lang>/<level>[/<pattern>]/<channel>/<cluster>/-1/<order>/<limit>/<offset>.json``.

    :param class_name: API class path segment(s)
    :param method_name: API method path segment
    :param level: detail level segment
    :param cluster: cluster filter segment
    :param channel: channel filter segment
    :param limit: maximum number of results, as a string
    :param offset: index of the first result, as a string
    :param pattern: optional search text; percent-quoted and inserted after
        ``level`` when given and non-empty
    :returns: the request url
    """
    parts = [self.API_URL, class_name, method_name, self.lang, level]

    if pattern:
        # On Python 2, urllib.quote() raises KeyError for unicode objects
        # holding non-ASCII characters, so encode those to UTF-8 first.
        # Byte strings are passed through unchanged.
        if not isinstance(pattern, str):
            pattern = pattern.encode('utf-8')
        parts.append(urllib.quote(pattern))

    # The channel comes before the cluster in the path, although the
    # parameters are declared the other way round.
    parts.extend([channel, cluster, '-1', self.order, limit, offset])

    return '/'.join(parts) + '.json'
def latest_videos(self): def latest_videos(self):
self.home() class_name = 'videos'
assert self.is_on_page(IndexPage) method_name = 'plus7'
return self.page.iter_videos() level = 'L1'
cluster = 'ALL'
channel = 'ALL'
limit = '10'
offset = '0'
url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset)
response = self.openurl(url)
result = simplejson.loads(response.read(), self.ENCODING)
return self.create_video_from_plus7(result['videoList'])
def get_arte_live_categories(self): def get_arte_live_categories(self):
self.location('http://liveweb.arte.tv/%s' %self.lang) self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang])
assert self.is_on_page(ArteLivePage) assert self.is_on_page(ArteLivePage)
return self.page.iter_resources() return self.page.iter_resources()
def live_videos(self, url): def live_videos(self, url):
self.location(url) self.location(url)
assert self.is_on_page(ArteLiveCategorieVideoPage) assert self.is_on_page(ArteLiveCategorieVideoPage)
return self.page.iter_videos(self.lang) return self.page.iter_videos(self.LIVE_LANG[self.lang])
def get_live_from_url(self, url):
self.location(url)
assert self.is_on_page(ArteLivePlayerPage)
_id = self.page.retrieve_id()
if _id:
return self.get_live_video(_id)

View file

@ -18,19 +18,17 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re import re
import urllib
import HTMLParser import HTMLParser
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage
from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable
from .video import ArteVideo, ArteLiveVideo from .video import ArteLiveVideo
from .collection import ArteLiveCollection from .collection import ArteLiveCollection
__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage'] __all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
class ArteLiveVideoPage(BasePage): class ArteLiveVideoPage(BasePage):
@ -44,9 +42,9 @@ class ArteLiveVideoPage(BasePage):
urls[url.tag[-2:]] = url.text urls[url.tag[-2:]] = url.text
if quality in urls: if quality in urls:
video.url = urls[quality] video.url = u'%s' % urls[quality]
else: else:
video.url = urls.popitem()[1] video.url = u'%s' % urls.popitem()[1]
return video return video
@ -127,98 +125,3 @@ class ArteLivePage(BasePage):
item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text)) item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text))
items.append(item) items.append(item)
return items return items
class IndexPage(BasePage):
def iter_videos(self):
videos = self.document.getroot().cssselect("div[class=video]")
for div in videos:
title = div.find('h2').find('a').text
m = re.match(r'/(fr|de|en)/videos/(.*)\.html', div.find('h2').find('a').attrib['href'])
_id = ''
if m:
_id = m.group(2)
rating = rating_max = 0
rates = self.parser.select(div, 'div[class=rateContainer]', 1)
for r in rates.findall('div'):
if 'star-rating-on' in r.attrib['class']:
rating += 1
rating_max += 1
video = ArteVideo(_id)
video.title = unicode(title)
video.rating = rating
video.rating_max = rating_max
thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])
try:
parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
if len(parts) == 2:
hours = 0
minutes, seconds = parts
elif len(parts) == 3:
hours, minutes, seconds = parts
else:
raise BrokenPageError('Unable to parse duration %r' % parts)
except BrokenPageError:
pass
else:
video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
video.set_empty_fields(NotAvailable, ('url',))
yield video
class VideoPage(BasePage):
def get_video(self, video=None, lang='fr', quality='hd'):
if not video:
video = ArteVideo(self.group_dict['id'])
video.title = unicode(self.get_title())
video.url = unicode(self.get_url(lang, quality))
video.set_empty_fields(NotAvailable)
return video
def get_title(self):
return self.document.getroot().cssselect('h1')[0].text
def get_url(self, lang, quality):
obj = self.parser.select(self.document.getroot(), 'object', 1)
movie_url = self.parser.select(obj, 'param[name=movie]', 1)
xml_url = urllib.unquote(movie_url.attrib['value'].split('videorefFileUrl=')[-1])
doc = self.browser.get_document(self.browser.openurl(xml_url))
videos_list = self.parser.select(doc.getroot(), 'video')
videos = {}
for v in videos_list:
videos[v.attrib['lang']] = v.attrib['ref']
if lang in videos:
xml_url = videos[lang]
else:
xml_url = videos.popitem()[1]
doc = self.browser.get_document(self.browser.openurl(xml_url))
obj = self.parser.select(doc.getroot(), 'urls', 1)
videos_list = self.parser.select(obj, 'url')
urls = {}
for v in videos_list:
urls[v.attrib['quality']] = v.text
if quality in urls:
video_url = urls[quality]
else:
video_url = urls.popitem()[1]
return video_url
class ArteLivePlayerPage(BasePage):
def retrieve_id(self):
player_url = self.document.xpath('//div[@class="flash"]/div/object/param')[0].attrib['value']
_id = re.match('(.*)&eventId=(\d*)&(.*)', player_url)
if _id:
return u'%s' % _id.group(2)

View file

@ -30,20 +30,20 @@ class ArteTest(BackendTest):
if len(l) > 0: if len(l) > 0:
v = l[0] v = l[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
def test_live(self): def test_live(self):
l1 = list(self.backend.iter_resources([BaseVideo], [u'live'])) l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
assert len(l1) assert len(l1)
l2 = list(self.backend.iter_resources([BaseVideo], [u'live',u'%s'%l1[0]])) l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]]))
assert len(l2) assert len(l2)
v = l2[0] v = l2[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
def test_latest(self): def test_latest(self):
l = list(self.backend.iter_resources([BaseVideo], [u'latest'])) l = list(self.backend.iter_resources([BaseVideo], [u'arte-latest']))
assert len(l) assert len(l)
v = l[0] v = l[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))

View file

@ -21,13 +21,14 @@
from weboob.capabilities.video import BaseVideo from weboob.capabilities.video import BaseVideo
__all__ = ['ArteVideo','ArteLiveVideo'] __all__ = ['ArteVideo', 'ArteLiveVideo']
class ArteVideo(BaseVideo): class ArteVideo(BaseVideo):
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):
return 'http://videos.arte.tv/fr/videos/%s.html' % _id lang = _id[-1:]
return 'http://arte.tv/papi/tvguide/videos/stream/%s/%s/HBBTV' % (lang, _id)
class ArteLiveVideo(BaseVideo): class ArteLiveVideo(BaseVideo):