update arte module to use arte api

This commit is contained in:
Bezleputh 2013-09-16 20:48:58 +02:00 committed by Florent
commit a5d5011979
5 changed files with 167 additions and 155 deletions

View file

@ -31,6 +31,7 @@ from .collection import ArteLiveCollection
__all__ = ['ArteBackend']
class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
NAME = 'arte'
MAINTAINER = u'Romain Bignon'
@ -38,22 +39,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
VERSION = '0.h'
DESCRIPTION = 'Arte French and German TV'
LICENSE = 'AGPLv3+'
order = {'AIRDATE_DESC': 'Date',
'VIEWS': 'Views',
'ALPHA': 'Alphabetic',
'LAST_CHANCE': 'Last chance'
}
CONFIG = BackendConfig(Value('lang', label='Lang of videos',
choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'),
Value('quality', label='Quality of videos', choices=['hd', 'sd'], default='hd'))
Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'),
Value('quality', label='Quality of videos', choices=['hd', 'sd', 'md', 'ed'], default='hd'))
# Maps the values of the 'lang' and 'quality' config options to the codes
# handed to the browser. Every selectable config value needs an entry here,
# because the lookup is a plain dict index and a missing key raises KeyError.
# English has no code of its own and falls back to the French one ('F').
# The quality option offers 'ed' (not 'eq'), so the key must be 'ed'.
TRANSLATION = {'fr': 'F',
               'en': 'F',
               'de': 'D',
               'hd': 'HQ',
               'md': 'MQ',
               'sd': 'SQ',
               'ed': 'EQ',
               }
BROWSER = ArteBrowser
def create_default_browser(self):
return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get())
return self.create_browser(lang=self.TRANSLATION[self.config['lang'].get()],
quality=self.TRANSLATION[self.config['quality'].get()],
order=self.config['order'].get())
def parse_id(self, _id):
m = re.match('^(\w+)\.(.*)', _id)
if m:
return m.groups()
m = re.match('https?://videos.arte.tv/\w+/videos/(?P<id>.+)\.html', _id)
m = re.match('https?://www.arte.tv/guide/\w+/(?P<id>.+)/(.*)', _id)
if m:
return 'videos', m.group(1)
return 'program', m.group(1)
m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id)
if m:
@ -71,6 +92,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
elif site == 'live_url':
return self.browser.get_live_from_url(_id)
elif site == 'program':
return self.browser.get_video_from_program_id(_id)
else:
return self.browser.get_video(_id)
@ -84,9 +108,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
with self.browser:
site, _id = self.parse_id(video.id)
if isinstance(video,ArteVideo):
if isinstance(video, ArteVideo):
video = self.browser.get_video(_id, video)
if isinstance(video,ArteLiveVideo):
if isinstance(video, ArteLiveVideo):
video = self.browser.get_live_video(_id, video)
if 'thumbnail' in fields and video and video.thumbnail:
with self.browser:
@ -99,26 +123,26 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
if BaseVideo in objs:
collection = self.get_collection(objs, split_path)
if collection.path_level == 0:
yield Collection([u'latest'],u'Latest Arte videos')
yield Collection([u'live'],u'Arte Web Live videos')
yield Collection([u'arte-latest'], u'Latest Arte videos')
yield Collection([u'arte-live'], u'Arte Web Live videos')
if collection.path_level == 1:
if collection.split_path == [u'latest']:
if collection.split_path == [u'arte-latest']:
for video in self.browser.latest_videos():
yield video
if collection.split_path == [u'live']:
if collection.split_path == [u'arte-live']:
for categorie in self.browser.get_arte_live_categories():
yield categorie
if collection.path_level == 2:
if collection.split_path[0] == u'live':
for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.lang)):
if collection.split_path[0] == u'arte-live':
for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])):
yield video
def validate_collection(self, objs, collection):
if collection.path_level == 0:
return
if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ):
if BaseVideo in objs and (collection.split_path == [u'arte-latest'] or collection.split_path == [u'arte-live']):
return
if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' :
if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'arte-live':
return
raise CollectionNotFound(collection.split_path)

View file

@ -17,40 +17,53 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
import urllib
from weboob.capabilities import NotAvailable
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.tools.json import json as simplejson
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url
from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage, ArteLivePlayerPage
from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
from .video import ArteVideo, ArteLiveVideo
__all__ = ['ArteBrowser']
class ArteBrowser(BaseBrowser):
DOMAIN = u'videos.arte.tv'
ENCODING = None
PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage,
r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage,
r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage,
r'http://liveweb.arte.tv/\w+' : ArteLivePage,
r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage,
r'http://liveweb.arte.tv/\w+/video/.*': ArteLivePlayerPage,
r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml' : ArteLiveVideoPage,
}
PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage,
r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage,
r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage,
}
SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'}
LIVE_LANG = {'F': 'fr',
'D': 'de'
}
API_URL = 'http://arte.tv/papi/tvguide'
def __init__(self, lang, quality, *args, **kwargs):
def __init__(self, lang, quality, order, *args, **kwargs):
self.lang = lang
self.quality = quality
self.order = order
BaseBrowser.__init__(self, *args, **kwargs)
@id2url(ArteVideo.id2url)
def get_video(self, url, video=None):
self.location(url)
return self.page.get_video(video, self.lang, self.quality)
_url = url \
+ '/' + self.quality \
+ '.json'
response = self.openurl(_url)
result = simplejson.loads(response.read(), self.ENCODING)
if video is None:
video = ArteVideo(result['video']['VID'])
video.url = u'%s' % result['video']['VSR'][0]['VUR']
return video
@id2url(ArteLiveVideo.id2url)
def get_live_video(self, url, video=None):
@ -61,29 +74,100 @@ class ArteBrowser(BaseBrowser):
def home(self):
self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)
def get_video_from_program_id(self, _id):
    """Return the video attached to the program identified by ``_id``.

    Requests ``<API_URL>/epg/program/<lang>/L2/<_id>.json``, builds a video
    from the ``abstractProgram`` / ``VDO`` entry of the decoded JSON with
    create_video(), then passes that video and its id to get_video() and
    returns the result.
    """
    segments = (self.API_URL, 'epg', 'program', self.lang, 'L2', _id)
    program_url = '/'.join(segments) + '.json'

    payload = simplejson.loads(self.openurl(program_url).read(), self.ENCODING)

    program_video = self.create_video(payload['abstractProgram']['VDO'])
    return self.get_video(program_video.id, program_video)
def search_videos(self, pattern):
self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8')))
assert self.is_on_page(IndexPage)
return self.page.iter_videos()
class_name = 'videos/plus7'
method_name = 'search'
level = 'L1'
cluster = 'ALL'
channel = 'ALL'
limit = '10'
offset = '0'
url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset, pattern)
response = self.openurl(url)
result = simplejson.loads(response.read(), self.ENCODING)
return self.create_video_from_plus7(result['videoList'])
def create_video_from_plus7(self, result):
    """Lazily yield one video per entry of ``result``.

    ``result`` is an iterable of item dicts (callers pass the ``videoList``
    value of a decoded API response); each entry is converted with
    create_video(). Nothing is built until the generator is consumed.
    """
    for entry in result:
        video = self.create_video(entry)
        yield video
def create_video(self, item):
    """Build an ArteVideo from one item dict of a decoded API response.

    Keys read from ``item``: ``VID`` (video id), ``VTI`` (title), optional
    ``VSU`` (appended to the title after ' : '), ``VRT`` (rating),
    ``programImage`` (thumbnail URL), ``videoDurationSeconds``, ``VDE``
    (description) and ``VDA`` (date string). All keys except ``VSU`` are
    required; a missing one raises KeyError.

    Returns the populated video. Its ``url`` is deliberately excluded from
    the set_empty_fields() call so that it can be filled in later.
    """
    video = ArteVideo(item['VID'])

    if 'VSU' in item:
        video.title = u'%s : %s' % (item['VTI'], item['VSU'])
    else:
        video.title = u'%s' % item['VTI']

    video.rating = int(item['VRT'])
    video.thumbnail = Thumbnail(u'%s' % item['programImage'])
    video.duration = datetime.timedelta(seconds=int(item['videoDurationSeconds']))

    # Called before description/date are assigned, as in the original order;
    # those two are then overwritten below when available.
    video.set_empty_fields(NotAvailable, ('url',))
    video.description = u'%s' % item['VDE']

    # Expects the date to start with "DD MM YYYY" separated by single
    # whitespace characters; anything after the year is ignored.
    # NOTE(review): confirm this matches the separator the API really sends.
    m = re.match(r'(\d{2})\s(\d{2})\s(\d{4})', item['VDA'])
    if m:
        day, month, year = (int(group) for group in m.groups())
        video.date = datetime.date(year, month, day)

    return video
def create_url_plus7(self, class_name, method_name, level, cluster, channel, limit, offset, pattern=None):
    """Build the URL of a "plus7" API request.

    The result has the form::

        <API_URL>/<class_name>/<method_name>/<lang>/<level>[/<pattern>]
            /<channel>/<cluster>/-1/<order>/<limit>/<offset>.json

    Note that ``channel`` comes before ``cluster`` in the URL although the
    parameters are declared in the opposite order.

    ``pattern`` is an optional search string; when empty or None the segment
    is omitted. It is percent-quoted before being inserted. All other
    arguments are inserted verbatim and must be strings.
    """
    parts = [self.API_URL, class_name, method_name, self.lang, level]

    if pattern:
        # Python 2's urllib.quote() raises KeyError on unicode input holding
        # non-ASCII characters, so encode such input to UTF-8 bytes first.
        if not isinstance(pattern, str):
            pattern = pattern.encode('utf-8')
        parts.append(urllib.quote(pattern))

    parts.extend([channel, cluster, '-1', self.order, limit, offset])
    return '/'.join(parts) + '.json'
def latest_videos(self):
self.home()
assert self.is_on_page(IndexPage)
return self.page.iter_videos()
class_name = 'videos'
method_name = 'plus7'
level = 'L1'
cluster = 'ALL'
channel = 'ALL'
limit = '10'
offset = '0'
url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset)
response = self.openurl(url)
result = simplejson.loads(response.read(), self.ENCODING)
return self.create_video_from_plus7(result['videoList'])
def get_arte_live_categories(self):
self.location('http://liveweb.arte.tv/%s' %self.lang)
self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang])
assert self.is_on_page(ArteLivePage)
return self.page.iter_resources()
def live_videos(self, url):
self.location(url)
assert self.is_on_page(ArteLiveCategorieVideoPage)
return self.page.iter_videos(self.lang)
def get_live_from_url(self, url):
self.location(url)
assert self.is_on_page(ArteLivePlayerPage)
_id = self.page.retrieve_id()
if _id:
return self.get_live_video(_id)
return self.page.iter_videos(self.LIVE_LANG[self.lang])

View file

@ -18,19 +18,17 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re
import urllib
import HTMLParser
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.browser import BasePage
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable
from .video import ArteVideo, ArteLiveVideo
from .video import ArteLiveVideo
from .collection import ArteLiveCollection
__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
class ArteLiveVideoPage(BasePage):
@ -44,9 +42,9 @@ class ArteLiveVideoPage(BasePage):
urls[url.tag[-2:]] = url.text
if quality in urls:
video.url = urls[quality]
video.url = u'%s' % urls[quality]
else:
video.url = urls.popitem()[1]
video.url = u'%s' % urls.popitem()[1]
return video
@ -127,98 +125,3 @@ class ArteLivePage(BasePage):
item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text))
items.append(item)
return items
class IndexPage(BasePage):
def iter_videos(self):
videos = self.document.getroot().cssselect("div[class=video]")
for div in videos:
title = div.find('h2').find('a').text
m = re.match(r'/(fr|de|en)/videos/(.*)\.html', div.find('h2').find('a').attrib['href'])
_id = ''
if m:
_id = m.group(2)
rating = rating_max = 0
rates = self.parser.select(div, 'div[class=rateContainer]', 1)
for r in rates.findall('div'):
if 'star-rating-on' in r.attrib['class']:
rating += 1
rating_max += 1
video = ArteVideo(_id)
video.title = unicode(title)
video.rating = rating
video.rating_max = rating_max
thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])
try:
parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
if len(parts) == 2:
hours = 0
minutes, seconds = parts
elif len(parts) == 3:
hours, minutes, seconds = parts
else:
raise BrokenPageError('Unable to parse duration %r' % parts)
except BrokenPageError:
pass
else:
video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
video.set_empty_fields(NotAvailable, ('url',))
yield video
class VideoPage(BasePage):
def get_video(self, video=None, lang='fr', quality='hd'):
if not video:
video = ArteVideo(self.group_dict['id'])
video.title = unicode(self.get_title())
video.url = unicode(self.get_url(lang, quality))
video.set_empty_fields(NotAvailable)
return video
def get_title(self):
return self.document.getroot().cssselect('h1')[0].text
def get_url(self, lang, quality):
obj = self.parser.select(self.document.getroot(), 'object', 1)
movie_url = self.parser.select(obj, 'param[name=movie]', 1)
xml_url = urllib.unquote(movie_url.attrib['value'].split('videorefFileUrl=')[-1])
doc = self.browser.get_document(self.browser.openurl(xml_url))
videos_list = self.parser.select(doc.getroot(), 'video')
videos = {}
for v in videos_list:
videos[v.attrib['lang']] = v.attrib['ref']
if lang in videos:
xml_url = videos[lang]
else:
xml_url = videos.popitem()[1]
doc = self.browser.get_document(self.browser.openurl(xml_url))
obj = self.parser.select(doc.getroot(), 'urls', 1)
videos_list = self.parser.select(obj, 'url')
urls = {}
for v in videos_list:
urls[v.attrib['quality']] = v.text
if quality in urls:
video_url = urls[quality]
else:
video_url = urls.popitem()[1]
return video_url
class ArteLivePlayerPage(BasePage):
def retrieve_id(self):
player_url = self.document.xpath('//div[@class="flash"]/div/object/param')[0].attrib['value']
_id = re.match('(.*)&eventId=(\d*)&(.*)', player_url)
if _id:
return u'%s' % _id.group(2)

View file

@ -30,20 +30,20 @@ class ArteTest(BackendTest):
if len(l) > 0:
v = l[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
def test_live(self):
l1 = list(self.backend.iter_resources([BaseVideo], [u'live']))
l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
assert len(l1)
l2 = list(self.backend.iter_resources([BaseVideo], [u'live',u'%s'%l1[0]]))
l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]]))
assert len(l2)
v = l2[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
def test_latest(self):
l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
l = list(self.backend.iter_resources([BaseVideo], [u'arte-latest']))
assert len(l)
v = l[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))

View file

@ -21,13 +21,14 @@
from weboob.capabilities.video import BaseVideo
__all__ = ['ArteVideo','ArteLiveVideo']
__all__ = ['ArteVideo', 'ArteLiveVideo']
class ArteVideo(BaseVideo):
@classmethod
def id2url(cls, _id):
return 'http://videos.arte.tv/fr/videos/%s.html' % _id
lang = _id[-1:]
return 'http://arte.tv/papi/tvguide/videos/stream/%s/%s/HBBTV' % (lang, _id)
class ArteLiveVideo(BaseVideo):