[arte] fix : arte-live site changes

This commit is contained in:
Bezleputh 2014-02-06 19:27:47 +01:00
commit d61d3ba6a4
6 changed files with 88 additions and 136 deletions

View file

@ -27,7 +27,7 @@ from weboob.tools.value import Value
from .browser import ArteBrowser from .browser import ArteBrowser
from .video import ArteVideo, ArteLiveVideo from .video import ArteVideo, ArteLiveVideo
from .collection import ArteLiveCollection
__all__ = ['ArteBackend'] __all__ = ['ArteBackend']
@ -76,9 +76,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
if m: if m:
return 'program', m.group(1) return 'program', m.group(1)
m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id) m = re.match('https?://concert.arte.tv/(\w+)/(.*)', _id)
if m: if m:
return 'live_url', _id return 'live', '/%s/%s' % (m.group(1), m.group(2))
return 'videos', _id return 'videos', _id
@ -89,9 +89,6 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
if site == 'live': if site == 'live':
return self.browser.get_live_video(_id) return self.browser.get_live_video(_id)
elif site == 'live_url':
return self.browser.get_live_from_url(_id)
elif site == 'program': elif site == 'program':
return self.browser.get_video_from_program_id(_id) return self.browser.get_video_from_program_id(_id)
@ -134,7 +131,7 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
yield categorie yield categorie
if collection.path_level == 2: if collection.path_level == 2:
if collection.split_path[0] == u'arte-live': if collection.split_path[0] == u'arte-live':
for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])): for video in self.browser.live_videos(collection.basename):
yield video yield video
def validate_collection(self, objs, collection): def validate_collection(self, objs, collection):

View file

@ -27,7 +27,7 @@ from weboob.tools.json import json as simplejson
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage from .pages import ArteLivePage, ArteLiveVideoPage
from .video import ArteVideo, ArteLiveVideo from .video import ArteVideo, ArteLiveVideo
__all__ = ['ArteBrowser'] __all__ = ['ArteBrowser']
@ -36,14 +36,14 @@ __all__ = ['ArteBrowser']
class ArteBrowser(BaseBrowser): class ArteBrowser(BaseBrowser):
DOMAIN = u'videos.arte.tv' DOMAIN = u'videos.arte.tv'
ENCODING = None ENCODING = None
PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage, PAGES = {r'http://concert.arte.tv/\w+': ArteLivePage,
r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage, r'http://concert.arte.tv/(?P<id>.+)': ArteLiveVideoPage,
r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage, }
}
LIVE_LANG = {'F': 'fr', LIVE_LANG = {'F': 'fr',
'D': 'de' 'D': 'de'
} }
API_URL = 'http://arte.tv/papi/tvguide' API_URL = 'http://arte.tv/papi/tvguide'
def __init__(self, lang, quality, order, *args, **kwargs): def __init__(self, lang, quality, order, *args, **kwargs):
@ -85,7 +85,39 @@ class ArteBrowser(BaseBrowser):
def get_live_video(self, url, video=None): def get_live_video(self, url, video=None):
self.location(url) self.location(url)
assert self.is_on_page(ArteLiveVideoPage) assert self.is_on_page(ArteLiveVideoPage)
return self.page.get_video(video, self.lang, self.quality) json_url, video = self.page.get_video(video)
return self.fill_live_video(video, json_url)
def fill_live_video(self, video, json_url):
    """Complete *video* from the JSON player description found at *json_url*.

    The document is fetched with ``self.openurl`` and its ``videoJsonPlayer``
    entry is read. The stream whose name contains ``self.quality`` provides
    ``video.url`` and ``video.ext``; when no stream matches, the first
    available one is used, and when there is no stream at all both
    attributes are left untouched. Date, duration and thumbnail are set
    only when the corresponding entries are present.

    :param video: video object to complete (modified in place)
    :param json_url: URL of the JSON player description
    :returns: the same *video* object
    """
    response = self.openurl(json_url)
    player = simplejson.loads(response.read(), self.ENCODING)['videoJsonPlayer']

    # NOTE(review): the shape of 'VSR' is not visible from this code. The
    # previous version indexed it both by the matched item (mapping style)
    # and by 0 (sequence style), so one of the two paths always failed.
    # Both shapes are handled here - confirm against a real payload.
    streams = player.get('VSR') or []
    if isinstance(streams, dict):
        candidates = [(name, streams[name]) for name in streams]
    else:
        candidates = [(entry, entry) for entry in streams]

    chosen = None
    for label, entry in candidates:
        # Same membership test as before: substring match on a stream
        # name, key lookup when the entry itself is a mapping.
        if self.quality in label:
            chosen = entry
            break
    if chosen is None and candidates:
        # No stream for the requested quality: fall back to the first one.
        chosen = candidates[0][1]

    if chosen is not None:
        video.url = u'%s' % chosen['url']
        video.ext = u'%s' % chosen['mediaType']

    if 'VDA' in player:
        # The last six characters are dropped before parsing
        # (presumably a UTC offset such as " +0100" - TODO confirm).
        video.date = datetime.datetime.strptime(player['VDA'][:-6], '%d/%m/%Y %H:%M:%S')

    if 'VDU' in player:
        video.duration = int(player['VDU'])

    thumbnails = player.get('VTU') or {}
    if 'IUR' in thumbnails:
        video.thumbnail = BaseImage(thumbnails['IUR'])
        # The image id is its own URL.
        video.thumbnail.url = video.thumbnail.id

    return video
def home(self): def home(self):
self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang) self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)
@ -182,11 +214,11 @@ class ArteBrowser(BaseBrowser):
return self.create_video_from_plus7(result['videoList']) return self.create_video_from_plus7(result['videoList'])
def get_arte_live_categories(self): def get_arte_live_categories(self):
self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang]) self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang])
assert self.is_on_page(ArteLivePage) assert self.is_on_page(ArteLivePage)
return self.page.iter_resources() return self.page.iter_resources()
def live_videos(self, url): def live_videos(self, cat):
self.location(url) self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang])
assert self.is_on_page(ArteLiveCategorieVideoPage) assert self.is_on_page(ArteLivePage)
return self.page.iter_videos(self.LIVE_LANG[self.lang]) return self.page.iter_videos(cat, lang=self.LIVE_LANG[self.lang])

View file

@ -1,27 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.collection import Collection
__all__ = ['ArteLiveCollection']
class ArteLiveCollection(Collection):
    """Collection that can turn a category id into its liveweb URL."""

    @classmethod
    def id2url(cls, _id, lang):
        """Return the liveweb category page URL for category *_id* in *lang*."""
        url_parts = (lang, _id)
        return 'http://liveweb.arte.tv/%s/cat/%s/' % url_parts

View file

@ -17,112 +17,62 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import HTMLParser
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.misc import html2text
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable
from weboob.capabilities.image import BaseImage from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
from .video import ArteLiveVideo from .video import ArteLiveVideo
from .collection import ArteLiveCollection
__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage'] __all__ = ['ArteLivePage', 'ArteLiveVideoPage']
class ArteLiveVideoPage(BasePage): class ArteLiveVideoPage(BasePage):
def get_video(self, video=None, lang='fr', quality='hd'): def get_video(self, video=None):
if not video: if not video:
video = ArteLiveVideo(self.group_dict['id']) video = ArteLiveVideo(self.group_dict['id'])
urls = {} div = self.document.xpath('//div[@class="bloc-presentation"]')[0]
for url in self.document.xpath('//video')[0].getchildren():
if url.tag.startswith('url'):
urls[url.tag[-2:]] = url.text
if quality in urls: description = self.parser.select(div,
video.url = u'%s' % urls[quality] 'div[@class="field field-name-body field-type-text-with-summary field-label-hidden bloc-rte"]',
else: 1,
video.url = u'%s' % urls.popitem()[1] method='xpath')
return video video.description = html2text(self.parser.tostring(description))
json_url = self.document.xpath('//div[@class="video-container"]')[0].attrib['arte_vp_url']
class ArteLiveCategorieVideoPage(BasePage): return json_url, video
def iter_videos(self, lang='fr'):
    """Return the videos described by the feed linked from this page.

    The feed URL is the ``href`` of the first ``<link>`` element of the
    document. Each ``<item>`` of the feed is parsed with
    :meth:`get_element`; items it rejects are skipped.

    :param lang: language code handed to :meth:`get_element`
    :returns: list of ArteLiveVideo objects
    """
    feed_link = self.document.xpath('//link')[0]
    feed = self.browser.readurl(feed_link.attrib['href'])

    item_pattern = re.compile("(<item>.*?</item>)", re.DOTALL)
    found = list()
    for raw_item in item_pattern.findall(feed):
        fields = self.get_element(raw_item, lang)
        if not fields:
            continue

        entry = ArteLiveVideo(fields['ID'])
        entry.title = fields['title']
        entry.description = fields['pitch']
        entry.author = fields['author']
        if fields['pict']:
            entry.thumbnail = BaseImage(fields['pict'])
            # The image id is its own URL.
            entry.thumbnail.url = entry.thumbnail.id
        entry.set_empty_fields(NotAvailable, ('url',))
        found.append(entry)

    return found
def get_element(self, chain, lang):
    """Extract the fields of one feed ``<item>`` fragment.

    :param chain: raw text of a single ``<item>...</item>`` element
    :param lang: language code that must appear in the item's video link
    :returns: dict with the keys ``link``, ``ID``, ``title``, ``date``,
              ``pitch``, ``author`` and ``pict``, or None when the item
              has no video link for *lang* or no integer event id.
              Missing optional fields get a placeholder text
              (``pict`` gets None).
    """
    def first_match(pattern, group=0):
        # Requested group of the first occurrence, or None when absent.
        found = re.compile(pattern, re.DOTALL).search(chain)
        return None if found is None else found.group(group)

    def decoded(raw, fallback):
        # Decode a matched field, keeping the placeholder when the field
        # is absent or cannot be decoded (e.g. text that is already
        # unicode), instead of hiding every possible error.
        if raw is None:
            return fallback
        try:
            return raw.decode('utf-8', 'replace')
        except (AttributeError, UnicodeError):
            return fallback

    ele = {}

    # Mandatory fields: without them the item is not a usable video.
    link = first_match("(?<=<link>)(http://liveweb.arte.tv/{0}/video/.*?)"
                       "(?=</link>)".format(lang))
    if link is None:
        return None
    ele['link'] = link

    raw_id = first_match("<enclosure.*?/event/.*?/(.*?)-.*?/>", 1)
    if raw_id is None:
        return None
    try:
        ele['ID'] = int(raw_id)
    except ValueError:
        return None

    # Optional fields: fall back to a placeholder.
    ele['title'] = decoded(first_match("(?<=<title>)(.*?)(?=</title>)"), "No title")
    ele['date'] = decoded(first_match("(?<=<pubDate>)(.*?)(?=</pubDate>)"), "No date")

    pitch = first_match("(?<=<description>)(.*?)(?=</description>)")
    if pitch is None:
        ele['pitch'] = "No description"
    else:
        try:
            # Unescaped twice on purpose, as the previous code did
            # (presumably the feed is double-escaped - TODO confirm).
            pitch = HTMLParser.HTMLParser().unescape(pitch)
            ele['pitch'] = HTMLParser.HTMLParser().unescape(pitch)
        except (UnicodeError, ValueError, OverflowError):
            ele['pitch'] = "No description"

    ele['author'] = decoded(first_match("(?<=<author>)(.*?)(?=</author>)"), "Unknow")
    ele['pict'] = first_match("(?<=<enclosure url=\")(.*?)(?=\" type=\"image/)")

    return ele
class ArteLivePage(BasePage): class ArteLivePage(BasePage):
def iter_resources(self): def iter_resources(self):
items = list() items = list()
for el in self.document.xpath('//ul[@id="categoryArray"]/li'): for el in self.document.xpath('//ul[@class="filter-liste"]/li'):
a = el.find('a') _id = el.attrib['data-target'].replace('video_box_tab_','')
m = re.match(r'http://liveweb.arte.tv/*', a.attrib['href']) text = self.parser.select(el, 'a/span', 1, method='xpath').text
if m: item = Collection([u'arte-live', u'%s' % _id], u'%s' % (text))
url = u'%s' % a.attrib['href'] items.append(item)
_id = url.split('/')[-2:-1][0]
item = ArteLiveCollection([u'arte-live', u'%s' % _id], u'%s' % (a.text))
items.append(item)
return items return items
def iter_videos(self, cat, lang='fr'):
    """Return the videos listed in the tab of category *cat* on this page.

    Every ``<article>`` of the ``video_box_tab_<cat>`` block yields one
    ArteLiveVideo carrying an id, a title and a thumbnail.

    :param cat: category identifier (suffix of the tab's ``id`` attribute)
    :param lang: accepted for interface compatibility; not used here
    :returns: list of ArteLiveVideo objects
    """
    def build(article):
        # Read everything from the markup before creating the video.
        video_id = article.attrib['about']
        heading = self.parser.select(article,
                                     'div/div[@class="info-article "]/div/h3/a',
                                     1,
                                     method='xpath')
        title = heading.text
        picture = self.parser.select(article,
                                     'div/div/a/figure/span/span',
                                     1,
                                     method='xpath')
        image_url = picture.attrib['data-src']

        entry = ArteLiveVideo(video_id)
        entry.title = u'%s' % title
        entry.thumbnail = BaseImage(image_url)
        # The image id is its own URL.
        entry.thumbnail.url = entry.thumbnail.id
        entry.set_empty_fields(NotAvailable, ('url',))
        return entry

    tab_articles = self.document.xpath('//div[@id="video_box_tab_%s"]/article' % cat)
    return [build(article) for article in tab_articles]

View file

@ -35,7 +35,7 @@ class ArteTest(BackendTest):
def test_live(self): def test_live(self):
l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live'])) l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
assert len(l1) assert len(l1)
l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]])) l2 = list(self.backend.iter_resources([BaseVideo], l1[0].split_path))
assert len(l2) assert len(l2)
v = l2[0] v = l2[0]
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))

View file

@ -37,4 +37,4 @@ class ArteLiveVideo(BaseVideo):
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):
return 'http://arte.vo.llnwd.net/o21/liveweb/events/event-%s.xml' % _id return 'http://concert.arte.tv%s' % _id