[arte] fix : arte-live site changes

2014-02-06 19:27:47 +01:00 · 2014-02-06 19:27:47 +01:00 · d61d3ba6a4
commit d61d3ba6a4
parent 7f15df33b0
6 changed files with 88 additions and 136 deletions
--- a/modules/arte/backend.py
+++ b/modules/arte/backend.py
@ -27,7 +27,7 @@ from weboob.tools.value import Value

 from .browser import ArteBrowser
 from .video import ArteVideo, ArteLiveVideo
-from .collection import ArteLiveCollection
+

 __all__ = ['ArteBackend']

@ -76,9 +76,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
        if m:
            return 'program', m.group(1)

-        m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id)
+        m = re.match('https?://concert.arte.tv/(\w+)/(.*)', _id)
        if m:
-            return 'live_url', _id
+            return 'live', '/%s/%s' % (m.group(1), m.group(2))

        return 'videos', _id

@ -89,9 +89,6 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            if site == 'live':
                return self.browser.get_live_video(_id)

-            elif site == 'live_url':
-                return self.browser.get_live_from_url(_id)
-
            elif site == 'program':
                return self.browser.get_video_from_program_id(_id)

@ -134,7 +131,7 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
                            yield categorie
                if collection.path_level == 2:
                    if collection.split_path[0] == u'arte-live':
-                        for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])):
+                        for video in self.browser.live_videos(collection.basename):
                            yield video

    def validate_collection(self, objs, collection):
--- a/modules/arte/browser.py
+++ b/modules/arte/browser.py
@ -27,7 +27,7 @@ from weboob.tools.json import json as simplejson
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url

-from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
+from .pages import ArteLivePage, ArteLiveVideoPage
 from .video import ArteVideo, ArteLiveVideo

 __all__ = ['ArteBrowser']
@ -36,14 +36,14 @@ __all__ = ['ArteBrowser']
 class ArteBrowser(BaseBrowser):
    DOMAIN = u'videos.arte.tv'
    ENCODING = None
-    PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage,
-             r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage,
-             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage,
-             }
+    PAGES = {r'http://concert.arte.tv/\w+': ArteLivePage,
+             r'http://concert.arte.tv/(?P<id>.+)': ArteLiveVideoPage,
+            }

    LIVE_LANG = {'F': 'fr',
                 'D': 'de'
                 }
+
    API_URL = 'http://arte.tv/papi/tvguide'

    def __init__(self, lang, quality, order, *args, **kwargs):
@ -85,7 +85,39 @@ class ArteBrowser(BaseBrowser):
    def get_live_video(self, url, video=None):
        self.location(url)
        assert self.is_on_page(ArteLiveVideoPage)
-        return self.page.get_video(video, self.lang, self.quality)
+        json_url, video = self.page.get_video(video)
+        return self.fill_live_video(video, json_url)
+
+    def fill_live_video(self, video, json_url):
+        """Fill ``video`` from the player JSON at ``json_url`` and return it.
+
+        Sets url, ext and date from the 'VSR' streams, plus duration ('VDU')
+        and thumbnail ('VTU' -> 'IUR') when present.  ``video`` is returned
+        unchanged when the JSON has no 'VSR' entry or an empty one.
+        """
+        response = self.openurl(json_url)
+        player = simplejson.loads(response.read(), self.ENCODING)['videoJsonPlayer']
+        streams = player.get('VSR')
+        if not streams:
+            return video
+
+        if isinstance(streams, dict):
+            # Keyed by stream name: prefer a name containing the wanted quality,
+            # else take any stream (indexing the mapping with 0 raised KeyError).
+            wanted = [name for name in streams if self.quality in name]
+            stream = streams[wanted[0]] if wanted else next(iter(streams.values()))
+        else:
+            stream = streams[0]
+        video.url = u'%s' % stream['url']
+        video.ext = u'%s' % stream['mediaType']
+        # The last 6 characters of 'VDA' are dropped before parsing the date.
+        video.date = datetime.datetime.strptime(player['VDA'][:-6], '%d/%m/%Y %H:%M:%S')
+        if 'VDU' in player:
+            video.duration = int(player['VDU'])
+        if 'IUR' in (player.get('VTU') or {}):
+            video.thumbnail = BaseImage(player['VTU']['IUR'])
+            video.thumbnail.url = video.thumbnail.id
+        return video

    def home(self):
        self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)
@ -182,11 +214,11 @@ class ArteBrowser(BaseBrowser):
        return self.create_video_from_plus7(result['videoList'])

    def get_arte_live_categories(self):
-        self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang])
+        self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang])
        assert self.is_on_page(ArteLivePage)
        return self.page.iter_resources()

-    def live_videos(self, url):
-        self.location(url)
-        assert self.is_on_page(ArteLiveCategorieVideoPage)
-        return self.page.iter_videos(self.LIVE_LANG[self.lang])
+    def live_videos(self, cat):
+        self.location('http://concert.arte.tv/%s' % self.LIVE_LANG[self.lang])
+        assert self.is_on_page(ArteLivePage)
+        return self.page.iter_videos(cat, lang=self.LIVE_LANG[self.lang])
--- a/modules/arte/collection.py
+++ b/modules/arte/collection.py
@ -1,27 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2010-2011 Christophe Benz
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-from weboob.capabilities.collection import Collection
-
-__all__ = ['ArteLiveCollection']
-
-class ArteLiveCollection(Collection):
-    @classmethod
-    def id2url(cls, _id, lang):
-        return 'http://liveweb.arte.tv/%s/cat/%s/' % (lang, _id)
--- a/modules/arte/pages.py
+++ b/modules/arte/pages.py
@ -17,112 +17,62 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-import re
-import HTMLParser

 from weboob.tools.browser import BasePage
+from weboob.tools.misc import html2text
 from weboob.capabilities import NotAvailable
 from weboob.capabilities.image import BaseImage
-
+from weboob.capabilities.collection import Collection
 from .video import ArteLiveVideo
-from .collection import ArteLiveCollection

-__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
+__all__ = ['ArteLivePage', 'ArteLiveVideoPage']


 class ArteLiveVideoPage(BasePage):
-    def get_video(self, video=None, lang='fr', quality='hd'):
+    def get_video(self, video=None):
        if not video:
            video = ArteLiveVideo(self.group_dict['id'])

-        urls = {}
-        for url in self.document.xpath('//video')[0].getchildren():
-            if url.tag.startswith('url'):
-                urls[url.tag[-2:]] = url.text
+        div = self.document.xpath('//div[@class="bloc-presentation"]')[0]

-        if quality in urls:
-            video.url = u'%s' % urls[quality]
-        else:
-            video.url = u'%s' % urls.popitem()[1]
-        return video
+        description = self.parser.select(div,
+                                         'div[@class="field field-name-body field-type-text-with-summary field-label-hidden bloc-rte"]',
+                                         1,
+                                         method='xpath')
+        video.description = html2text(self.parser.tostring(description))

-
-class ArteLiveCategorieVideoPage(BasePage):
-    def iter_videos(self, lang='fr'):
-        videos = list()
-        xml_url = (self.document.xpath('//link')[0]).attrib['href']
-        datas = self.browser.readurl(xml_url)
-        re_items = re.compile("(<item>.*?</item>)", re.DOTALL)
-        items = re.findall(re_items, datas)
-        for item in items:
-            parsed_element = self.get_element(item, lang)
-            if parsed_element:
-                video = ArteLiveVideo(parsed_element['ID'])
-                video.title = parsed_element['title']
-                video.description = parsed_element['pitch']
-                video.author = parsed_element['author']
-                if parsed_element['pict']:
-                    video.thumbnail = BaseImage(parsed_element['pict'])
-                    video.thumbnail.url = video.thumbnail.id
-                video.set_empty_fields(NotAvailable, ('url',))
-                videos.append(video)
-        return videos
-
-    def get_element(self, chain, lang):
-        ele = {}
-        tt = re.compile("(?<=<title>)(.*?)(?=</title>)", re.DOTALL)
-        lk = re.compile("(?<=<link>)(http://liveweb.arte.tv/{0}/video/.*?)"
-                        "(?=</link>)".format(lang), re.DOTALL)
-        dt = re.compile("(?<=<pubDate>)(.*?)(?=</pubDate>)", re.DOTALL)
-        pt = re.compile("(?<=<description>)(.*?)(?=</description>)", re.DOTALL)
-        at = re.compile("(?<=<author>)(.*?)(?=</author>)", re.DOTALL)
-        en = re.compile("<enclosure.*?/event/.*?/(.*?)-.*?/>", re.DOTALL)
-        pix = re.compile("(?<=<enclosure url=\")(.*?)(?=\" type=\"image/)", re.DOTALL)
-        try:
-            ele['link'] = lk.search(chain).group(0)
-        except:
-            return None
-        try:
-            ele['ID'] = int(en.search(chain).group(1))
-        except:
-            return None
-        try:
-            s = tt.search(chain).group(0)
-            ele['title'] = s.decode('utf-8', 'replace')
-        except:
-            ele['title'] = "No title"
-        try:
-            s = (dt.search(chain).group(0))
-            ele['date'] = s.decode('utf-8', 'replace')
-        except:
-            ele['date'] = "No date"
-        try:
-            s = (pt.search(chain).group(0))
-            s = HTMLParser.HTMLParser().unescape(s)
-            ele['pitch'] = HTMLParser.HTMLParser().unescape(s)
-        except:
-            ele['pitch'] = "No description"
-        try:
-            s = (at.search(chain).group(0))
-            ele['author'] = s.decode('utf-8', 'replace')
-        except:
-            ele['author'] = "Unknow"
-        try:
-            ele['pict'] = pix.search(chain).group(0)
-        except:
-            ele['pict'] = None
-        return ele
+        json_url = self.document.xpath('//div[@class="video-container"]')[0].attrib['arte_vp_url']
+        return json_url, video


 class ArteLivePage(BasePage):
    def iter_resources(self):
        items = list()
-        for el in self.document.xpath('//ul[@id="categoryArray"]/li'):
-            a = el.find('a')
-            m = re.match(r'http://liveweb.arte.tv/*', a.attrib['href'])
-            if m:
-                url = u'%s' % a.attrib['href']
-                _id = url.split('/')[-2:-1][0]
-                item = ArteLiveCollection([u'arte-live', u'%s' % _id], u'%s' % (a.text))
-                items.append(item)
+        for el in self.document.xpath('//ul[@class="filter-liste"]/li'):
+            _id = el.attrib['data-target'].replace('video_box_tab_','')
+            text = self.parser.select(el, 'a/span', 1, method='xpath').text
+            item = Collection([u'arte-live', u'%s' % _id], u'%s' % (text))
+            items.append(item)
        return items
+
+    def iter_videos(self, cat, lang='fr'):
+        """Return a list of ArteLiveVideo built from the articles of tab ``cat``.
+
+        ``lang`` is accepted but not read by this method.
+        """
+        found = []
+        for node in self.document.xpath('//div[@id="video_box_tab_%s"]/article' % cat):
+            about = node.attrib['about']
+            heading = self.parser.select(node, 'div/div[@class="info-article "]/div/h3/a',
+                                         1, method='xpath').text
+            picture = self.parser.select(node, 'div/div/a/figure/span/span',
+                                         1, method='xpath').attrib['data-src']
+
+            # Only id, title and thumbnail are read from the listing page.
+            entry = ArteLiveVideo(about)
+            entry.title = u'%s' % heading
+            entry.thumbnail = BaseImage(picture)
+            entry.thumbnail.url = entry.thumbnail.id
+            entry.set_empty_fields(NotAvailable, ('url',))
+            found.append(entry)
+        return found
--- a/modules/arte/test.py
+++ b/modules/arte/test.py
@ -35,7 +35,7 @@ class ArteTest(BackendTest):
    def test_live(self):
        l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
        assert len(l1)
-        l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]]))
+        l2 = list(self.backend.iter_resources([BaseVideo], l1[0].split_path))
        assert len(l2)
        v = l2[0]
        self.backend.fillobj(v, ('url',))
--- a/modules/arte/video.py
+++ b/modules/arte/video.py
@ -37,4 +37,4 @@ class ArteLiveVideo(BaseVideo):

    @classmethod
    def id2url(cls, _id):
-        return 'http://arte.vo.llnwd.net/o21/liveweb/events/event-%s.xml' % _id
+        return 'http://concert.arte.tv%s' % _id