update arte module to use arte api

2013-09-16 20:48:58 +02:00 · 2013-09-16 20:48:58 +02:00 · a5d5011979
commit a5d5011979
parent 2a9978989a
5 changed files with 167 additions and 155 deletions
--- a/modules/arte/backend.py
+++ b/modules/arte/backend.py
@ -31,6 +31,7 @@ from .collection import ArteLiveCollection

 __all__ = ['ArteBackend']

+
 class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
    NAME = 'arte'
    MAINTAINER = u'Romain Bignon'
@ -38,22 +39,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
    VERSION = '0.h'
    DESCRIPTION = 'Arte French and German TV'
    LICENSE = 'AGPLv3+'
+
+    order = {'AIRDATE_DESC': 'Date',
+             'VIEWS': 'Views',
+             'ALPHA': 'Alphabetic',
+             'LAST_CHANCE': 'Last chance'
+             }
+
    CONFIG = BackendConfig(Value('lang', label='Lang of videos',
                                 choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'),
-                           Value('quality', label='Quality of videos', choices=['hd', 'sd'], default='hd'))
+                           Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'),
+                           Value('quality', label='Quality of videos', choices=['hd', 'sd', 'md', 'ed'], default='hd'))
+
+    TRANSLATION = {'fr': 'F',  # config value -> code handed to the browser
+                   'en': 'F',  # same code as 'fr'
+                   'de': 'D',
+                   'hd': 'HQ',
+                   'md': 'MQ',
+                   'sd': 'SQ',
+                   'ed': 'EQ'  # key must be one of the 'quality' choices above
+                   }
+
    BROWSER = ArteBrowser

    def create_default_browser(self):
-        return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get())
+        return self.create_browser(lang=self.TRANSLATION[self.config['lang'].get()],
+                                   quality=self.TRANSLATION[self.config['quality'].get()],
+                                   order=self.config['order'].get())

    def parse_id(self, _id):
        m = re.match('^(\w+)\.(.*)', _id)
        if m:
            return m.groups()

-        m = re.match('https?://videos.arte.tv/\w+/videos/(?P<id>.+)\.html', _id)
+        m = re.match('https?://www.arte.tv/guide/\w+/(?P<id>.+)/(.*)', _id)
        if m:
-            return 'videos', m.group(1)
+            return 'program', m.group(1)

        m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id)
        if m:
@ -71,6 +92,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            elif site == 'live_url':
                return self.browser.get_live_from_url(_id)

+            elif site == 'program':
+                return self.browser.get_video_from_program_id(_id)
+
            else:
                return self.browser.get_video(_id)

@ -84,9 +108,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            with self.browser:
                site, _id = self.parse_id(video.id)

-                if isinstance(video,ArteVideo):
+                if isinstance(video, ArteVideo):
                    video = self.browser.get_video(_id, video)
-                if isinstance(video,ArteLiveVideo):
+                if isinstance(video, ArteLiveVideo):
                    video = self.browser.get_live_video(_id, video)
        if 'thumbnail' in fields and video and video.thumbnail:
            with self.browser:
@ -99,26 +123,26 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            if BaseVideo in objs:
                collection = self.get_collection(objs, split_path)
                if collection.path_level == 0:
-                    yield Collection([u'latest'],u'Latest Arte videos')
-                    yield Collection([u'live'],u'Arte Web Live videos')
+                    yield Collection([u'arte-latest'], u'Latest Arte videos')
+                    yield Collection([u'arte-live'], u'Arte Web Live videos')
                if collection.path_level == 1:
-                    if collection.split_path == [u'latest']:
+                    if collection.split_path == [u'arte-latest']:
                        for video in self.browser.latest_videos():
                            yield video
-                    if collection.split_path == [u'live']:
+                    if collection.split_path == [u'arte-live']:
                        for categorie in self.browser.get_arte_live_categories():
                            yield categorie
                if collection.path_level == 2:
-                    if collection.split_path[0] == u'live':
-                        for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.lang)):
+                    if collection.split_path[0] == u'arte-live':
+                        for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])):
                            yield video

    def validate_collection(self, objs, collection):
        if collection.path_level == 0:
            return
-        if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ):
+        if BaseVideo in objs and (collection.split_path == [u'arte-latest'] or collection.split_path == [u'arte-live']):
            return
-        if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' :
+        if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'arte-live':
            return
        raise CollectionNotFound(collection.split_path)

--- a/modules/arte/browser.py
+++ b/modules/arte/browser.py
@ -17,40 +17,53 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

+import re
+import datetime
+import urllib

+from weboob.capabilities import NotAvailable
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.tools.json import json as simplejson
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url

-from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage, ArteLivePlayerPage
+from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
 from .video import ArteVideo, ArteLiveVideo

-
 __all__ = ['ArteBrowser']


 class ArteBrowser(BaseBrowser):
    DOMAIN = u'videos.arte.tv'
    ENCODING = None
-    PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage,
-             r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage,
-             r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage,
-             r'http://liveweb.arte.tv/\w+' : ArteLivePage,
-             r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage,
-             r'http://liveweb.arte.tv/\w+/video/.*': ArteLivePlayerPage,
-             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml' : ArteLiveVideoPage,
-            }
+    PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage,
+             r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage,
+             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage,
+             }

-    SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'}
+    LIVE_LANG = {'F': 'fr',
+                 'D': 'de'
+                 }
+    API_URL = 'http://arte.tv/papi/tvguide'

-    def __init__(self, lang, quality, *args, **kwargs):
+    def __init__(self, lang, quality, order, *args, **kwargs):
        self.lang = lang
        self.quality = quality
+        self.order = order
        BaseBrowser.__init__(self, *args, **kwargs)

    @id2url(ArteVideo.id2url)
    def get_video(self, url, video=None):
-        self.location(url)
-        return self.page.get_video(video, self.lang, self.quality)
+        _url = url \
+            + '/' + self.quality \
+            + '.json'
+
+        response = self.openurl(_url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        if video is None:
+            video = ArteVideo(result['video']['VID'])
+        video.url = u'%s' % result['video']['VSR'][0]['VUR']
+        return video

    @id2url(ArteLiveVideo.id2url)
    def get_live_video(self, url, video=None):
@ -61,29 +74,100 @@ class ArteBrowser(BaseBrowser):
    def home(self):
        self.location('http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang)

+    def get_video_from_program_id(self, _id):
+        class_name = 'epg'
+        method_name = 'program'
+        level = 'L2'
+        url = self.API_URL \
+            + '/' + class_name \
+            + '/' + method_name \
+            + '/' + self.lang \
+            + '/' + level \
+            + '/' + _id \
+            + '.json'
+
+        response = self.openurl(url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        video = self.create_video(result['abstractProgram']['VDO'])
+        return self.get_video(video.id, video)
+
    def search_videos(self, pattern):
-        self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8')))
-        assert self.is_on_page(IndexPage)
-        return self.page.iter_videos()
+        class_name = 'videos/plus7'
+        method_name = 'search'
+        level = 'L1'
+        cluster = 'ALL'
+        channel = 'ALL'
+        limit = '10'
+        offset = '0'
+
+        url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset, pattern)
+        response = self.openurl(url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        return self.create_video_from_plus7(result['videoList'])
+
+    def create_video_from_plus7(self, result):
+        for item in result:
+            yield self.create_video(item)
+
+    def create_video(self, item):
+        video = ArteVideo(item['VID'])
+        if 'VSU' in item:
+            video.title = u'%s : %s' % (item['VTI'], item['VSU'])
+        else:
+            video.title = u'%s' % (item['VTI'])
+        video.rating = int(item['VRT'])
+        video.thumbnail = Thumbnail(u'%s' % item['programImage'])
+        video.duration = datetime.timedelta(seconds=int(item['videoDurationSeconds']))
+        video.set_empty_fields(NotAvailable, ('url',))
+        video.description = u'%s' % item['VDE']
+        m = re.match('(\d{2})\s(\d{2})\s(\d{4})(.*?)', item['VDA'])
+        if m:
+            dd = int(m.group(1))
+            mm = int(m.group(2))
+            yyyy = int(m.group(3))
+            video.date = datetime.date(yyyy, mm, dd)
+        return video
+
+    def create_url_plus7(self, class_name, method_name, level, cluster, channel, limit, offset, pattern=None):
+        """Build the URL of a JSON listing request under API_URL.
+
+        The path is API_URL/class_name/method_name/lang/level, then the
+        quoted ``pattern`` when one is given, then
+        channel/cluster/-1/order/limit/offset, followed by ``.json``.
+        """
+        parts = [self.API_URL, class_name, method_name, self.lang, level]
+
+        if pattern:
+            # urllib.quote() raises KeyError on non-ASCII unicode in
+            # Python 2, so encode to UTF-8 bytes before quoting.
+            if isinstance(pattern, unicode):
+                pattern = pattern.encode('utf-8')
+            parts.append(urllib.quote(pattern))
+
+        parts += [channel, cluster, '-1', self.order, limit, offset]
+
+        return '/'.join(parts) + '.json'

    def latest_videos(self):
-        self.home()
-        assert self.is_on_page(IndexPage)
-        return self.page.iter_videos()
+        class_name = 'videos'
+        method_name = 'plus7'
+        level = 'L1'
+        cluster = 'ALL'
+        channel = 'ALL'
+        limit = '10'
+        offset = '0'
+
+        url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset)
+        response = self.openurl(url)
+        result = simplejson.loads(response.read(), self.ENCODING)
+        return self.create_video_from_plus7(result['videoList'])

    def get_arte_live_categories(self):
-        self.location('http://liveweb.arte.tv/%s' %self.lang)
+        self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang])
        assert self.is_on_page(ArteLivePage)
        return self.page.iter_resources()

    def live_videos(self, url):
        self.location(url)
        assert self.is_on_page(ArteLiveCategorieVideoPage)
-        return self.page.iter_videos(self.lang)
-
-    def get_live_from_url(self, url):
-        self.location(url)
-        assert self.is_on_page(ArteLivePlayerPage)
-        _id = self.page.retrieve_id()
-        if _id:
-            return self.get_live_video(_id)
+        return self.page.iter_videos(self.LIVE_LANG[self.lang])
--- a/modules/arte/pages.py
+++ b/modules/arte/pages.py
@ -18,19 +18,17 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-import datetime
 import re
-import urllib
 import HTMLParser

-from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.tools.browser import BasePage
 from weboob.tools.capabilities.thumbnail import Thumbnail
 from weboob.capabilities import NotAvailable

-from .video import ArteVideo, ArteLiveVideo
+from .video import ArteLiveVideo
 from .collection import ArteLiveCollection

-__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
+__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']


 class ArteLiveVideoPage(BasePage):
@ -44,9 +42,9 @@ class ArteLiveVideoPage(BasePage):
                urls[url.tag[-2:]] = url.text

        if quality in urls:
-            video.url = urls[quality]
+            video.url = u'%s' % urls[quality]
        else:
-            video.url = urls.popitem()[1]
+            video.url = u'%s' % urls.popitem()[1]
        return video


@ -127,98 +125,3 @@ class ArteLivePage(BasePage):
                item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text))
                items.append(item)
        return items
-
-
-class IndexPage(BasePage):
-    def iter_videos(self):
-        videos = self.document.getroot().cssselect("div[class=video]")
-        for div in videos:
-            title = div.find('h2').find('a').text
-            m = re.match(r'/(fr|de|en)/videos/(.*)\.html', div.find('h2').find('a').attrib['href'])
-            _id = ''
-            if m:
-                _id = m.group(2)
-            rating = rating_max = 0
-            rates = self.parser.select(div, 'div[class=rateContainer]', 1)
-            for r in rates.findall('div'):
-                if 'star-rating-on' in r.attrib['class']:
-                    rating += 1
-                rating_max += 1
-
-            video = ArteVideo(_id)
-            video.title = unicode(title)
-            video.rating = rating
-            video.rating_max = rating_max
-
-            thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
-            video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])
-
-            try:
-                parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
-                if len(parts) == 2:
-                    hours = 0
-                    minutes, seconds = parts
-                elif len(parts) == 3:
-                    hours, minutes, seconds = parts
-                else:
-                    raise BrokenPageError('Unable to parse duration %r' % parts)
-            except BrokenPageError:
-                pass
-            else:
-                video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
-
-            video.set_empty_fields(NotAvailable, ('url',))
-
-            yield video
-
-
-class VideoPage(BasePage):
-    def get_video(self, video=None, lang='fr', quality='hd'):
-        if not video:
-            video = ArteVideo(self.group_dict['id'])
-        video.title = unicode(self.get_title())
-        video.url = unicode(self.get_url(lang, quality))
-        video.set_empty_fields(NotAvailable)
-        return video
-
-    def get_title(self):
-        return self.document.getroot().cssselect('h1')[0].text
-
-    def get_url(self, lang, quality):
-        obj = self.parser.select(self.document.getroot(), 'object', 1)
-        movie_url = self.parser.select(obj, 'param[name=movie]', 1)
-        xml_url = urllib.unquote(movie_url.attrib['value'].split('videorefFileUrl=')[-1])
-
-        doc = self.browser.get_document(self.browser.openurl(xml_url))
-        videos_list = self.parser.select(doc.getroot(), 'video')
-        videos = {}
-        for v in videos_list:
-            videos[v.attrib['lang']] = v.attrib['ref']
-
-        if lang in videos:
-            xml_url = videos[lang]
-        else:
-            xml_url = videos.popitem()[1]
-
-        doc = self.browser.get_document(self.browser.openurl(xml_url))
-
-        obj = self.parser.select(doc.getroot(), 'urls', 1)
-        videos_list = self.parser.select(obj, 'url')
-        urls = {}
-        for v in videos_list:
-            urls[v.attrib['quality']] = v.text
-
-        if quality in urls:
-            video_url = urls[quality]
-        else:
-            video_url = urls.popitem()[1]
-
-        return video_url
-
-
-class ArteLivePlayerPage(BasePage):
-    def retrieve_id(self):
-        player_url = self.document.xpath('//div[@class="flash"]/div/object/param')[0].attrib['value']
-        _id = re.match('(.*)&eventId=(\d*)&(.*)', player_url)
-        if _id:
-            return u'%s' % _id.group(2)
--- a/modules/arte/test.py
+++ b/modules/arte/test.py
@ -30,20 +30,20 @@ class ArteTest(BackendTest):
        if len(l) > 0:
            v = l[0]
            self.backend.fillobj(v, ('url',))
-            self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+            self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))

    def test_live(self):
-        l1 = list(self.backend.iter_resources([BaseVideo], [u'live']))
+        l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
        assert len(l1)
-        l2 = list(self.backend.iter_resources([BaseVideo], [u'live',u'%s'%l1[0]]))
+        l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]]))
        assert len(l2)
        v = l2[0]
        self.backend.fillobj(v, ('url',))
-        self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))

    def test_latest(self):
-        l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+        l = list(self.backend.iter_resources([BaseVideo], [u'arte-latest']))
        assert len(l)
        v = l[0]
        self.backend.fillobj(v, ('url',))
-        self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
--- a/modules/arte/video.py
+++ b/modules/arte/video.py
@ -21,13 +21,14 @@
 from weboob.capabilities.video import BaseVideo


-__all__ = ['ArteVideo','ArteLiveVideo']
+__all__ = ['ArteVideo', 'ArteLiveVideo']


 class ArteVideo(BaseVideo):
    @classmethod
    def id2url(cls, _id):
-        return 'http://videos.arte.tv/fr/videos/%s.html' % _id
+        lang = _id[-1:]
+        return 'http://arte.tv/papi/tvguide/videos/stream/%s/%s/HBBTV' % (lang, _id)


 class ArteLiveVideo(BaseVideo):