update arte module to use arte api

2013-09-16 20:48:58 +02:00 · 2013-09-16 20:48:58 +02:00 · a5d5011979
commit a5d5011979
parent 2a9978989a
5 changed files with 167 additions and 155 deletions
--- a/modules/arte/backend.py
+++ b/modules/arte/backend.py
@ -31,6 +31,7 @@ from .collection import ArteLiveCollection
 __all__ = ['ArteBackend']
 class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
    NAME = 'arte'
    MAINTAINER = u'Romain Bignon'
@ -38,22 +39,42 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
    VERSION = '0.h'
    DESCRIPTION = 'Arte French and German TV'
    LICENSE = 'AGPLv3+'
    order = {'AIRDATE_DESC': 'Date',
             'VIEWS': 'Views',
             'ALPHA': 'Alphabetic',
             'LAST_CHANCE': 'Last chance'
             }
    CONFIG = BackendConfig(Value('lang', label='Lang of videos',
                                 choices={'fr': 'French', 'de': 'Deutsch', 'en': 'English'}, default='fr'),
-                           Value('quality', label='Quality of videos', choices=['hd', 'sd'], default='hd'))
+                           Value('order', label='Sort order', choices=order, default='AIRDATE_DESC'),
                           Value('quality', label='Quality of videos', choices=['hd', 'sd', 'md', 'ed'], default='hd'))
    # Maps the values offered by CONFIG ('lang' and 'quality' choices) to the
    # codes handed to the browser, which inserts them into API URLs.
    # 'en' maps to the same code as 'fr'.
    TRANSLATION = {'fr': 'F',
                   'en': 'F',
                   'de': 'D',
                   'hd': 'HQ',
                   'md': 'MQ',
                   'sd': 'SQ',
                   # 'ed' is the value offered by the 'quality' config choice;
                   # without it, selecting that quality raises KeyError.
                   'ed': 'EQ',
                   # Kept so that any existing lookup by 'eq' still works.
                   'eq': 'EQ',
                   }
    BROWSER = ArteBrowser
    def create_default_browser(self):
-        return self.create_browser(lang=self.config['lang'].get(), quality=self.config['quality'].get())
+        return self.create_browser(lang=self.TRANSLATION[self.config['lang'].get()],
                                   quality=self.TRANSLATION[self.config['quality'].get()],
                                   order=self.config['order'].get())
    def parse_id(self, _id):
        m = re.match('^(\w+)\.(.*)', _id)
        if m:
            return m.groups()
-        m = re.match('https?://videos.arte.tv/\w+/videos/(?P<id>.+)\.html', _id)
+        m = re.match('https?://www.arte.tv/guide/\w+/(?P<id>.+)/(.*)', _id)
        if m:
-            return 'videos', m.group(1)
+            return 'program', m.group(1)
        m = re.match('https?://liveweb.arte.tv/\w+/video/(.*)/', _id)
        if m:
@ -71,6 +92,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            elif site == 'live_url':
                return self.browser.get_live_from_url(_id)
            elif site == 'program':
                return self.browser.get_video_from_program_id(_id)
            else:
                return self.browser.get_video(_id)
@ -84,9 +108,9 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            with self.browser:
                site, _id = self.parse_id(video.id)
-                if isinstance(video,ArteVideo):
+                if isinstance(video, ArteVideo):
                    video = self.browser.get_video(_id, video)
-                if isinstance(video,ArteLiveVideo):
+                if isinstance(video, ArteLiveVideo):
                    video = self.browser.get_live_video(_id, video)
        if 'thumbnail' in fields and video and video.thumbnail:
            with self.browser:
@ -99,26 +123,26 @@ class ArteBackend(BaseBackend, ICapVideo, ICapCollection):
            if BaseVideo in objs:
                collection = self.get_collection(objs, split_path)
                if collection.path_level == 0:
-                    yield Collection([u'latest'],u'Latest Arte videos')
+                    yield Collection([u'arte-latest'], u'Latest Arte videos')
-                    yield Collection([u'live'],u'Arte Web Live videos')
+                    yield Collection([u'arte-live'], u'Arte Web Live videos')
                if collection.path_level == 1:
-                    if collection.split_path == [u'latest']:
+                    if collection.split_path == [u'arte-latest']:
                        for video in self.browser.latest_videos():
                            yield video
-                    if collection.split_path == [u'live']:
+                    if collection.split_path == [u'arte-live']:
                        for categorie in self.browser.get_arte_live_categories():
                            yield categorie
                if collection.path_level == 2:
-                    if collection.split_path[0] == u'live':
+                    if collection.split_path[0] == u'arte-live':
-                        for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.lang)):
+                        for video in self.browser.live_videos(ArteLiveCollection.id2url(collection.basename, self.browser.LIVE_LANG[self.browser.lang])):
                            yield video
    def validate_collection(self, objs, collection):
        if collection.path_level == 0:
            return
-        if BaseVideo in objs and ( collection.split_path == [u'latest'] or collection.split_path == [u'live'] ):
+        if BaseVideo in objs and (collection.split_path == [u'arte-latest'] or collection.split_path == [u'arte-live']):
            return
-        if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'live' :
+        if BaseVideo in objs and collection.path_level == 2 and collection.split_path[0] == u'arte-live':
            return
        raise CollectionNotFound(collection.split_path)
--- a/modules/arte/browser.py
+++ b/modules/arte/browser.py
@ -17,40 +17,53 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 import re
 import datetime
 import urllib
 from weboob.capabilities import NotAvailable
 from weboob.tools.capabilities.thumbnail import Thumbnail
 from weboob.tools.json import json as simplejson
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url
-from .pages import IndexPage, VideoPage, ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage, ArteLivePlayerPage
+from .pages import ArteLivePage, ArteLiveCategorieVideoPage, ArteLiveVideoPage
 from .video import ArteVideo, ArteLiveVideo
 __all__ = ['ArteBrowser']
 class ArteBrowser(BaseBrowser):
    DOMAIN = u'videos.arte.tv'
    ENCODING = None
-    PAGES = {r'http://videos.arte.tv/\w+/videos/toutesLesVideos.*': IndexPage,
+    PAGES = {r'http://liveweb.arte.tv/\w+': ArteLivePage,
-             r'http://videos.arte.tv/\w+/do_search/videos/.*': IndexPage,
+             r'http://liveweb.arte.tv/\w+/cat/.*': ArteLiveCategorieVideoPage,
-             r'http://videos.arte.tv/\w+/videos/(?P<id>.+)\.html': VideoPage,
+             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml': ArteLiveVideoPage,
             r'http://liveweb.arte.tv/\w+' : ArteLivePage,
             r'http://liveweb.arte.tv/\w+/cat/.*' : ArteLiveCategorieVideoPage,
             r'http://liveweb.arte.tv/\w+/video/.*': ArteLivePlayerPage,
             r'http://arte.vo.llnwd.net/o21/liveweb/events/event-(?P<id>.+).xml' : ArteLiveVideoPage,
             }
-    SEARCH_LANG = {'fr': 'recherche', 'de': 'suche', 'en': 'search'}
+    LIVE_LANG = {'F': 'fr',
                 'D': 'de'
                 }
    API_URL = 'http://arte.tv/papi/tvguide'
-    def __init__(self, lang, quality, *args, **kwargs):
+    def __init__(self, lang, quality, order, *args, **kwargs):
        self.lang = lang
        self.quality = quality
        self.order = order
        BaseBrowser.__init__(self, *args, **kwargs)
    @id2url(ArteVideo.id2url)
    def get_video(self, url, video=None):
-        self.location(url)
+        _url = url \
-        return self.page.get_video(video, self.lang, self.quality)
+            + '/' + self.quality \
            + '.json'
        response = self.openurl(_url)
        result = simplejson.loads(response.read(), self.ENCODING)
        if video is None:
            video = ArteVideo(result['video']['VID'])
        video.url = u'%s' % result['video']['VSR'][0]['VUR']
        return video
    @id2url(ArteLiveVideo.id2url)
    def get_live_video(self, url, video=None):
@ -61,29 +74,100 @@ class ArteBrowser(BaseBrowser):
    def home(self):
        """Navigate to the 'toutesLesVideos' page for the browser's language."""
        listing_url = 'http://videos.arte.tv/%s/videos/toutesLesVideos' % self.lang
        self.location(listing_url)
    def get_video_from_program_id(self, _id):
        """Return the video attached to the program identified by `_id`.

        Requests API_URL/epg/program/<lang>/L2/<_id>.json, builds a video
        from the 'VDO' entry found under 'abstractProgram' in the decoded
        JSON, then passes it through get_video() and returns the result.
        """
        path = [self.API_URL, 'epg', 'program', self.lang, 'L2', _id]
        url = '/'.join(path) + '.json'

        payload = simplejson.loads(self.openurl(url).read(), self.ENCODING)
        video = self.create_video(payload['abstractProgram']['VDO'])
        return self.get_video(video.id, video)
    def search_videos(self, pattern):
-        self.location(self.buildurl('/%s/do_search/videos/%s' % (self.lang, self.SEARCH_LANG[self.lang]), q=pattern.encode('utf-8')))
+        class_name = 'videos/plus7'
-        assert self.is_on_page(IndexPage)
+        method_name = 'search'
-        return self.page.iter_videos()
+        level = 'L1'
        cluster = 'ALL'
        channel = 'ALL'
        limit = '10'
        offset = '0'
        url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset, pattern)
        response = self.openurl(url)
        result = simplejson.loads(response.read(), self.ENCODING)
        return self.create_video_from_plus7(result['videoList'])
    def create_video_from_plus7(self, result):
        """Lazily yield create_video(entry) for each entry of `result`."""
        for entry in result:
            video = self.create_video(entry)
            yield video
    def create_video(self, item):
        """Build an ArteVideo from one item of a decoded API response.

        Reads the keys 'VID', 'VTI', 'VRT', 'programImage',
        'videoDurationSeconds', 'VDE' and 'VDA' of `item`; a missing key
        raises KeyError. 'VSU' is optional and, when present, is appended
        to the title after ' : '.

        The date is only assigned when 'VDA' starts with three digit groups
        'DD MM YYYY', each separated by one whitespace character.
        """
        video = ArteVideo(item['VID'])

        if 'VSU' in item:
            video.title = u'%s : %s' % (item['VTI'], item['VSU'])
        else:
            video.title = u'%s' % (item['VTI'])

        video.rating = int(item['VRT'])
        video.thumbnail = Thumbnail(u'%s' % item['programImage'])
        video.duration = datetime.timedelta(seconds=int(item['videoDurationSeconds']))
        # Runs before description/date are assigned; both are set just below.
        # NOTE(review): presumably marks still-unset fields other than 'url'
        # as NotAvailable -- confirm against set_empty_fields().
        video.set_empty_fields(NotAvailable, ('url',))
        video.description = u'%s' % item['VDE']

        # Raw string so the regex escapes are not treated as string escapes.
        # The former trailing lazy group could only match the empty string,
        # so dropping it does not change which inputs match.
        m = re.match(r'(\d{2})\s(\d{2})\s(\d{4})', item['VDA'])
        if m:
            day, month, year = (int(part) for part in m.groups())
            video.date = datetime.date(year, month, day)

        return video
    def create_url_plus7(self, class_name, method_name, level, cluster, channel, limit, offset, pattern=None):
        """Build the URL of an API listing or search request.

        The result is
        API_URL/class_name/method_name/lang/level[/quoted pattern]
        /channel/cluster/-1/order/limit/offset.json
        where `lang` and `order` come from the browser instance. The
        pattern segment is only added when `pattern` is non-empty. All
        arguments are expected to be strings.
        """
        parts = [self.API_URL, class_name, method_name, self.lang, level]

        if pattern:
            # Python 2's urllib.quote() raises KeyError on unicode text with
            # non-ASCII characters, so such patterns are encoded to UTF-8
            # bytes before quoting.
            if not isinstance(pattern, str):
                pattern = pattern.encode('utf-8')
            parts.append(urllib.quote(pattern))

        # NOTE(review): the meaning of the fixed '-1' segment is not visible
        # in this file -- it is reproduced unchanged.
        parts.extend([channel, cluster, '-1', self.order, limit, offset])
        return '/'.join(parts) + '.json'
    def latest_videos(self):
-        self.home()
+        class_name = 'videos'
-        assert self.is_on_page(IndexPage)
+        method_name = 'plus7'
-        return self.page.iter_videos()
+        level = 'L1'
        cluster = 'ALL'
        channel = 'ALL'
        limit = '10'
        offset = '0'
        url = self.create_url_plus7(class_name, method_name, level, cluster, channel, limit, offset)
        response = self.openurl(url)
        result = simplejson.loads(response.read(), self.ENCODING)
        return self.create_video_from_plus7(result['videoList'])
    def get_arte_live_categories(self):
-        self.location('http://liveweb.arte.tv/%s' %self.lang)
+        self.location('http://liveweb.arte.tv/%s' % self.LIVE_LANG[self.lang])
        assert self.is_on_page(ArteLivePage)
        return self.page.iter_resources()
    def live_videos(self, url):
        self.location(url)
        assert self.is_on_page(ArteLiveCategorieVideoPage)
-        return self.page.iter_videos(self.lang)
+        return self.page.iter_videos(self.LIVE_LANG[self.lang])
    def get_live_from_url(self, url):
        """Load a live player page and return the live video it refers to.

        The page reached at `url` must be an ArteLivePlayerPage. Returns
        None when that page yields no id.
        """
        self.location(url)
        assert self.is_on_page(ArteLivePlayerPage)

        event_id = self.page.retrieve_id()
        if not event_id:
            return None
        return self.get_live_video(event_id)
--- a/modules/arte/pages.py
+++ b/modules/arte/pages.py
@ -18,19 +18,17 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 import datetime
 import re
 import urllib
 import HTMLParser
-from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.tools.browser import BasePage
 from weboob.tools.capabilities.thumbnail import Thumbnail
 from weboob.capabilities import NotAvailable
-from .video import ArteVideo, ArteLiveVideo
+from .video import ArteLiveVideo
 from .collection import ArteLiveCollection
-__all__ = ['IndexPage', 'VideoPage', 'ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
+__all__ = ['ArteLivePage', 'ArteLiveCategorieVideoPage', 'ArteLiveVideoPage']
 class ArteLiveVideoPage(BasePage):
@ -44,9 +42,9 @@ class ArteLiveVideoPage(BasePage):
                urls[url.tag[-2:]] = url.text
        if quality in urls:
-            video.url = urls[quality]
+            video.url = u'%s' % urls[quality]
        else:
-            video.url = urls.popitem()[1]
+            video.url = u'%s' % urls.popitem()[1]
        return video
@ -127,98 +125,3 @@ class ArteLivePage(BasePage):
                item = ArteLiveCollection([u'live', u'%s' % _id], u'%s' % (el.find('a').text))
                items.append(item)
        return items
 class IndexPage(BasePage):
    """Listing page from which ArteVideo entries are scraped."""

    def iter_videos(self):
        """Yield one ArteVideo per ``div[class=video]`` element of the page.

        Each video gets its title, id (empty string when the link does not
        match the expected ``/<lang>/videos/<id>.html`` form), star rating,
        thumbnail and, when it can be parsed, duration.
        """
        for block in self.document.getroot().cssselect("div[class=video]"):
            link = block.find('h2').find('a')
            title = link.text
            m = re.match(r'/(fr|de|en)/videos/(.*)\.html', link.attrib['href'])
            _id = m.group(2) if m else ''

            # Every child div counts towards the maximum; those flagged
            # 'star-rating-on' count towards the actual rating.
            stars = self.parser.select(block, 'div[class=rateContainer]', 1).findall('div')
            rating = sum(1 for star in stars if 'star-rating-on' in star.attrib['class'])
            rating_max = len(stars)

            video = ArteVideo(_id)
            video.title = unicode(title)
            video.rating = rating
            video.rating_max = rating_max

            thumb = self.parser.select(block, 'img[class=thumbnail]', 1)
            video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])

            try:
                pieces = self.parser.select(block, 'div.duration_thumbnail', 1).text.split(':')
                if len(pieces) == 3:
                    hours, minutes, seconds = pieces
                elif len(pieces) == 2:
                    hours = 0
                    minutes, seconds = pieces
                else:
                    raise BrokenPageError('Unable to parse duration %r' % pieces)
            except BrokenPageError:
                # An unusable duration is not fatal: the video is still
                # yielded, just without a duration.
                pass
            else:
                video.duration = datetime.timedelta(hours=int(hours),
                                                    minutes=int(minutes),
                                                    seconds=int(seconds))

            video.set_empty_fields(NotAvailable, ('url',))
            yield video
 class VideoPage(BasePage):
    """Page of a single video, used to resolve its title and stream URL."""

    def get_video(self, video=None, lang='fr', quality='hd'):
        """Fill `video` (or a new ArteVideo built from the page id) and return it.

        The title comes from get_title() and the URL from
        get_url(lang, quality).
        """
        video = video or ArteVideo(self.group_dict['id'])
        video.title = unicode(self.get_title())
        video.url = unicode(self.get_url(lang, quality))
        video.set_empty_fields(NotAvailable)
        return video

    def get_title(self):
        """Return the text of the first ``h1`` element of the page."""
        headings = self.document.getroot().cssselect('h1')
        return headings[0].text

    def get_url(self, lang, quality):
        """Return the stream URL for `lang` and `quality`.

        Two documents are fetched in turn: the one named by the
        'videorefFileUrl' part of the player's 'movie' parameter, which
        lists one reference per language, then the referenced document,
        which lists one URL per quality. When the requested language or
        quality is absent, an arbitrary available entry is used instead.
        """
        player = self.parser.select(self.document.getroot(), 'object', 1)
        movie = self.parser.select(player, 'param[name=movie]', 1)
        ref_url = urllib.unquote(movie.attrib['value'].split('videorefFileUrl=')[-1])

        ref_doc = self.browser.get_document(self.browser.openurl(ref_url))
        by_lang = dict((node.attrib['lang'], node.attrib['ref'])
                       for node in self.parser.select(ref_doc.getroot(), 'video'))
        stream_ref = by_lang[lang] if lang in by_lang else by_lang.popitem()[1]

        stream_doc = self.browser.get_document(self.browser.openurl(stream_ref))
        url_nodes = self.parser.select(self.parser.select(stream_doc.getroot(), 'urls', 1), 'url')
        by_quality = dict((node.attrib['quality'], node.text) for node in url_nodes)
        return by_quality[quality] if quality in by_quality else by_quality.popitem()[1]
 class ArteLivePlayerPage(BasePage):
    """Live player page, used to find the id of the event being played."""

    def retrieve_id(self):
        """Return the 'eventId' value of the flash player URL as unicode.

        Returns None when the page has no player parameter element or when
        its value carries no ``&eventId=<digits>&`` part.
        """
        params = self.document.xpath('//div[@class="flash"]/div/object/param')
        if not params:
            # No player parameter on the page: report "no id" instead of
            # failing with IndexError.
            return None

        player_url = params[0].attrib['value']
        # Raw string so the regex escape is not treated as a string escape.
        m = re.match(r'(.*)&eventId=(\d*)&(.*)', player_url)
        if m:
            return u'%s' % m.group(2)
        return None
--- a/modules/arte/test.py
+++ b/modules/arte/test.py
@ -30,20 +30,20 @@ class ArteTest(BackendTest):
        if len(l) > 0:
            v = l[0]
            self.backend.fillobj(v, ('url',))
-            self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+            self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
    def test_live(self):
-        l1 = list(self.backend.iter_resources([BaseVideo], [u'live']))
+        l1 = list(self.backend.iter_resources([BaseVideo], [u'arte-live']))
        assert len(l1)
-        l2 = list(self.backend.iter_resources([BaseVideo], [u'live',u'%s'%l1[0]]))
+        l2 = list(self.backend.iter_resources([BaseVideo], [u'arte-live', u'%s' % l1[0]]))
        assert len(l2)
        v = l2[0]
        self.backend.fillobj(v, ('url',))
-        self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
    def test_latest(self):
-        l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+        l = list(self.backend.iter_resources([BaseVideo], [u'arte-latest']))
        assert len(l)
        v = l[0]
        self.backend.fillobj(v, ('url',))
-        self.assertTrue(v.url and v.url.startswith('rtmp://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        self.assertTrue(v.url, 'URL for video "%s" not found' % (v.id))
--- a/modules/arte/video.py
+++ b/modules/arte/video.py
@ -21,13 +21,14 @@
 from weboob.capabilities.video import BaseVideo
-__all__ = ['ArteVideo','ArteLiveVideo']
+__all__ = ['ArteVideo', 'ArteLiveVideo']
 class ArteVideo(BaseVideo):
    @classmethod
    def id2url(cls, _id):
-        return 'http://videos.arte.tv/fr/videos/%s.html' % _id
+        lang = _id[-1:]
        return 'http://arte.tv/papi/tvguide/videos/stream/%s/%s/HBBTV' % (lang, _id)
 class ArteLiveVideo(BaseVideo):