ina backend: support fillobj and fetch video thumbnails

2010-08-31 15:01:42 +02:00 · 2010-08-31 15:01:42 +02:00 · fab98062ca
commit fab98062ca
parent 9fb8deb516
4 changed files with 32 additions and 17 deletions
--- a/weboob/backends/ina/backend.py
+++ b/weboob/backends/ina/backend.py
@@ -22,6 +22,7 @@ from weboob.capabilities.video import ICapVideo
 from weboob.tools.backend import BaseBackend

 from .browser import InaBrowser
+from .video import InaVideo


 __all__ = ['InaBackend']
@@ -42,3 +43,16 @@ class InaBackend(BaseBackend, ICapVideo):
    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        with self.browser:
            return self.browser.iter_search_results(pattern)
+
+    def fill_video(self, video, fields):
+        if fields != ['thumbnail']:
+            # if more than just the thumbnail is requested, we probably want every field
+            with self.browser:
+                video = self.browser.get_video(video.id, video)
+        if 'thumbnail' in fields:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    OBJECTS = {InaVideo: fill_video}
--- a/weboob/backends/ina/browser.py
+++ b/weboob/backends/ina/browser.py
@@ -34,9 +34,9 @@ class InaBrowser(BaseBrowser):
            }

    @id2url(InaVideo.id2url)
-    def get_video(self, url):
+    def get_video(self, url, video=None):
        self.location(url)
-        return self.page.video
+        return self.page.get_video(video)

    def iter_search_results(self, pattern):
        self.location(self.buildurl('/recherche/recherche', search=pattern))
--- a/weboob/backends/ina/pages/search.py
+++ b/weboob/backends/ina/pages/search.py
@@ -34,11 +34,9 @@ class SearchPage(BasePage):
    def iter_videos(self):
        ul = select(self.document.getroot(), 'div.container-videos ul', 1)
        for li in ul.findall('li'):
-            m = self.URL_REGEXP.match(li.find('a').attrib['href'])
-            if m:
-                id = m.group(1)
-            else:
-                raise SelectElementException('Unable to match id (%r)' % li.find('a').attrib['href'])
+            id = re.sub(r'/video/(.+)\.html', r'\1', li.find('a').attrib['href'])
+
+            thumbnail = 'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src']

            title = select(li, 'p.titre', 1).text

@@ -47,14 +45,15 @@ class SearchPage(BasePage):
            date = datetime.datetime(year, month, day)

            duration = select(li, 'p.duree', 1).text
-            m = re.match(r'(\d+)min(\d+)s', duration)
+            m = re.match(r'((\d+)min)?(\d+)s', duration)
            if m:
-                duration = datetime.timedelta(minutes=int(m.group(1)), seconds=int(m.group(2)))
+                duration = datetime.timedelta(minutes=int(m.group(2) or 0), seconds=int(m.group(3)))
            else:
                raise SelectElementException('Unable to match duration (%r)' % duration)

            yield InaVideo(id,
                           title=title,
                           date=date,
-                           duration=duration
+                           duration=duration,
+                           thumbnail_url=thumbnail,
                          )
--- a/weboob/backends/ina/pages/video.py
+++ b/weboob/backends/ina/pages/video.py
@@ -35,14 +35,16 @@ __all__ = ['VideoPage']
 class VideoPage(BasePage):
    URL_REGEXP = re.compile('http://boutique.ina.fr/video/(.+).html')

-    def on_loaded(self):
+    def get_video(self, video):
        date, duration = self.get_date_and_duration()
-        self.video = InaVideo(self.get_id(),
-                              title=self.get_title(),
-                              url=self.get_url(),
-                              date=date,
-                              duration=duration,
-                              )
+        if not video:
+            video = InaVideo(self.get_id())
+
+        video.title = self.get_title()
+        video.url = self.get_url()
+        video.date = date
+        video.duration = duration
+        return video

    def get_id(self):
        m = self.URL_REGEXP.match(self.url)