implement searches on INA

2010-08-31 14:44:39 +02:00 · 2010-08-31 14:44:39 +02:00 · 9fb8deb516
commit 9fb8deb516
parent 87cd2bebec
5 changed files with 72 additions and 5 deletions
--- a/weboob/backends/ina/backend.py
+++ b/weboob/backends/ina/backend.py
@ -40,5 +40,5 @@ class InaBackend(BaseBackend, ICapVideo):
        return self.browser.get_video(_id)

    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
-        debug(u'backend ina: iter_search_results is not implemented')
-        return set()
+        with self.browser:
+            return self.browser.iter_search_results(pattern)
--- a/weboob/backends/ina/browser.py
+++ b/weboob/backends/ina/browser.py
@ -20,6 +20,7 @@ from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url

 from .pages.video import VideoPage
+from .pages.search import SearchPage
 from .video import InaVideo


@ -27,11 +28,17 @@ __all__ = ['InaBrowser']


 class InaBrowser(BaseBrowser):
-    DOMAIN = 'ina.fr'
+    DOMAIN = 'boutique.ina.fr'
    PAGES = {'http://boutique\.ina\.fr/video/.+\.html': VideoPage,
+             'http://boutique\.ina\.fr/recherche/.+': SearchPage,
            }

    @id2url(InaVideo.id2url)
    def get_video(self, url):
        self.location(url)
        return self.page.video
+
+    def iter_search_results(self, pattern):
+        self.location(self.buildurl('/recherche/recherche', search=pattern))
+        assert self.is_on_page(SearchPage)
+        return self.page.iter_videos()
--- a/weboob/backends/ina/pages/search.py
+++ b/weboob/backends/ina/pages/search.py
@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010  Romain Bignon
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+import datetime
+import re
+
+from weboob.tools.browser import BasePage
+from weboob.tools.parsers.lxmlparser import select, SelectElementException
+
+from ..video import InaVideo
+
+
+__all__ = ['SearchPage']
+
+
+class SearchPage(BasePage):
+    """Page listing the results of a search on boutique.ina.fr."""
+
+    # Captures the video id from a result link of the form '/video/<id>.html'.
+    URL_REGEXP = re.compile(r'/video/(.+)\.html')
+    # Durations are expected to look like '<minutes>min<seconds>s'.
+    DURATION_REGEXP = re.compile(r'(\d+)min(\d+)s')
+
+    def iter_videos(self):
+        """
+        Iterate over the videos listed on this page.
+
+        Yields one InaVideo per <li> of the result list, built from the id
+        found in the entry's link, and from its title, date and duration.
+
+        Raises SelectElementException if an entry has no link, or if its
+        link or its duration does not have the expected format.
+        """
+        ul = select(self.document.getroot(), 'div.container-videos ul', 1)
+        for li in ul.findall('li'):
+            link = li.find('a')
+            if link is None:
+                # Report a parsing error rather than failing on None.attrib.
+                raise SelectElementException('Unable to find link of video')
+            href = link.attrib['href']
+            m = self.URL_REGEXP.match(href)
+            if not m:
+                raise SelectElementException('Unable to match id (%r)' % href)
+            video_id = m.group(1)
+
+            title = select(li, 'p.titre', 1).text
+
+            # Dates are written as day/month/year.
+            date_text = select(li, 'p.date', 1).text
+            day, month, year = [int(s) for s in date_text.split('/')]
+            date = datetime.datetime(year, month, day)
+
+            duration_text = select(li, 'p.duree', 1).text
+            m = self.DURATION_REGEXP.match(duration_text)
+            if not m:
+                raise SelectElementException('Unable to match duration (%r)' % duration_text)
+            duration = datetime.timedelta(minutes=int(m.group(1)), seconds=int(m.group(2)))
+
+            yield InaVideo(video_id,
+                           title=title,
+                           date=date,
+                           duration=duration
+                          )
--- a/weboob/backends/ina/pages/video.py
+++ b/weboob/backends/ina/pages/video.py
@ -59,7 +59,7 @@ class VideoPage(BasePage):
            if m:
                day, month, year = [int(s) for s in m.group(1).split('/')]
                date = datetime.datetime(year, month, day)
-                duration = datetime.timedelta(minutes=m.group(3), seconds=m.group(2))
+                duration = datetime.timedelta(minutes=int(m.group(3)), seconds=int(m.group(2)))
                return date, duration
        else:
            return None
--- a/weboob/backends/ina/video.py
+++ b/weboob/backends/ina/video.py
@ -25,4 +25,4 @@ __all__ = ['InaVideo']
 class InaVideo(BaseVideo):
    @classmethod
    def id2url(cls, _id):
-        return _id
+        return 'http://boutique.ina.fr/video/%s.html' % _id