From 9fb8deb516df044b2f4bc7a5ed52f3757f7fc647 Mon Sep 17 00:00:00 2001
From: Romain Bignon
Date: Tue, 31 Aug 2010 14:44:39 +0200
Subject: [PATCH] implement searches on INA

---
 weboob/backends/ina/backend.py      |  5 ++-
 weboob/backends/ina/browser.py      | 12 +++++-
 weboob/backends/ina/pages/search.py | 72 +++++++++++++++++++++++++++++++++++
 weboob/backends/ina/pages/video.py  |  2 +-
 weboob/backends/ina/video.py        |  2 +-
 5 files changed, 88 insertions(+), 5 deletions(-)
 create mode 100644 weboob/backends/ina/pages/search.py

diff --git a/weboob/backends/ina/backend.py b/weboob/backends/ina/backend.py
index 552c27ed..79494c8d 100644
--- a/weboob/backends/ina/backend.py
+++ b/weboob/backends/ina/backend.py
@@ -40,5 +40,6 @@ class InaBackend(BaseBackend, ICapVideo):
             return self.browser.get_video(_id)
 
     def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
-        debug(u'backend ina: iter_search_results is not implemented')
-        return set()
+        # Only pattern is forwarded to the browser; sortby, nsfw and max_results are not applied.
+        with self.browser:
+            return self.browser.iter_search_results(pattern)
diff --git a/weboob/backends/ina/browser.py b/weboob/backends/ina/browser.py
index ecd4f64c..b682e3cd 100644
--- a/weboob/backends/ina/browser.py
+++ b/weboob/backends/ina/browser.py
@@ -20,6 +20,7 @@ from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url
 
 from .pages.video import VideoPage
+from .pages.search import SearchPage
 from .video import InaVideo
 
 
@@ -27,11 +28,20 @@ __all__ = ['InaBrowser']
 
 
 class InaBrowser(BaseBrowser):
-    DOMAIN = 'ina.fr'
+    DOMAIN = 'boutique.ina.fr'
     PAGES = {'http://boutique\.ina\.fr/video/.+\.html': VideoPage,
+             'http://boutique\.ina\.fr/recherche/.+': SearchPage,
             }
 
     @id2url(InaVideo.id2url)
     def get_video(self, url):
         self.location(url)
         return self.page.video
+
+    def iter_search_results(self, pattern):
+        """
+        Open the search page for pattern and return an iterator over its videos.
+        """
+        self.location(self.buildurl('/recherche/recherche', search=pattern))
+        assert self.is_on_page(SearchPage)
+        return self.page.iter_videos()
diff --git a/weboob/backends/ina/pages/search.py b/weboob/backends/ina/pages/search.py
new file mode 100644
index 00000000..52f61f47
--- /dev/null
+++ b/weboob/backends/ina/pages/search.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010 Romain Bignon
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+import datetime
+import re
+
+from weboob.tools.browser import BasePage
+from weboob.tools.parsers.lxmlparser import select, SelectElementException
+
+from ..video import InaVideo
+
+
+__all__ = ['SearchPage']
+
+
+class SearchPage(BasePage):
+    """Result page of a search on boutique.ina.fr."""
+
+    # Link to a video page; group 1 is the video id.
+    URL_REGEXP = re.compile(r'/video/(.+)\.html')
+    # Duration label such as '12min34s'; the hour and minute parts are optional.
+    DURATION_REGEXP = re.compile(r'(?:(\d+)h)?(?:(\d+)min)?(\d+)s')
+
+    def iter_videos(self):
+        """
+        Yield an InaVideo (id, title, date, duration) for each listed result.
+
+        Raises SelectElementException when a result link or a duration label
+        does not match the expected format.
+        """
+        ul = select(self.document.getroot(), 'div.container-videos ul', 1)
+        for li in ul.findall('li'):
+            href = li.find('a').attrib['href']
+            m = self.URL_REGEXP.match(href)
+            if not m:
+                raise SelectElementException('Unable to match id (%r)' % href)
+            _id = m.group(1)
+
+            title = select(li, 'p.titre', 1).text
+
+            # The date label is parsed as day/month/year.
+            date = select(li, 'p.date', 1).text
+            day, month, year = [int(s) for s in date.split('/')]
+            date = datetime.datetime(year, month, day)
+
+            duration = select(li, 'p.duree', 1).text
+            m = self.DURATION_REGEXP.match(duration)
+            if not m:
+                raise SelectElementException('Unable to match duration (%r)' % duration)
+            # Components missing from the label are None and count as zero.
+            hours, minutes, seconds = [int(s or 0) for s in m.groups()]
+            duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
+
+            yield InaVideo(_id,
+                           title=title,
+                           date=date,
+                           duration=duration
+                          )
diff --git a/weboob/backends/ina/pages/video.py b/weboob/backends/ina/pages/video.py
index c357b659..60c7d543 100644
--- a/weboob/backends/ina/pages/video.py
+++ b/weboob/backends/ina/pages/video.py
@@ -59,7 +59,7 @@ class VideoPage(BasePage):
         if m:
             day, month, year = [int(s) for s in m.group(1).split('/')]
             date = datetime.datetime(year, month, day)
-            duration = datetime.timedelta(minutes=m.group(3), seconds=m.group(2))
+            duration = datetime.timedelta(minutes=int(m.group(3)), seconds=int(m.group(2)))
             return date, duration
         else:
             return None
diff --git a/weboob/backends/ina/video.py b/weboob/backends/ina/video.py
index 552952f7..6686b900 100644
--- a/weboob/backends/ina/video.py
+++ b/weboob/backends/ina/video.py
@@ -25,4 +25,4 @@ __all__ = ['InaVideo']
 class InaVideo(BaseVideo):
     @classmethod
     def id2url(cls, _id):
-        return _id
+        return 'http://boutique.ina.fr/video/%s.html' % _id