implement searches on INA

This commit is contained in:
Romain Bignon 2010-08-31 14:44:39 +02:00
commit 9fb8deb516
5 changed files with 72 additions and 5 deletions

View file

@ -40,5 +40,5 @@ class InaBackend(BaseBackend, ICapVideo):
return self.browser.get_video(_id) return self.browser.get_video(_id)
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
debug(u'backend ina: iter_search_results is not implemented') with self.browser:
return set() return self.browser.iter_search_results(pattern)

View file

@ -20,6 +20,7 @@ from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from .pages.video import VideoPage from .pages.video import VideoPage
from .pages.search import SearchPage
from .video import InaVideo from .video import InaVideo
@ -27,11 +28,17 @@ __all__ = ['InaBrowser']
class InaBrowser(BaseBrowser): class InaBrowser(BaseBrowser):
DOMAIN = 'ina.fr' DOMAIN = 'boutique.ina.fr'
PAGES = {'http://boutique\.ina\.fr/video/.+\.html': VideoPage, PAGES = {'http://boutique\.ina\.fr/video/.+\.html': VideoPage,
'http://boutique\.ina\.fr/recherche/.+': SearchPage,
} }
@id2url(InaVideo.id2url) @id2url(InaVideo.id2url)
def get_video(self, url): def get_video(self, url):
self.location(url) self.location(url)
return self.page.video return self.page.video
def iter_search_results(self, pattern):
self.location(self.buildurl('/recherche/recherche', search=pattern))
assert self.is_on_page(SearchPage)
return self.page.iter_videos()

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010 Romain Bignon
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import datetime
import re
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select, SelectElementException
from ..video import InaVideo
__all__ = ['SearchPage']
class SearchPage(BasePage):
    """Page listing the videos returned by a search.

    Each result is read from an ``<li>`` element found under the
    ``div.container-videos ul`` element of the document.
    """

    # The dot before "html" is escaped so that only real ".html" links are
    # accepted (an unescaped dot would also match e.g. "/video/fooXhtml").
    URL_REGEXP = re.compile(r'/video/(.+)\.html')

    # Duration text as displayed on the page, e.g. "12min34s".
    DURATION_REGEXP = re.compile(r'(\d+)min(\d+)s')

    def iter_videos(self):
        """Iterate over the videos listed on this page.

        Yields one InaVideo per ``<li>`` item, built from the id captured
        in the item's link, the text of ``p.titre``, the date parsed from
        ``p.date`` (``day/month/year``) and the duration parsed from
        ``p.duree`` (``<minutes>min<seconds>s``).

        Raises SelectElementException when the link of an item does not
        match URL_REGEXP or when its duration text cannot be parsed.
        A date which is not made of three '/'-separated integers raises
        ValueError.
        """
        # NOTE(review): the third argument (1) presumably asks select() for
        # exactly one matching element -- confirm against lxmlparser.select.
        ul = select(self.document.getroot(), 'div.container-videos ul', 1)
        for li in ul.findall('li'):
            href = li.find('a').attrib['href']
            url_match = self.URL_REGEXP.match(href)
            if not url_match:
                raise SelectElementException('Unable to match id (%r)' % href)
            video_id = url_match.group(1)

            title = select(li, 'p.titre', 1).text

            date_text = select(li, 'p.date', 1).text
            day, month, year = [int(s) for s in date_text.split('/')]
            date = datetime.datetime(year, month, day)

            duration_text = select(li, 'p.duree', 1).text
            duration_match = self.DURATION_REGEXP.match(duration_text)
            if not duration_match:
                raise SelectElementException('Unable to match duration (%r)' % duration_text)
            duration = datetime.timedelta(minutes=int(duration_match.group(1)),
                                          seconds=int(duration_match.group(2)))

            yield InaVideo(video_id,
                           title=title,
                           date=date,
                           duration=duration
                          )

View file

@ -59,7 +59,7 @@ class VideoPage(BasePage):
if m: if m:
day, month, year = [int(s) for s in m.group(1).split('/')] day, month, year = [int(s) for s in m.group(1).split('/')]
date = datetime.datetime(year, month, day) date = datetime.datetime(year, month, day)
duration = datetime.timedelta(minutes=m.group(3), seconds=m.group(2)) duration = datetime.timedelta(minutes=int(m.group(3)), seconds=int(m.group(2)))
return date, duration return date, duration
else: else:
return None return None

View file

@ -25,4 +25,4 @@ __all__ = ['InaVideo']
class InaVideo(BaseVideo): class InaVideo(BaseVideo):
@classmethod @classmethod
def id2url(cls, _id): def id2url(cls, _id):
return _id return 'http://boutique.ina.fr/video/%s.html' % _id