diff --git a/weboob/backends/ina/backend.py b/weboob/backends/ina/backend.py
index 79494c8d..b3761636 100644
--- a/weboob/backends/ina/backend.py
+++ b/weboob/backends/ina/backend.py
@@ -22,6 +22,7 @@ from weboob.capabilities.video import ICapVideo
 from weboob.tools.backend import BaseBackend
 
 from .browser import InaBrowser
+from .video import InaVideo
 
 
 __all__ = ['InaBackend']
@@ -42,3 +43,16 @@ class InaBackend(BaseBackend, ICapVideo):
     def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
         with self.browser:
             return self.browser.iter_search_results(pattern)
+
+    def fill_video(self, video, fields):
+        if fields != ['thumbnail']:
+            # if more than just the thumbnail is requested, we probably want every other field as well
+            with self.browser:
+                video = self.browser.get_video(video.id, video)
+        if 'thumbnail' in fields:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    OBJECTS = {InaVideo: fill_video}
diff --git a/weboob/backends/ina/browser.py b/weboob/backends/ina/browser.py
index b682e3cd..34ea150b 100644
--- a/weboob/backends/ina/browser.py
+++ b/weboob/backends/ina/browser.py
@@ -34,9 +34,9 @@ class InaBrowser(BaseBrowser):
             }
 
     @id2url(InaVideo.id2url)
-    def get_video(self, url):
+    def get_video(self, url, video=None):
         self.location(url)
-        return self.page.video
+        return self.page.get_video(video)
 
     def iter_search_results(self, pattern):
         self.location(self.buildurl('/recherche/recherche', search=pattern))
diff --git a/weboob/backends/ina/pages/search.py b/weboob/backends/ina/pages/search.py
index 52f61f47..a918592f 100644
--- a/weboob/backends/ina/pages/search.py
+++ b/weboob/backends/ina/pages/search.py
@@ -34,11 +34,9 @@ class SearchPage(BasePage):
     def iter_videos(self):
         ul = select(self.document.getroot(), 'div.container-videos ul', 1)
         for li in ul.findall('li'):
-            m = self.URL_REGEXP.match(li.find('a').attrib['href'])
-            if m:
-                id = m.group(1)
-            else:
-                raise SelectElementException('Unable to match id (%r)' % li.find('a').attrib['href'])
+            id = re.sub(r'/video/(.+)\.html', r'\1', li.find('a').attrib['href'])
+
+            thumbnail = 'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src']
 
             title = select(li, 'p.titre', 1).text
 
@@ -47,14 +45,15 @@ class SearchPage(BasePage):
             date = datetime.datetime(year, month, day)
 
             duration = select(li, 'p.duree', 1).text
-            m = re.match(r'(\d+)min(\d+)s', duration)
+            m = re.match(r'((\d+)min)?(\d+)s', duration)
             if m:
-                duration = datetime.timedelta(minutes=int(m.group(1)), seconds=int(m.group(2)))
+                duration = datetime.timedelta(minutes=int(m.group(2) or 0), seconds=int(m.group(3)))
             else:
                 raise SelectElementException('Unable to match duration (%r)' % duration)
 
             yield InaVideo(id,
                            title=title,
                            date=date,
-                           duration=duration
+                           duration=duration,
+                           thumbnail_url=thumbnail,
                            )
diff --git a/weboob/backends/ina/pages/video.py b/weboob/backends/ina/pages/video.py
index 60c7d543..873a2d59 100644
--- a/weboob/backends/ina/pages/video.py
+++ b/weboob/backends/ina/pages/video.py
@@ -35,14 +35,16 @@ __all__ = ['VideoPage']
 class VideoPage(BasePage):
     URL_REGEXP = re.compile('http://boutique.ina.fr/video/(.+).html')
 
-    def on_loaded(self):
+    def get_video(self, video):
         date, duration = self.get_date_and_duration()
-        self.video = InaVideo(self.get_id(),
-                              title=self.get_title(),
-                              url=self.get_url(),
-                              date=date,
-                              duration=duration,
-                              )
+        if not video:
+            video = InaVideo(self.get_id())
+
+        video.title = self.get_title()
+        video.url = self.get_url()
+        video.date = date
+        video.duration = duration
+        return video
 
     def get_id(self):
         m = self.URL_REGEXP.match(self.url)