From ce91c5291207c74867fe26b9d3b6bb959c11da64 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Tue, 31 Jan 2012 18:08:09 +0100 Subject: [PATCH] fix parsing of URLs --- modules/ina/backend.py | 1 + modules/ina/browser.py | 2 +- modules/ina/pages/video.py | 7 ++++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules/ina/backend.py b/modules/ina/backend.py index 067f009f..fad2f0cf 100644 --- a/modules/ina/backend.py +++ b/modules/ina/backend.py @@ -47,6 +47,7 @@ class InaBackend(BaseBackend, ICapVideo): return self.browser.iter_search_results(pattern) def fill_video(self, video, fields): + return video if fields != ['thumbnail']: # if we don't want only the thumbnail, we probably want also every fields with self.browser: diff --git a/modules/ina/browser.py b/modules/ina/browser.py index a566cb41..0805484b 100644 --- a/modules/ina/browser.py +++ b/modules/ina/browser.py @@ -31,7 +31,7 @@ __all__ = ['InaBrowser'] class InaBrowser(BaseBrowser): DOMAIN = 'ina.fr' - PAGES = {'http://boutique\.ina\.fr/video/.+\.html': BoutiqueVideoPage, + PAGES = {'http://boutique\.ina\.fr/(video|audio)/.+\.html': BoutiqueVideoPage, 'http://www\.ina\.fr/.+\.html': VideoPage, 'http://boutique\.ina\.fr/recherche/.+': SearchPage, } diff --git a/modules/ina/pages/video.py b/modules/ina/pages/video.py index c6e8f7cc..55f5c729 100644 --- a/modules/ina/pages/video.py +++ b/modules/ina/pages/video.py @@ -56,8 +56,9 @@ class BaseVideoPage(BasePage): def get_url(self): qs = parse_qs(self.document.getroot().cssselect('param[name="flashvars"]')[0].attrib['value']) - url = 'http://mp4.ina.fr/lecture/lire/id_notice/%s/token_notice/%s' % (qs['id_notice'][0], qs['token_notice'][0]) - return url + s = self.browser.readurl('http://boutique.ina.fr/player/infovideo/id_notice/%s' % qs['id_notice'][0]) + s = s[s.find('')+7:s.find('')] + return '%s/pkey/%s' % (s, qs['pkey'][0]) def parse_date_and_duration(self, text): duration_regexp = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s') @@ -103,7 +104,7 @@ class VideoPage(BaseVideoPage): class BoutiqueVideoPage(BaseVideoPage): - URL_REGEXP = re.compile('http://boutique.ina.fr/video/(.+).html') + URL_REGEXP = re.compile('http://boutique.ina.fr/(audio|video)/(.+).html') def create_id(self, id): return u'boutique.%s' % id