fix parsing of URLs
This commit is contained in:
parent
27d666dfe1
commit
ce91c52912
3 changed files with 6 additions and 4 deletions
|
|
@ -47,6 +47,7 @@ class InaBackend(BaseBackend, ICapVideo):
|
||||||
return self.browser.iter_search_results(pattern)
|
return self.browser.iter_search_results(pattern)
|
||||||
|
|
||||||
def fill_video(self, video, fields):
|
def fill_video(self, video, fields):
|
||||||
|
return video
|
||||||
if fields != ['thumbnail']:
|
if fields != ['thumbnail']:
|
||||||
# if we don't want only the thumbnail, we probably want also every fields
|
# if we don't want only the thumbnail, we probably want also every fields
|
||||||
with self.browser:
|
with self.browser:
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ __all__ = ['InaBrowser']
|
||||||
|
|
||||||
class InaBrowser(BaseBrowser):
|
class InaBrowser(BaseBrowser):
|
||||||
DOMAIN = 'ina.fr'
|
DOMAIN = 'ina.fr'
|
||||||
PAGES = {'http://boutique\.ina\.fr/video/.+\.html': BoutiqueVideoPage,
|
PAGES = {'http://boutique\.ina\.fr/(video|audio)/.+\.html': BoutiqueVideoPage,
|
||||||
'http://www\.ina\.fr/.+\.html': VideoPage,
|
'http://www\.ina\.fr/.+\.html': VideoPage,
|
||||||
'http://boutique\.ina\.fr/recherche/.+': SearchPage,
|
'http://boutique\.ina\.fr/recherche/.+': SearchPage,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,8 +56,9 @@ class BaseVideoPage(BasePage):
|
||||||
|
|
||||||
def get_url(self):
|
def get_url(self):
|
||||||
qs = parse_qs(self.document.getroot().cssselect('param[name="flashvars"]')[0].attrib['value'])
|
qs = parse_qs(self.document.getroot().cssselect('param[name="flashvars"]')[0].attrib['value'])
|
||||||
url = 'http://mp4.ina.fr/lecture/lire/id_notice/%s/token_notice/%s' % (qs['id_notice'][0], qs['token_notice'][0])
|
s = self.browser.readurl('http://boutique.ina.fr/player/infovideo/id_notice/%s' % qs['id_notice'][0])
|
||||||
return url
|
s = s[s.find('<Media>')+7:s.find('</Media>')]
|
||||||
|
return '%s/pkey/%s' % (s, qs['pkey'][0])
|
||||||
|
|
||||||
def parse_date_and_duration(self, text):
|
def parse_date_and_duration(self, text):
|
||||||
duration_regexp = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s')
|
duration_regexp = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s')
|
||||||
|
|
@ -103,7 +104,7 @@ class VideoPage(BaseVideoPage):
|
||||||
|
|
||||||
|
|
||||||
class BoutiqueVideoPage(BaseVideoPage):
|
class BoutiqueVideoPage(BaseVideoPage):
|
||||||
URL_REGEXP = re.compile('http://boutique.ina.fr/video/(.+).html')
|
URL_REGEXP = re.compile('http://boutique.ina.fr/(audio|video)/(.+).html')
|
||||||
|
|
||||||
def create_id(self, id):
|
def create_id(self, id):
|
||||||
return u'boutique.%s' % id
|
return u'boutique.%s' % id
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue