ina: Try to handle more videos
Also, the URLs are now closer to what a real browser loads. It still does not work though.
This commit is contained in:
parent
8d5de5d8a0
commit
abe3f3c4a0
1 changed file with 9 additions and 6 deletions
|
|
@@ -58,15 +58,16 @@ class BaseVideoPage(BasePage):
|
||||||
|
|
||||||
def get_url(self):
    """Build the video stream URL from the player's flashvars.

    Reads the query-string encoded ``value`` of the first
    ``param[name="flashvars"]`` element, fetches the "infovideo" document
    for the notice through ``self.browser.readurl``, takes the text found
    between ``<Media>`` and ``</Media>`` in the response, and appends the
    channel id, module and pkey path segments to it.

    Returns:
        The assembled URL as a unicode string.

    Raises:
        IndexError: if the page has no flashvars ``param`` element.
        KeyError: if ``id_notice``, ``module``, ``id_chaine`` or ``pkey``
            is absent from the flashvars.
    """
    flashvars = self.document.getroot().cssselect('param[name="flashvars"]')[0]
    params = parse_qs(flashvars.attrib['value'])

    module = params['module'][0]
    info_url = 'http://www.ina.fr/player/infovideo/id_notice/%s/module_request/%s' % (
        params['id_notice'][0], module)
    info = self.browser.readurl(info_url)

    # NOTE(review): str.find() returns -1 when a tag is missing, which is
    # not detected here; the slice then yields an arbitrary substring.
    start = info.find('<Media>') + 7  # 7 == len('<Media>')
    end = info.find('</Media>')
    media = info[start:end]

    return u'%s/id_chaine/%s/module_request/%s/pkey/%s' % (
        media, params['id_chaine'][0], module, params['pkey'][0])
|
||||||
|
|
||||||
def parse_date_and_duration(self, text):
|
def parse_date_and_duration(self, text):
|
||||||
duration_regexp = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s')
|
duration_regexp = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s')
|
||||||
m = duration_regexp.match(text)
|
m = duration_regexp.match(text)
|
||||||
if m:
|
if m:
|
||||||
day, month, year = [int(s) for s in m.group(2).split('/')]
|
day, month, year = [abs(int(s)) for s in m.group(2).split('/')]
|
||||||
date = datetime.datetime(year, month, day)
|
date = datetime.datetime(year, month, day)
|
||||||
duration = datetime.timedelta(hours=int(m.group(4) if m.group(4) is not None else 0),
|
duration = datetime.timedelta(hours=int(m.group(4) if m.group(4) is not None else 0),
|
||||||
minutes=int(m.group(6) if m.group(6) is not None else 0),
|
minutes=int(m.group(6) if m.group(6) is not None else 0),
|
||||||
|
|
@@ -99,11 +100,13 @@ class VideoPage(BaseVideoPage):
|
||||||
return self.parse_date_and_duration(qr.find('h2').tail.strip())
|
return self.parse_date_and_duration(qr.find('h2').tail.strip())
|
||||||
|
|
||||||
def get_title(self):
    """Return the video title as a unicode string.

    The title is the stripped text of the first ``h2.titre-propre``
    element found inside the first ``div.container-global-qr`` of the
    page.

    Raises:
        IndexError: if either the container or the heading is missing.
    """
    root = self.document.getroot()
    containers = self.parser.select(root, 'div.container-global-qr')
    heading = containers[0].cssselect('h2.titre-propre')[0]
    return unicode(heading.text.strip())
|
||||||
|
|
||||||
def get_description(self):
    """Return the video description as a unicode string, if there is one.

    The description is the stripped text of the first ``<p>`` inside the
    first ``<div>`` of the second ``div.container-global-qr`` element.

    Returns:
        The description text, or ``None`` when the paragraph is missing
        or carries no text.

    Raises:
        IndexError: if the page has fewer than two
            ``div.container-global-qr`` elements.
    """
    containers = self.parser.select(self.document.getroot(), 'div.container-global-qr')
    desc = containers[1].find('div').find('p')
    # find() returns None when nothing matches. The element must be compared
    # against None explicitly: the truth value of an lxml element reflects
    # whether it has children, so a plain <p>text</p> would be falsy and its
    # description silently dropped.
    if desc is not None and desc.text is not None:
        return unicode(desc.text.strip())
    return None
|
||||||
|
|
||||||
|
|
||||||
class BoutiqueVideoPage(BaseVideoPage):
|
class BoutiqueVideoPage(BaseVideoPage):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue