Fix parsing of date, duration and title

This commit is contained in:
Romain Bignon 2010-11-04 22:16:04 +01:00
commit d4ecc0c5de

View file

@ -55,7 +55,7 @@ class VideoPage(BasePage):
def get_date_and_duration(self): def get_date_and_duration(self):
duration_regexp = re.compile('(.+) - (.+)min(.+)s') duration_regexp = re.compile('(.+) - (.+)min(.+)s')
el = self.document.getroot().cssselect('.bloc-video-edito h3')[0] el = self.document.getroot().cssselect('div.bloc-produit-haut p.date')[0]
if el is not None: if el is not None:
m = duration_regexp.match(el.text.strip()) m = duration_regexp.match(el.text.strip())
if m: if m:
@ -67,7 +67,7 @@ class VideoPage(BasePage):
return None return None
def get_title(self): def get_title(self):
el = self.document.getroot().cssselect('.bloc-video-edito h2')[0] el = self.document.getroot().cssselect('div.bloc-produit-haut h1')[0]
if el is not None: if el is not None:
return unicode(el.text.strip()) return unicode(el.text.strip())
else: else: