fix parsing of live streams in search results

This commit is contained in:
Romain Bignon 2011-05-12 16:33:12 +02:00
commit 1c418763a6

View file

@ -22,6 +22,7 @@ import urllib
import re import re
from weboob.tools.capabilities.thumbnail import Thumbnail from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities.base import NotAvailable
from weboob.tools.misc import html2text from weboob.tools.misc import html2text
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage, BrokenPageError
@ -49,14 +50,19 @@ class IndexPage(BasePage):
video.title = self.parser.select(div, 'h3 a', 1).text video.title = self.parser.select(div, 'h3 a', 1).text
video.author = self.parser.select(div, 'div.dmpi_user_login', 1).find('a').text video.author = self.parser.select(div, 'div.dmpi_user_login', 1).find('a').text
video.description = html2text(self.parser.tostring(self.parser.select(div, 'div.dmpi_video_description', 1))).strip() video.description = html2text(self.parser.tostring(self.parser.select(div, 'div.dmpi_video_description', 1))).strip()
try:
parts = self.parser.select(div, 'div.duration', 1).text.split(':') parts = self.parser.select(div, 'div.duration', 1).text.split(':')
if len(parts) < 2: except BrokenPageError:
# it's probably a live, np.
video.duration = NotAvailable
else:
if len(parts) == 1:
seconds = parts[0] seconds = parts[0]
hours = minutes = 0 hours = minutes = 0
elif len(parts) < 3: elif len(parts) == 2:
minutes, seconds = parts minutes, seconds = parts
hours = 0 hours = 0
elif len(parts) < 4: elif len(parts) == 3:
hours, minutes, seconds = parts hours, minutes, seconds = parts
else: else:
raise BrokenPageError('Unable to parse duration %r' % self.parser.select(div, 'div.duration', 1).text) raise BrokenPageError('Unable to parse duration %r' % self.parser.select(div, 'div.duration', 1).text)
@ -68,6 +74,7 @@ class IndexPage(BasePage):
video.rating_max = self.get_rate(rating_div) video.rating_max = self.get_rate(rating_div)
video.rating = self.get_rate(rating_div.find('div')) video.rating = self.get_rate(rating_div.find('div'))
# XXX missing date # XXX missing date
video.date = NotAvailable
yield video yield video
def get_rate(self, div): def get_rate(self, div):