fix parsing in pages

This commit is contained in:
Romain Bignon 2012-10-27 10:31:48 +02:00
commit ad99fef5c2
2 changed files with 6 additions and 6 deletions

View file

@ -33,7 +33,7 @@ __all__ = ['IndexPage']
class IndexPage(PornPage): class IndexPage(PornPage):
def iter_videos(self): def iter_videos(self):
for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'): for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'):
a = li.find('a') a = li.find('div').find('a')
if a is None or a.find('img') is None: if a is None or a.find('img') is None:
continue continue
@ -50,7 +50,7 @@ class IndexPage(PornPage):
video.thumbnail = Thumbnail(unicode(thumbnail_url)) video.thumbnail = Thumbnail(unicode(thumbnail_url))
hours = minutes = seconds = 0 hours = minutes = seconds = 0
div = li.cssselect('h2.duration') div = li.cssselect('div.duration')
if len(div) > 0: if len(div) > 0:
pack = [int(s) for s in div[0].text.strip().split(':')] pack = [int(s) for s in div[0].text.strip().split(':')]
if len(pack) == 3: if len(pack) == 3:
@ -60,7 +60,7 @@ class IndexPage(PornPage):
video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds) video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
div = li.cssselect('div.rating h2') div = li.cssselect('div.rating')
if div: if div:
video.rating = int(div[0].text.strip('% ')) video.rating = int(div[0].text.strip('% '))
video.rating_max = 100 video.rating_max = 100

View file

@ -84,8 +84,8 @@ class VideoPage(PornPage):
else: else:
v.author = unicode(author.text) v.author = unicode(author.text)
elif name == 'Rating:': elif name == 'Rating:':
r = value.split() value = li.find('span').text
v.rating = int(r[0].rstrip('%')) v.rating = int(value.rstrip('%'))
v.rating_max = 100 v.rating_max = 100
elif name == 'Date:': elif name == 'Date:':
v.date = parse_dt(value) v.date = parse_dt(value)