Fix parsing on German pages

This commit is contained in:
Florent Fourcot 2013-05-16 23:12:02 +02:00
commit ab5ee589d2

View file

@ -38,10 +38,10 @@ class IndexPage(BasePage):
videos = self.document.getroot().cssselect("div[class=video]") videos = self.document.getroot().cssselect("div[class=video]")
for div in videos: for div in videos:
title = div.find('h2').find('a').text title = div.find('h2').find('a').text
m = re.match(r'/fr/videos/(.*)\.html', div.find('h2').find('a').attrib['href']) m = re.match(r'/(fr|de)/videos/(.*)\.html', div.find('h2').find('a').attrib['href'])
_id = '' _id = ''
if m: if m:
_id = m.group(1) _id = m.group(2)
rating = rating_max = 0 rating = rating_max = 0
rates = self.parser.select(div, 'div[class=rateContainer]', 1) rates = self.parser.select(div, 'div[class=rateContainer]', 1)
for r in rates.findall('div'): for r in rates.findall('div'):