do not follow spam links

This commit is contained in:
Romain Bignon 2013-08-07 19:06:34 +02:00
commit e1eb1b80af
2 changed files with 10 additions and 9 deletions

View file

@ -140,8 +140,7 @@ class Cragr(BaseBrowser):
accounts_list.extend(self.page.get_list())
# credit cards
cards_page = self.page.cards_page()
if cards_page:
for cards_page in self.page.cards_pages():
self.location(cards_page)
assert self.is_on_page(CardsPage)
accounts_list.extend(self.page.get_list())

View file

@ -102,13 +102,15 @@ class _AccountsPage(BasePage):
yield account
def cards_page(self):
    """Return the first card-page link found in the accounts table.

    Looks for anchors located inside ``ligne-connexe`` rows of the
    ``ca-table`` table and returns the ``href`` of the first one.

    :returns: the first matching ``href`` value, or ``None`` when the
              document contains no such link.
    """
    hrefs = self.document.xpath('//table[@class="ca-table"]'
                                '/tr[@class="ligne-connexe"]'
                                '//a/@href')
    # Test for emptiness explicitly instead of indexing inside a broad
    # try/except: an IndexError raised by the query itself should not be
    # mistaken for "no link found".
    if not hrefs:
        return None
    return hrefs[0]
def cards_pages(self):
    """Yield the card-page links found in the accounts table.

    Each ``ligne-connexe`` row of the ``ca-table`` table is inspected and
    the ``href`` of its first anchor is yielded. Rows without any anchor
    are skipped, and so are ``javascript:`` pseudo-links, which are not
    pages to follow.

    :returns: a generator over the ``href`` values, in document order.
    """
    rows = self.document.xpath('//table[@class="ca-table"]/tr[@class="ligne-connexe"]')
    for row in rows:
        hrefs = row.xpath('.//a/@href')
        if not hrefs:
            # Row without a link: nothing to follow.
            continue

        link = hrefs[0]
        # URI schemes are case-insensitive and surrounding whitespace is
        # not significant in an href, so normalise before testing the
        # scheme; the link itself is yielded untouched.
        if link.strip().lower().startswith('javascript:'):
            continue

        yield link
class CardsPage(BasePage):