Support pagination

Signed-off-by: Vicnet <vo.publique@gmail.com>
This commit is contained in:
Vicnet 2014-02-19 14:04:53 +01:00 committed by Florent
commit 39ada3289f
2 changed files with 32 additions and 7 deletions

View file

@ -19,6 +19,7 @@
import urllib
import re
from weboob.tools.browser import BaseBrowser
@ -51,18 +52,31 @@ class LaCentraleBrowser(BaseBrowser):
def iter_prices(self, product):
    """Yield every price listed for *product*, following pagination.

    The listing URL is assembled from the product's search criteria and
    opened unless a ListingAutoPage is already loaded.  The prices of each
    result page are then yielded in turn until the page reports no next
    page number.

    Raises AssertionError when a navigation does not end on a
    ListingAutoPage.
    """
    # Build the URL unconditionally: the pagination loop below rewrites
    # its ``num`` parameter, so it must exist even when the first listing
    # page was already loaded before this call.
    # NOTE(review): assumes buildUrl has no side effects -- confirm.
    url = '/listing_auto.php?num=1&witchSearch=0'
    criteria = (
        ('Citadine={}', 'urban'),
        ('prix_maxi={}', 'maxprice'),
        ('km_maxi={}', 'maxdist'),
        ('nbportes=%3D{}', 'nbdoors'),
        ('cp={}', 'dept'),
        ('origin={}', 'origin'),
    )
    for template, field in criteria:
        url += self.buildUrl(product, template, field)

    if not self.is_on_page(ListingAutoPage):
        self.location(url)
    assert self.is_on_page(ListingAutoPage)

    numpage = 1
    while True:
        # Emit the prices of the page currently loaded.
        for price in self.page.iter_prices(numpage):
            yield price
        # Ask the page which page comes next; a falsy answer ends the walk.
        numpage = self.page.get_next()
        if not numpage:
            break
        # Raw string keeps ``\d`` a regex class rather than a str escape.
        url = re.sub(r'num=(\d+)', 'num={}'.format(numpage), url)
        self.location(url)
        assert self.is_on_page(ListingAutoPage)
# def iter_prices(self, zipcode, product):
# data = {'aff_param_0_0': '',

View file

@ -25,6 +25,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
from weboob.capabilities import NotAvailable
from weboob.capabilities.pricecomparison import Product, Price, Shop
import re
from decimal import Decimal
__all__ = ['MainPage','ListingAutoPage']
@ -38,6 +39,7 @@ class MainPage(BasePage):
yield product
class ListingAutoPage(BasePage):
def _extract(self, tr, name):
'Extract content from td element with class name'
td = tr.cssselect('td.' + name + ' a')
@ -45,9 +47,9 @@ class ListingAutoPage(BasePage):
return ''
return td[-1].text_content().strip()
def iter_prices(self):
for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]'):
id = tr.attrib['id'][3:]
def iter_prices(self, numpage):
for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]'):
id = '{numpage}.{id}'.format(numpage=numpage, id=tr.attrib['id'][3:])
title = self._extract(tr, 'lcbrand')
if not title:
continue
@ -61,13 +63,22 @@ class ListingAutoPage(BasePage):
cost = ', ' + self._extract(tr, 'lcprice')
price = Price(id)
price.cost = int(re.findall(r'\d+',cost.replace(' ',''))[0])
price.cost = Decimal(re.findall(r'\d+',cost.replace(' ',''))[0])
price.currency = u''
price.message = unicode(title)
price.set_empty_fields(NotAvailable)
yield price
def get_next(self):
    """Return the number of the next result page, or None if there is none.

    Looks at the ``a.page`` links of the document and uses the first one
    whose previous sibling is a ``span`` element (presumably the marker of
    the current page -- confirm against the site markup).  The page number
    is read from the ``num=`` parameter of that link's ``href``.

    Returns None when no such link exists, or when the link has no
    ``href`` or its ``href`` carries no ``num=`` parameter.
    """
    for link in self.document.getroot().cssselect('a.page'):
        previous = link.getprevious()
        if previous is None or previous.tag != 'span':
            continue
        # A link without href yields None; fall back to '' so the search
        # simply fails instead of raising TypeError.
        match = re.search(r'num=(\d+)', link.get('href') or '')
        if match is None:
            return None
        return int(match.group(1))
    return None
#class ComparisonResultsPage(BasePage):
#def get_product_name(self):