diff --git a/modules/lacentrale/browser.py b/modules/lacentrale/browser.py index ede62fbf..cb64150a 100644 --- a/modules/lacentrale/browser.py +++ b/modules/lacentrale/browser.py @@ -19,6 +19,7 @@ import urllib +import re from weboob.tools.browser import BaseBrowser @@ -51,18 +52,31 @@ class LaCentraleBrowser(BaseBrowser): def iter_prices(self, product): if not self.is_on_page(ListingAutoPage): - url = '/listing_auto.php?witchSearch=0' + url = '/listing_auto.php?num=1&witchSearch=0' url += self.buildUrl(product, 'Citadine={}','urban') url += self.buildUrl(product, 'prix_maxi={}','maxprice') url += self.buildUrl(product, 'km_maxi={}','maxdist') url += self.buildUrl(product, 'nbportes=%3D{}','nbdoors') url += self.buildUrl(product, 'cp={}','dept') url += self.buildUrl(product, 'origin={}','origin') - print url + #print url self.location(url) assert self.is_on_page(ListingAutoPage) - return self.page.iter_prices() + + numpage = 1 + while True: + # parse the current page + for price in self.page.iter_prices(numpage): + yield price + + # check if next page + numpage = self.page.get_next() + if not numpage: + break + url = re.sub('num=(\d+)','num={}'.format(numpage),url) + self.location(url) + assert self.is_on_page(ListingAutoPage) # def iter_prices(self, zipcode, product): # data = {'aff_param_0_0': '', diff --git a/modules/lacentrale/pages.py b/modules/lacentrale/pages.py index 1bf79db9..9ba94a7b 100644 --- a/modules/lacentrale/pages.py +++ b/modules/lacentrale/pages.py @@ -25,6 +25,7 @@ from weboob.tools.browser import BasePage, BrokenPageError from weboob.capabilities import NotAvailable from weboob.capabilities.pricecomparison import Product, Price, Shop import re +from decimal import Decimal __all__ = ['MainPage','ListingAutoPage'] @@ -38,6 +39,7 @@ class MainPage(BasePage): yield product class ListingAutoPage(BasePage): + def _extract(self, tr, name): 'Extract content from td element with class name' td = tr.cssselect('td.' + name + ' a') @@ -45,9 +47,9 @@ class ListingAutoPage(BasePage): return '' return td[-1].text_content().strip() - def iter_prices(self): - for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]'): - id = tr.attrib['id'][3:] + def iter_prices(self, numpage): + for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]'): + id = '{numpage}.{id}'.format(numpage=numpage, id=tr.attrib['id'][3:]) title = self._extract(tr, 'lcbrand') if not title: continue @@ -61,13 +63,22 @@ class ListingAutoPage(BasePage): cost = ', ' + self._extract(tr, 'lcprice') price = Price(id) - price.cost = int(re.findall(r'\d+',cost.replace(' ',''))[0]) + price.cost = Decimal(re.findall(r'\d+',cost.replace(' ',''))[0]) price.currency = u'€' price.message = unicode(title) price.set_empty_fields(NotAvailable) yield price + def get_next(self): + for a in self.document.getroot().cssselect('a.page'): + s = a.getprevious() + if s is not None and s.tag=='span': + m = re.search('num=(\d+)', a.get('href')) + if not m: + return None + return int(m.group(1)) + return None #class ComparisonResultsPage(BasePage): #def get_product_name(self):