Support pagination

Signed-off-by: Vicnet <vo.publique@gmail.com>
2014-02-19 14:04:53 +01:00 · 2014-02-19 14:04:53 +01:00 · 39ada3289f
commit 39ada3289f
parent 03cc113851
2 changed files with 32 additions and 7 deletions
--- a/modules/lacentrale/browser.py
+++ b/modules/lacentrale/browser.py
@ -19,6 +19,7 @@
 import urllib
 import re
 from weboob.tools.browser import BaseBrowser
@ -51,18 +52,31 @@ class LaCentraleBrowser(BaseBrowser):
    def iter_prices(self, product):
        if not self.is_on_page(ListingAutoPage):
-            url = '/listing_auto.php?witchSearch=0'
+            url = '/listing_auto.php?num=1&witchSearch=0'
            url += self.buildUrl(product, 'Citadine={}','urban')
            url += self.buildUrl(product, 'prix_maxi={}','maxprice')
            url += self.buildUrl(product, 'km_maxi={}','maxdist')
            url += self.buildUrl(product, 'nbportes=%3D{}','nbdoors')
            url += self.buildUrl(product, 'cp={}','dept')
            url += self.buildUrl(product, 'origin={}','origin')
-            print url
+            #print url
            self.location(url)
        assert self.is_on_page(ListingAutoPage)
-        return self.page.iter_prices()
+
        numpage = 1
        while True:
            # parse the current page
            for price in self.page.iter_prices(numpage):
                yield price
            # check if next page
            numpage = self.page.get_next()
            if not numpage:
                break
            url = re.sub('num=(\d+)','num={}'.format(numpage),url)
            self.location(url)
            assert self.is_on_page(ListingAutoPage)
 #    def iter_prices(self, zipcode, product):
 #        data = {'aff_param_0_0':            '',
--- a/modules/lacentrale/pages.py
+++ b/modules/lacentrale/pages.py
@ -25,6 +25,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
 from weboob.capabilities import NotAvailable
 from weboob.capabilities.pricecomparison import Product, Price, Shop
 import re
 from decimal import Decimal
 __all__ = ['MainPage','ListingAutoPage']
@ -38,6 +39,7 @@ class MainPage(BasePage):
        yield product
 class ListingAutoPage(BasePage):
    def _extract(self, tr, name):
        'Extract content from td element with class name'
        td = tr.cssselect('td.' + name + ' a')
@ -45,9 +47,9 @@ class ListingAutoPage(BasePage):
            return ''
        return td[-1].text_content().strip()
-    def iter_prices(self):
+    def iter_prices(self, numpage):
-        for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]'):
+        for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]'):
-            id = tr.attrib['id'][3:]
+            id = '{numpage}.{id}'.format(numpage=numpage, id=tr.attrib['id'][3:])
            title = self._extract(tr, 'lcbrand')
            if not title:
                continue
@ -61,13 +63,22 @@ class ListingAutoPage(BasePage):
            cost = ', ' + self._extract(tr, 'lcprice')
            price = Price(id)
-            price.cost = int(re.findall(r'\d+',cost.replace(' ',''))[0])
+            price.cost = Decimal(re.findall(r'\d+',cost.replace(' ',''))[0])
            price.currency = u'€'
            price.message = unicode(title)
            price.set_empty_fields(NotAvailable)
            yield price
    def get_next(self):
        """Return the number of the next result page, or None if there is none.

        Scans the ``a.page`` links of the document and takes the first one
        whose previous sibling is a ``span`` element (presumably the span
        marks the current page -- TODO confirm against the site markup).
        The page number is read from the ``num=<digits>`` part of that
        link's ``href``.

        Returns:
            The page number as an int, or None when no such link exists or
            its href carries no ``num=`` parameter.
        """
        for a in self.document.getroot().cssselect('a.page'):
            s = a.getprevious()
            if s is not None and s.tag == 'span':
                # A link without an href attribute yields None from get();
                # fall back to '' so re.search does not raise TypeError and
                # the link is handled like any other href lacking 'num='.
                m = re.search(r'num=(\d+)', a.get('href') or '')
                if not m:
                    return None
                return int(m.group(1))
        return None
 #class ComparisonResultsPage(BasePage):
    #def get_product_name(self):