Support pagination

Signed-off-by: Vicnet <vo.publique@gmail.com>
2014-02-19 14:04:53 +01:00 · 2014-02-19 14:04:53 +01:00 · 39ada3289f
commit 39ada3289f
parent 03cc113851
2 changed files with 32 additions and 7 deletions
--- a/modules/lacentrale/browser.py
+++ b/modules/lacentrale/browser.py
@@ -19,6 +19,7 @@


 import urllib
+import re

 from weboob.tools.browser import BaseBrowser

@@ -51,18 +52,31 @@ class LaCentraleBrowser(BaseBrowser):

    def iter_prices(self, product):
        if not self.is_on_page(ListingAutoPage):
-            url = '/listing_auto.php?witchSearch=0'
+            url = '/listing_auto.php?num=1&witchSearch=0'
            url += self.buildUrl(product, 'Citadine={}','urban')
            url += self.buildUrl(product, 'prix_maxi={}','maxprice')
            url += self.buildUrl(product, 'km_maxi={}','maxdist')
            url += self.buildUrl(product, 'nbportes=%3D{}','nbdoors')
            url += self.buildUrl(product, 'cp={}','dept')
            url += self.buildUrl(product, 'origin={}','origin')
-            print url
+            #print url
            self.location(url)

        assert self.is_on_page(ListingAutoPage)
-        return self.page.iter_prices()
+
+        numpage = 1
+        while True:
+            # parse the current page
+            for price in self.page.iter_prices(numpage):
+                yield price
+
+            # check if next page
+            numpage = self.page.get_next()
+            if not numpage:
+                break
+            url = re.sub('num=(\d+)','num={}'.format(numpage),url)
+            self.location(url)
+            assert self.is_on_page(ListingAutoPage)

 #    def iter_prices(self, zipcode, product):
 #        data = {'aff_param_0_0':            '',
--- a/modules/lacentrale/pages.py
+++ b/modules/lacentrale/pages.py
@@ -25,6 +25,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
 from weboob.capabilities import NotAvailable
 from weboob.capabilities.pricecomparison import Product, Price, Shop
 import re
+from decimal import Decimal

 __all__ = ['MainPage','ListingAutoPage']

@@ -38,6 +39,7 @@ class MainPage(BasePage):
        yield product

 class ListingAutoPage(BasePage):
+
    def _extract(self, tr, name):
        'Extract content from td element with class name'
        td = tr.cssselect('td.' + name + ' a')
@@ -45,9 +47,9 @@ class ListingAutoPage(BasePage):
            return ''
        return td[-1].text_content().strip()

-    def iter_prices(self):
-        for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]'):
-            id = tr.attrib['id'][3:]
+    def iter_prices(self, numpage):
+        for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]'):
+            id = '{numpage}.{id}'.format(numpage=numpage, id=tr.attrib['id'][3:])
            title = self._extract(tr, 'lcbrand')
            if not title:
                continue
@@ -61,13 +63,22 @@ class ListingAutoPage(BasePage):
            cost = ', ' + self._extract(tr, 'lcprice')

            price = Price(id)
-            price.cost = int(re.findall(r'\d+',cost.replace(' ',''))[0])
+            price.cost = Decimal(re.findall(r'\d+',cost.replace(' ',''))[0])
            price.currency = u'€'
            price.message = unicode(title)

            price.set_empty_fields(NotAvailable)
            yield price

+    def get_next(self):
+        for a in self.document.getroot().cssselect('a.page'):
+            s = a.getprevious()
+            if s is not None and s.tag=='span':
+                m = re.search('num=(\d+)', a.get('href'))
+                if not m:
+                    return None
+                return int(m.group(1))
+        return None

 #class ComparisonResultsPage(BasePage):
    #def get_product_name(self):