Support pagination
Signed-off-by: Vicnet <vo.publique@gmail.com>
This commit is contained in:
parent
03cc113851
commit
39ada3289f
2 changed files with 32 additions and 7 deletions
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
|
|
||||||
import urllib
|
import urllib
|
||||||
|
import re
|
||||||
|
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
|
|
@ -51,18 +52,31 @@ class LaCentraleBrowser(BaseBrowser):
|
||||||
|
|
||||||
def iter_prices(self, product):
    """Yield every price found for *product*, following result pagination.

    The listing URL is assembled from the search criteria through
    ``self.buildUrl`` and always starts at ``num=1``.  After the prices of
    the current page have been yielded, ``self.page.get_next()`` is asked
    for the following page number; the ``num`` parameter of the URL is
    rewritten and the page is fetched, until ``get_next()`` returns a
    falsy value.

    :param product: search criteria, forwarded unchanged to ``self.buildUrl``
    :return: generator over the objects produced by
             ``self.page.iter_prices(numpage)`` for each visited page
    :raises AssertionError: when the browser is not on a ListingAutoPage
                            after a request
    """
    # The URL is built unconditionally: the pagination loop below rewrites
    # its ``num`` parameter, so it must exist even when the browser is
    # already on the listing page and no initial request is issued.
    # NOTE(review): assumes buildUrl only formats a query-string fragment
    # and has no side effects -- confirm against its definition.
    url = '/listing_auto.php?num=1&witchSearch=0'
    for template, field in (
        ('Citadine={}', 'urban'),
        ('prix_maxi={}', 'maxprice'),
        ('km_maxi={}', 'maxdist'),
        ('nbportes=%3D{}', 'nbdoors'),
        ('cp={}', 'dept'),
        ('origin={}', 'origin'),
    ):
        url += self.buildUrl(product, template, field)

    if not self.is_on_page(ListingAutoPage):
        self.location(url)

    assert self.is_on_page(ListingAutoPage)

    numpage = 1
    while True:
        # Parse the current page.
        for price in self.page.iter_prices(numpage):
            yield price

        # Stop as soon as the page advertises no successor.
        numpage = self.page.get_next()
        if not numpage:
            break

        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        url = re.sub(r'num=\d+', 'num={}'.format(numpage), url)
        self.location(url)
        assert self.is_on_page(ListingAutoPage)
|
||||||
|
|
||||||
# def iter_prices(self, zipcode, product):
|
# def iter_prices(self, zipcode, product):
|
||||||
# data = {'aff_param_0_0': '',
|
# data = {'aff_param_0_0': '',
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
|
||||||
from weboob.capabilities import NotAvailable
|
from weboob.capabilities import NotAvailable
|
||||||
from weboob.capabilities.pricecomparison import Product, Price, Shop
|
from weboob.capabilities.pricecomparison import Product, Price, Shop
|
||||||
import re
|
import re
|
||||||
|
from decimal import Decimal
|
||||||
|
|
||||||
__all__ = ['MainPage','ListingAutoPage']
|
__all__ = ['MainPage','ListingAutoPage']
|
||||||
|
|
||||||
|
|
@ -38,6 +39,7 @@ class MainPage(BasePage):
|
||||||
yield product
|
yield product
|
||||||
|
|
||||||
class ListingAutoPage(BasePage):
|
class ListingAutoPage(BasePage):
|
||||||
|
|
||||||
def _extract(self, tr, name):
|
def _extract(self, tr, name):
|
||||||
'Extract content from td element with class name'
|
'Extract content from td element with class name'
|
||||||
td = tr.cssselect('td.' + name + ' a')
|
td = tr.cssselect('td.' + name + ' a')
|
||||||
|
|
@ -45,9 +47,9 @@ class ListingAutoPage(BasePage):
|
||||||
return ''
|
return ''
|
||||||
return td[-1].text_content().strip()
|
return td[-1].text_content().strip()
|
||||||
|
|
||||||
def iter_prices(self):
|
def iter_prices(self, numpage):
|
||||||
for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]'):
|
for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]'):
|
||||||
id = tr.attrib['id'][3:]
|
id = '{numpage}.{id}'.format(numpage=numpage, id=tr.attrib['id'][3:])
|
||||||
title = self._extract(tr, 'lcbrand')
|
title = self._extract(tr, 'lcbrand')
|
||||||
if not title:
|
if not title:
|
||||||
continue
|
continue
|
||||||
|
|
@ -61,13 +63,22 @@ class ListingAutoPage(BasePage):
|
||||||
cost = ', ' + self._extract(tr, 'lcprice')
|
cost = ', ' + self._extract(tr, 'lcprice')
|
||||||
|
|
||||||
price = Price(id)
|
price = Price(id)
|
||||||
price.cost = int(re.findall(r'\d+',cost.replace(' ',''))[0])
|
price.cost = Decimal(re.findall(r'\d+',cost.replace(' ',''))[0])
|
||||||
price.currency = u'€'
|
price.currency = u'€'
|
||||||
price.message = unicode(title)
|
price.message = unicode(title)
|
||||||
|
|
||||||
price.set_empty_fields(NotAvailable)
|
price.set_empty_fields(NotAvailable)
|
||||||
yield price
|
yield price
|
||||||
|
|
||||||
|
def get_next(self):
    """Return the number of the results page following the current one.

    Walks the ``a.page`` links of the document and considers the first one
    whose immediately preceding sibling is a ``<span>`` element
    (presumably the un-linked marker of the current page -- verify against
    the site's markup).  The page number is read from the ``num=``
    parameter of that link's ``href``.

    :return: the page number as an ``int``, or ``None`` when no such link
             exists or its ``href`` carries no ``num=<digits>`` value
    """
    for a in self.document.getroot().cssselect('a.page'):
        previous = a.getprevious()
        if previous is None or previous.tag != 'span':
            continue

        # An anchor without an href makes .get() return None; search an
        # empty string instead so the lookup cannot raise TypeError.
        m = re.search(r'num=(\d+)', a.get('href') or '')
        return int(m.group(1)) if m else None

    return None
|
||||||
|
|
||||||
#class ComparisonResultsPage(BasePage):
|
#class ComparisonResultsPage(BasePage):
|
||||||
#def get_product_name(self):
|
#def get_product_name(self):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue