Better id and some improvement
Signed-off-by: Vicnet <vo.publique@gmail.com>
This commit is contained in:
parent
4584384db7
commit
8f09828ebf
3 changed files with 138 additions and 95 deletions
|
|
@ -28,6 +28,7 @@ from .browser import LaCentraleBrowser
|
|||
__all__ = ['LaCentraleBackend']
|
||||
|
||||
|
||||
# I implement capability
|
||||
class LaCentraleBackend(BaseBackend, ICapPriceComparison):
|
||||
NAME = 'lacentrale'
|
||||
MAINTAINER = u'Vicnet'
|
||||
|
|
@ -66,29 +67,35 @@ class LaCentraleBackend(BaseBackend, ICapPriceComparison):
|
|||
for product in self.browser.iter_products(criteria):
|
||||
yield product
|
||||
|
||||
# inherited from ICapPriceComparison
|
||||
def iter_prices(self, product):
|
||||
# inherited from ICapPriceComparison
|
||||
with self.browser:
|
||||
return self.browser.iter_prices(product)
|
||||
|
||||
# def get_price(self, id):
|
||||
# inherited from ICapPriceComparison
|
||||
# with self.browser:
|
||||
# if isinstance(id, Price):
|
||||
# price = id
|
||||
# else:
|
||||
# p_id, s_id = id.split('.', 2)
|
||||
# product = Product(p_id)
|
||||
# for price in self.iter_prices(product):
|
||||
# if price.id == id:
|
||||
# break
|
||||
# else:
|
||||
# return None
|
||||
# inherited from ICapPriceComparison
|
||||
def get_price(self, id):
|
||||
# id is a url code part for one car page
|
||||
with self.browser:
|
||||
return self.browser.get_price(id)
|
||||
## inherited from ICapPriceComparison
|
||||
#with self.browser:
|
||||
#if isinstance(id, Price):
|
||||
#print "get_price by price", id
|
||||
#price = id
|
||||
#else:
|
||||
##p_id, s_id = id.split('.', 2)
|
||||
##product = Product(p_id)
|
||||
##for price in self.iter_prices(product):
|
||||
##if price.id == id:
|
||||
##break
|
||||
##else:
|
||||
#return None
|
||||
##price.shop.info = self.browser.get_shop_info(price.id.split('.', 2)[-1])
|
||||
#return price
|
||||
|
||||
# price.shop.info = self.browser.get_shop_info(price.id.split('.', 2)[-1])
|
||||
# return price
|
||||
#def fill_price(self, price, fields):
|
||||
#print "VO lacentrale fill_price", price
|
||||
#return self.get_price(price)
|
||||
|
||||
# def fill_price(self, price, fields):
|
||||
# return self.get_price(price)
|
||||
|
||||
# OBJECTS = {Price: fill_price, }
|
||||
#OBJECTS = {Price: fill_price, }
|
||||
|
|
|
|||
|
|
@ -23,12 +23,13 @@ import re
|
|||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages import MainPage, ListingAutoPage
|
||||
from .pages import MainPage, ListingAutoPage, AnnoncePage
|
||||
|
||||
|
||||
__all__ = ['LaCentraleBrowser']
|
||||
|
||||
|
||||
# I manage urls and page location, then transfer to page
|
||||
class LaCentraleBrowser(BaseBrowser):
|
||||
PROTOCOL = 'http'
|
||||
DOMAIN = 'www.lacentrale.fr'
|
||||
|
|
@ -36,6 +37,7 @@ class LaCentraleBrowser(BaseBrowser):
|
|||
PAGES = {
|
||||
'http://www.lacentrale.fr/': MainPage,
|
||||
'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage,
|
||||
'http://www.lacentrale.fr/auto-occasion-annonce-.*': AnnoncePage,
|
||||
}
|
||||
|
||||
def iter_products(self, criteria):
|
||||
|
|
@ -44,20 +46,22 @@ class LaCentraleBrowser(BaseBrowser):
|
|||
assert self.is_on_page(MainPage)
|
||||
return self.page.iter_products(criteria)
|
||||
|
||||
def buildUrl(self, product, request, criteria):
|
||||
def _buildUrl(self, product, request, criteria):
|
||||
if product._criteria.has_key(criteria):
|
||||
return '&' + request.format(product._criteria.get(criteria))
|
||||
return ''
|
||||
|
||||
def iter_prices(self, product):
|
||||
# convert product criteria to url encoding
|
||||
if not self.is_on_page(ListingAutoPage):
|
||||
#TODO use urllib.urlencode(data) ?
|
||||
url = '/listing_auto.php?num=1&witchSearch=0'
|
||||
url += self.buildUrl(product, 'Citadine={}','urban')
|
||||
url += self.buildUrl(product, 'prix_maxi={}','maxprice')
|
||||
url += self.buildUrl(product, 'km_maxi={}','maxdist')
|
||||
url += self.buildUrl(product, 'nbportes=%3D{}','nbdoors')
|
||||
url += self.buildUrl(product, 'cp={}','dept')
|
||||
url += self.buildUrl(product, 'origin={}','origin')
|
||||
url += self._buildUrl(product, 'Citadine={}','urban')
|
||||
url += self._buildUrl(product, 'prix_maxi={}','maxprice')
|
||||
url += self._buildUrl(product, 'km_maxi={}','maxdist')
|
||||
url += self._buildUrl(product, 'nbportes=%3D{}','nbdoors')
|
||||
url += self._buildUrl(product, 'cp={}','dept')
|
||||
url += self._buildUrl(product, 'origine={}','origin')
|
||||
#print url
|
||||
self.location(url)
|
||||
|
||||
|
|
@ -77,23 +81,8 @@ class LaCentraleBrowser(BaseBrowser):
|
|||
self.location(url)
|
||||
assert self.is_on_page(ListingAutoPage)
|
||||
|
||||
# def iter_prices(self, zipcode, product):
|
||||
# data = {'aff_param_0_0': '',
|
||||
# 'aff_param_0_1': 'les points de vente',
|
||||
# 'aff_param_0_3': zipcode,
|
||||
# 'changeNbPerPage': 'off',
|
||||
# 'toDelete': -1,
|
||||
# }
|
||||
# self.location('/index.php?module=dbgestion&action=search', urllib.urlencode(data))
|
||||
#
|
||||
# assert self.is_on_page(ComparisonResultsPage)
|
||||
# return self.page.iter_results(product)
|
||||
#
|
||||
# def get_shop_info(self, id):
|
||||
# data = {'pdv_id': id,
|
||||
# 'module': 'dbgestion',
|
||||
# 'action': 'getPopupInfo'}
|
||||
# self.location('/index.php?module=dbgestion&action=getPopupInfo', urllib.urlencode(data))
|
||||
#
|
||||
# assert self.is_on_page(ShopInfoPage)
|
||||
# return self.page.get_info()
|
||||
def get_price(self, id):
|
||||
#/auto-occasion-annonce-23440064.html
|
||||
self.location('/auto-occasion-annonce-'+id+'.html')
|
||||
assert self.is_on_page(AnnoncePage)
|
||||
return self.page.get_price(id)
|
||||
|
|
|
|||
|
|
@ -22,13 +22,14 @@
|
|||
#import re
|
||||
|
||||
from weboob.tools.browser import BasePage, BrokenPageError
|
||||
from weboob.capabilities import NotAvailable
|
||||
from weboob.capabilities import NotAvailable, NotLoaded
|
||||
from weboob.capabilities.pricecomparison import Product, Price, Shop
|
||||
import re
|
||||
from decimal import Decimal
|
||||
|
||||
__all__ = ['MainPage','ListingAutoPage']
|
||||
|
||||
# I manage main page, i.e. do nothing yet
|
||||
class MainPage(BasePage):
|
||||
def iter_products(self, criteria):
|
||||
product = Product(1)
|
||||
|
|
@ -38,6 +39,23 @@ class MainPage(BasePage):
|
|||
product._criteria = criteria
|
||||
yield product
|
||||
|
||||
def get_decimal(s):
|
||||
return re.findall(r'\d+', s.replace(' ',''))[0]
|
||||
|
||||
def new_price(id, product, cost, title):
|
||||
price = Price(id)
|
||||
price.product = product
|
||||
price.cost = Decimal(get_decimal(cost))
|
||||
price.currency = u'€'
|
||||
price.message = unicode(title)
|
||||
price.set_empty_fields(NotAvailable)
|
||||
|
||||
price.shop = Shop(price.id)
|
||||
price.shop.set_empty_fields(NotAvailable) # NotLoaded
|
||||
|
||||
return price
|
||||
|
||||
# I manage listing page and extract information
|
||||
class ListingAutoPage(BasePage):
|
||||
|
||||
def _extract(self, tr, name):
|
||||
|
|
@ -47,9 +65,17 @@ class ListingAutoPage(BasePage):
|
|||
return ''
|
||||
return td[-1].text_content().strip()
|
||||
|
||||
def _extract_id(self, tr):
|
||||
tdas = tr.cssselect('td.lcbrand a')
|
||||
if tdas is None or len(tdas)==0: return None
|
||||
tda = tdas[0]
|
||||
m = re.search('annonce-(\d+)\.html', tda.get('href'))
|
||||
if not m: return None
|
||||
return m.group(1)
|
||||
|
||||
def iter_prices(self, product, numpage):
|
||||
for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]'):
|
||||
id = '{numpage}.{id}'.format(numpage=numpage, id=tr.attrib['id'][3:])
|
||||
for tr in self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id],tr.lclineB[id]'):
|
||||
id = self._extract_id(tr)
|
||||
title = self._extract(tr, 'lcbrand')
|
||||
if not title:
|
||||
continue
|
||||
|
|
@ -62,16 +88,7 @@ class ListingAutoPage(BasePage):
|
|||
|
||||
cost = ', ' + self._extract(tr, 'lcprice')
|
||||
|
||||
price = Price(id)
|
||||
price.product = product
|
||||
price.cost = Decimal(re.findall(r'\d+',cost.replace(' ',''))[0])
|
||||
price.currency = u'€'
|
||||
price.message = unicode(title)
|
||||
price.shop = Shop(price.id)
|
||||
price.shop.set_empty_fields(NotAvailable)
|
||||
|
||||
price.set_empty_fields(NotAvailable)
|
||||
yield price
|
||||
yield new_price(id, product, cost, title)
|
||||
|
||||
def get_next(self):
|
||||
for a in self.document.getroot().cssselect('a.page'):
|
||||
|
|
@ -83,38 +100,68 @@ class ListingAutoPage(BasePage):
|
|||
return int(m.group(1))
|
||||
return None
|
||||
|
||||
#class ComparisonResultsPage(BasePage):
|
||||
#def get_product_name(self):
|
||||
#try:
|
||||
#div = self.parser.select(self.document.getroot(), 'div#moins_plus_ariane', 1)
|
||||
#except BrokenPageError:
|
||||
#return NotAvailable
|
||||
#else:
|
||||
#m = re.match('Carburant : ([\w\-]+) | .*', div.text)
|
||||
#return m.group(1)
|
||||
# I manage one car page (annonce) and extract information
|
||||
class AnnoncePage(BasePage):
|
||||
|
||||
#def iter_results(self, product=None):
|
||||
#price = None
|
||||
#product.name = self.get_product_name()
|
||||
#for tr in self.document.getroot().cssselect('table#tab_resultat tr'):
|
||||
#if tr.attrib.get('id', '').startswith('pdv'):
|
||||
#price = Price('%s.%s' % (product.id, tr.attrib['id'][3:]))
|
||||
def _extract(self, e, name):
|
||||
'Extract content from li element with class name'
|
||||
li = e.cssselect('li.' + name)
|
||||
if not li:
|
||||
return ''
|
||||
return li[0].text_content().strip()
|
||||
|
||||
#price.product = product
|
||||
def _extract_info(self, e, name):
|
||||
'Extract content from InfoLib'
|
||||
for td in e.cssselect('td.InfoLib'):
|
||||
if name in td.text_content():
|
||||
ntd = td.getnext()
|
||||
if ntd is None: continue
|
||||
return ntd.text_content().strip()
|
||||
return None
|
||||
|
||||
#tds = tr.findall('td')
|
||||
#price.cost = Decimal(tds[4].text.replace(',', '.'))
|
||||
#price.currency = u'€'
|
||||
def _extract_vendor(self, e, name):
|
||||
'Extract content from VendorLib'
|
||||
for span in e.cssselect('span.VendeurLib'):
|
||||
if name in span.text_content():
|
||||
li = span.getparent()
|
||||
if li is None: continue
|
||||
# get all text
|
||||
s = li.text_content()
|
||||
# get text without header
|
||||
s = s[len(span.text_content())+1:]
|
||||
# special case for not pro
|
||||
if '\n' in s:
|
||||
s = s[:s.find('\n')]
|
||||
return s.strip()
|
||||
return None
|
||||
|
||||
#shop = Shop(price.id)
|
||||
#shop.name = unicode(tds[2].text.strip())
|
||||
#shop.location = unicode(tds[0].text.strip())
|
||||
def get_shop(self, id):
|
||||
shop = Shop(id)
|
||||
for e in self.document.getroot().cssselect('div#Vendeur'):
|
||||
shop.name = self._extract_vendor(e,'Nom') + '(' + self._extract_vendor(e,'Vendeur') + ')'
|
||||
shop.location = ''
|
||||
for adr in self.document.getroot().cssselect('span#AdresseL1,span#AdresseL2'):
|
||||
if shop.location:
|
||||
shop.location += ', '
|
||||
shop.location += adr.text_content().strip()
|
||||
for tel in self.document.getroot().cssselect('span.Tel'):
|
||||
s = tel.text_content().strip()
|
||||
if shop.location:
|
||||
shop.location += ', '
|
||||
shop.location += re.sub('\s+', ' ', s)
|
||||
shop.set_empty_fields(NotAvailable)
|
||||
return shop
|
||||
|
||||
#price.shop = shop
|
||||
#price.set_empty_fields(NotAvailable)
|
||||
#yield price
|
||||
|
||||
|
||||
#class ShopInfoPage(BasePage):
|
||||
#def get_info(self):
|
||||
#return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.colg', 1))
|
||||
def get_price(self, id):
|
||||
for e in self.document.getroot().cssselect('div#DescBar'):
|
||||
product = Product(1)
|
||||
product.name = unicode('Occasion')
|
||||
cost = self._extract(e,'PriceLc')
|
||||
title = self._extract(e,'BrandLc')
|
||||
title += ', ' + self._extract(e,'modeleCom')
|
||||
title += ', ' + self._extract_info(e,'Version')
|
||||
title += ', ' + self._extract_info(e,'Ann')
|
||||
title += ', ' + get_decimal(self._extract_info(e,'Kilom')) + 'km'
|
||||
price = new_price(id, product, cost, title)
|
||||
price.shop = self.get_shop(id)
|
||||
return price
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue