First implementation of lacentrale

Signed-off-by: Vicnet <vo.publique@gmail.com>
This commit is contained in:
Vicnet 2014-02-18 11:11:12 +01:00 committed by Florent
commit 03cc113851
4 changed files with 287 additions and 0 deletions

View file

@ -0,0 +1,3 @@
from .backend import LaCentraleBackend
__all__ = ['LaCentraleBackend']

View file

@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from weboob.capabilities.pricecomparison import ICapPriceComparison, Price, Product
from weboob.tools.backend import BaseBackend, BackendConfig
#from weboob.tools.value import Value
from .browser import LaCentraleBrowser
__all__ = ['LaCentraleBackend']
class LaCentraleBackend(BaseBackend, ICapPriceComparison):
    """Price-comparison backend for vehicle listings on LaCentrale.fr.

    A search is expressed as a comma-separated list of free-form tokens
    which are converted into a criteria dict and handed to the browser.
    """

    NAME = 'lacentrale'
    MAINTAINER = u'Vicnet'
    EMAIL = 'vicnet@weboob.org'
    VERSION = '0.h'
    DESCRIPTION = 'Vehicule prices at LaCentrale.fr'
    LICENSE = 'AGPLv3+'
    # CONFIG = BackendConfig(Value('zipcode', label='Zipcode', regexp='\d+'))
    BROWSER = LaCentraleBrowser

    # Euro sign, written as an escape so it cannot be lost when the source
    # file is re-encoded.
    _EURO = u'\u20ac'

    @classmethod
    def _parse_criteria(cls, patterns):
        """Convert a comma-separated pattern string into a criteria dict.

        Recognised tokens (case-insensitive, surrounding blanks ignored):

        * ``<value>`` followed by the euro sign -> ``maxprice``
        * ``<value>km``                         -> ``maxdist``
        * ``<digits>p``                         -> ``nbdoors``
        * anything containing ``cit``           -> ``urban``
        * ``dep`` plus a number                 -> ``dept``
        * anything containing ``pro``           -> ``origin`` = 1
        * anything containing ``part``          -> ``origin`` = 0

        :param patterns: pattern string, or None/empty for no criteria
        :returns: dict mapping criteria names to their values
        """
        criteria = {}
        if not patterns:
            return criteria

        for pattern in patterns.split(','):
            pattern = pattern.strip().lower()
            if not pattern:
                # Empty token (e.g. trailing comma): nothing to extract.
                continue
            if cls._EURO in pattern:
                criteria['maxprice'] = pattern.partition(cls._EURO)[0].strip()
            if u'km' in pattern:
                criteria['maxdist'] = pattern.partition(u'km')[0].strip()
            if pattern.endswith(u'p'):
                doors = pattern.partition(u'p')[0].strip()
                # Only a numeric prefix is a door count; this keeps words
                # that merely end in "p" from being misread.
                if doors.isdigit():
                    criteria['nbdoors'] = doors
            if u'cit' in pattern:
                criteria['urban'] = 'citadine&SS_CATEGORIE=40'
            if u'dep' in pattern:
                numbers = re.findall(r'\d+', pattern)
                if numbers:
                    criteria['dept'] = numbers[0]
            if u'pro' in pattern:
                criteria['origin'] = 1
            if u'part' in pattern:
                criteria['origin'] = 0
        return criteria

    def search_products(self, patterns=None):
        """Yield the products matching *patterns*.

        Inherited from ICapPriceComparison.

        :param patterns: comma-separated search tokens (see
                         ``_parse_criteria``); None means no criteria
        """
        criteria = self._parse_criteria(patterns)
        with self.browser:
            for product in self.browser.iter_products(criteria):
                yield product

    def iter_prices(self, product):
        """Return the prices the browser finds for *product*.

        Inherited from ICapPriceComparison.
        """
        with self.browser:
            return self.browser.iter_prices(product)
# def get_price(self, id):
# inherited from ICapPriceComparison
# with self.browser:
# if isinstance(id, Price):
# price = id
# else:
# p_id, s_id = id.split('.', 2)
# product = Product(p_id)
# for price in self.iter_prices(product):
# if price.id == id:
# break
# else:
# return None
# price.shop.info = self.browser.get_shop_info(price.id.split('.', 2)[-1])
# return price
# def fill_price(self, price, fields):
# return self.get_price(price)
# OBJECTS = {Price: fill_price, }

View file

@ -0,0 +1,86 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.browser import BaseBrowser
from .pages import MainPage, ListingAutoPage
__all__ = ['LaCentraleBrowser']
class LaCentraleBrowser(BaseBrowser):
    """Browser for the search pages of www.lacentrale.fr."""

    PROTOCOL = 'http'
    DOMAIN = 'www.lacentrale.fr'
    ENCODING = 'windows-1252'
    PAGES = {
        'http://www.lacentrale.fr/': MainPage,
        'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage,
    }

    # Sample search URL:
    # http://www.lacentrale.fr/listing_auto.php?witchSearch=0&SS_CATEGORIE=40&mo_comm=&Citadine=citadine=&km_maxi=120000&annee2=&conso=&co2=&opt=&version=&transmission=&couleur=&nbportes=%3D5&photo=&new_annonce=&cp=31&origine=1

    # (query-string template, criteria key) pairs, in the order they are
    # appended to the listing URL.
    # NOTE(review): the seller-origin parameter is spelled "origine" to match
    # the sample URL above -- confirm against the live site.
    _FILTERS = (
        ('Citadine={}', 'urban'),
        ('prix_maxi={}', 'maxprice'),
        ('km_maxi={}', 'maxdist'),
        ('nbportes=%3D{}', 'nbdoors'),
        ('cp={}', 'dept'),
        ('origine={}', 'origin'),
    )

    def iter_products(self, criteria):
        """Go to the main page if needed and return its products.

        :param criteria: dict of search criteria passed to the page
        """
        if not self.is_on_page(MainPage):
            self.location('/')
        assert self.is_on_page(MainPage)
        return self.page.iter_products(criteria)

    def buildUrl(self, product, request, criteria):
        """Return one ``&key=value`` URL fragment for *product*.

        :param product: object whose ``_criteria`` dict holds the search values
        :param request: format template with one ``{}`` placeholder
        :param criteria: key to look up in ``product._criteria``
        :returns: ``'&' + request`` filled with the value, or ``''`` when the
                  product has no such criterion (or no criteria at all)
        """
        values = getattr(product, '_criteria', None) or {}
        if criteria in values:
            return '&' + request.format(values[criteria])
        return ''

    def iter_prices(self, product):
        """Load the listing page for *product* and return its prices.

        The URL depends on the product's criteria, so the page is always
        reloaded; reusing an already-open listing page could return the
        results of a previous search.
        """
        url = '/listing_auto.php?witchSearch=0'
        url += ''.join(self.buildUrl(product, request, key)
                       for request, key in self._FILTERS)
        self.location(url)
        assert self.is_on_page(ListingAutoPage)
        return self.page.iter_prices()
# def iter_prices(self, zipcode, product):
# data = {'aff_param_0_0': '',
# 'aff_param_0_1': 'les points de vente',
# 'aff_param_0_3': zipcode,
# 'changeNbPerPage': 'off',
# 'toDelete': -1,
# }
# self.location('/index.php?module=dbgestion&action=search', urllib.urlencode(data))
#
# assert self.is_on_page(ComparisonResultsPage)
# return self.page.iter_results(product)
#
# def get_shop_info(self, id):
# data = {'pdv_id': id,
# 'module': 'dbgestion',
# 'action': 'getPopupInfo'}
# self.location('/index.php?module=dbgestion&action=getPopupInfo', urllib.urlencode(data))
#
# assert self.is_on_page(ShopInfoPage)
# return self.page.get_info()

106
modules/lacentrale/pages.py Normal file
View file

@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
#from decimal import Decimal
#import re
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.capabilities import NotAvailable
from weboob.capabilities.pricecomparison import Product, Price, Shop
import re
__all__ = ['MainPage','ListingAutoPage']
class MainPage(BasePage):
    """Page from which a product search starts."""

    def iter_products(self, criteria):
        """Yield a single 'Occasion' product carrying *criteria*.

        The criteria dict is attached to the product as ``_criteria``.
        """
        # TODO check if criteria exists in main page
        # and get the GET keyword to fill request ?
        occasion = Product(1)
        occasion.name = unicode('Occasion')
        occasion._criteria = criteria
        yield occasion
class ListingAutoPage(BasePage):
    """Search-results page; each result row is turned into a Price."""

    def _extract(self, tr, name):
        """Return the stripped text of the last link in cell ``td.<name>``.

        :param tr: table-row element, or None
        :param name: CSS class of the ``td`` to read
        :returns: the link text, or ``''`` when *tr* is None or has no such
                  link
        """
        if tr is None:
            return ''
        links = tr.cssselect('td.' + name + ' a')
        if not links:
            return ''
        return links[-1].text_content().strip()

    def iter_prices(self):
        """Yield a Price for every listing row that has a brand and a price.

        The price message is
        ``brand, model, version, year, mileage`` where the version is read
        from the row following the listing row.
        """
        rows = self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]')
        for tr in rows:
            brand = self._extract(tr, 'lcbrand')
            if not brand:
                continue

            # The price is the first run of digits once all whitespace
            # (including non-breaking spaces) has been removed.
            cost_text = re.sub(r'\s+', '', self._extract(tr, 'lcprice'),
                               flags=re.UNICODE)
            amounts = re.findall(r'\d+', cost_text)
            if not amounts:
                # No parsable price on this row: skip it, as for a row
                # without a brand.
                continue

            mileage = (self._extract(tr, 'lcmileage') + 'km').replace(' ', '')
            title = ', '.join([
                brand,
                self._extract(tr, 'lcmodel'),
                # The version cell is in the next row; that row may be absent.
                self._extract(tr.getnext(), 'lcversion'),
                self._extract(tr, 'lcyear'),
                mileage,
            ])

            # The row id minus its first three characters is the price id.
            price = Price(tr.attrib['id'][3:])
            price.cost = int(amounts[0])
            price.currency = u'\u20ac'  # euro sign
            price.message = unicode(title)
            price.set_empty_fields(NotAvailable)
            yield price
#class ComparisonResultsPage(BasePage):
#def get_product_name(self):
#try:
#div = self.parser.select(self.document.getroot(), 'div#moins_plus_ariane', 1)
#except BrokenPageError:
#return NotAvailable
#else:
#m = re.match('Carburant : ([\w\-]+) | .*', div.text)
#return m.group(1)
#def iter_results(self, product=None):
#price = None
#product.name = self.get_product_name()
#for tr in self.document.getroot().cssselect('table#tab_resultat tr'):
#if tr.attrib.get('id', '').startswith('pdv'):
#price = Price('%s.%s' % (product.id, tr.attrib['id'][3:]))
#price.product = product
#tds = tr.findall('td')
#price.cost = Decimal(tds[4].text.replace(',', '.'))
#price.currency = u'€'
#shop = Shop(price.id)
#shop.name = unicode(tds[2].text.strip())
#shop.location = unicode(tds[0].text.strip())
#price.shop = shop
#price.set_empty_fields(NotAvailable)
#yield price
#class ShopInfoPage(BasePage):
#def get_info(self):
#return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.colg', 1))