First implementation of lacentrale
Signed-off-by: Vicnet <vo.publique@gmail.com>
This commit is contained in:
parent
ec07532a63
commit
03cc113851
4 changed files with 287 additions and 0 deletions
3
modules/lacentrale/__init__.py
Normal file
3
modules/lacentrale/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .backend import LaCentraleBackend
|
||||
|
||||
__all__ = ['LaCentraleBackend']
|
||||
92
modules/lacentrale/backend.py
Normal file
92
modules/lacentrale/backend.py
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2012 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
from weboob.capabilities.pricecomparison import ICapPriceComparison, Price, Product
|
||||
from weboob.tools.backend import BaseBackend, BackendConfig
|
||||
#from weboob.tools.value import Value
|
||||
|
||||
from .browser import LaCentraleBrowser
|
||||
|
||||
|
||||
__all__ = ['LaCentraleBackend']
|
||||
|
||||
|
||||
class LaCentraleBackend(BaseBackend, ICapPriceComparison):
    """Price-comparison backend for vehicle listings on LaCentrale.fr.

    Search patterns are given as one comma-separated string of criteria,
    for example ``10000€, 100000km, 5p, cit, dep31, pro``.
    """

    NAME = 'lacentrale'
    MAINTAINER = u'Vicnet'
    EMAIL = 'vicnet@weboob.org'
    VERSION = '0.h'
    DESCRIPTION = 'Vehicule prices at LaCentrale.fr'
    LICENSE = 'AGPLv3+'
#    CONFIG = BackendConfig(Value('zipcode', label='Zipcode', regexp='\d+'))
    BROWSER = LaCentraleBrowser

    # inherited from ICapPriceComparison
    def search_products(self, patterns=None):
        """Yield the products matching the given search patterns.

        :param patterns: comma-separated criteria (see `_parse_criteria`);
                         ``None`` or an empty string means "no criteria".
        :return: generator over the products produced by the browser.
        """
        criteria = self._parse_criteria(patterns)

        # browse product
        with self.browser:
            for product in self.browser.iter_products(criteria):
                yield product

    @staticmethod
    def _parse_criteria(patterns):
        """Convert a comma-separated pattern string into a criteria dict.

        Recognised items (case-insensitive, surrounding blanks ignored):

        * ``<n>€``   -> ``maxprice`` (text before the euro sign)
        * ``<n>km``  -> ``maxdist`` (text before ``km``)
        * ``<n>p``   -> ``nbdoors`` (only when the prefix is numeric)
        * ``cit``    -> ``urban``
        * ``dep<n>`` -> ``dept`` (first run of digits)
        * ``pro``    -> ``origin`` = 1
        * ``part``   -> ``origin`` = 0

        Empty items and malformed ones (e.g. ``dep`` without a number) are
        ignored instead of raising.
        """
        criteria = {}
        for pattern in (patterns or u'').split(','):
            pattern = pattern.strip().lower()
            if not pattern:
                # trailing/double comma or no pattern at all
                continue
            if u'€' in pattern:
                criteria['maxprice'] = pattern.split(u'€', 1)[0].strip()
            if u'km' in pattern:
                criteria['maxdist'] = pattern.split(u'km', 1)[0].strip()
            if pattern.endswith(u'p'):
                doors = pattern[:-1].strip()
                # "dep" or any other word ending in "p" is not a door count
                if doors.isdigit():
                    criteria['nbdoors'] = doors
            if u'cit' in pattern:
                # the value also carries the category parameter; the browser
                # formats it as-is into the listing URL
                criteria['urban'] = 'citadine&SS_CATEGORIE=40'
            if u'dep' in pattern:
                digits = re.search(r'\d+', pattern)
                if digits:
                    criteria['dept'] = digits.group(0)
            if u'pro' in pattern:
                criteria['origin'] = 1
            if u'part' in pattern:
                criteria['origin'] = 0
        return criteria

    def iter_prices(self, product):
        """Return the prices found by the browser for *product*."""
        # inherited from ICapPriceComparison
        with self.browser:
            return self.browser.iter_prices(product)
|
||||
|
||||
# def get_price(self, id):
|
||||
# inherited from ICapPriceComparison
|
||||
# with self.browser:
|
||||
# if isinstance(id, Price):
|
||||
# price = id
|
||||
# else:
|
||||
# p_id, s_id = id.split('.', 2)
|
||||
# product = Product(p_id)
|
||||
# for price in self.iter_prices(product):
|
||||
# if price.id == id:
|
||||
# break
|
||||
# else:
|
||||
# return None
|
||||
|
||||
# price.shop.info = self.browser.get_shop_info(price.id.split('.', 2)[-1])
|
||||
# return price
|
||||
|
||||
# def fill_price(self, price, fields):
|
||||
# return self.get_price(price)
|
||||
|
||||
# OBJECTS = {Price: fill_price, }
|
||||
86
modules/lacentrale/browser.py
Normal file
86
modules/lacentrale/browser.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2012 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import urllib
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages import MainPage, ListingAutoPage
|
||||
|
||||
|
||||
__all__ = ['LaCentraleBrowser']
|
||||
|
||||
|
||||
class LaCentraleBrowser(BaseBrowser):
    """Browser for www.lacentrale.fr: main page and vehicle listing page."""

    PROTOCOL = 'http'
    DOMAIN = 'www.lacentrale.fr'
    ENCODING = 'windows-1252'
    # NOTE(review): '.' and '?' are not escaped in these URL patterns;
    # presumably BaseBrowser treats the keys as regexps - confirm.
    PAGES = {
        'http://www.lacentrale.fr/': MainPage,
        'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage,
    }
    # Sample listing URL:
    #http://www.lacentrale.fr/listing_auto.php?witchSearch=0&SS_CATEGORIE=40&mo_comm=&Citadine=citadine=&km_maxi=120000&annee2=&conso=&co2=&opt=&version=&transmission=&couleur=&nbportes=%3D5&photo=&new_annonce=&cp=31&origine=1

    def iter_products(self, criteria):
        """Go to the main page if needed and return its products.

        :param criteria: dict of search criteria handed to the main page.
        """
        if not self.is_on_page(MainPage):
            self.location('/')
        assert self.is_on_page(MainPage)
        return self.page.iter_products(criteria)

    def buildUrl(self, product, request, criteria):
        """Return one ``&key=value`` URL fragment for a product criterion.

        :param product: object whose ``_criteria`` dict holds the values.
        :param request: format string with one ``{}`` placeholder,
                        e.g. ``'cp={}'``.
        :param criteria: key to look up in ``product._criteria``.
        :return: ``'&' + request`` formatted with the value, or ``''`` when
                 the criterion is not set.
        """
        if criteria in product._criteria:
            return '&' + request.format(product._criteria[criteria])
        return ''

    def iter_prices(self, product):
        """Load the listing page for *product* and return its prices.

        The listing is requested on every call: reusing a listing page that
        is already loaded would return the results of a previous search when
        the criteria differ.
        """
        url = '/listing_auto.php?witchSearch=0'
        url += self.buildUrl(product, 'Citadine={}', 'urban')
        url += self.buildUrl(product, 'prix_maxi={}', 'maxprice')
        url += self.buildUrl(product, 'km_maxi={}', 'maxdist')
        url += self.buildUrl(product, 'nbportes=%3D{}', 'nbdoors')
        url += self.buildUrl(product, 'cp={}', 'dept')
        # NOTE(review): the sample URL above uses "origine=", not "origin=";
        # confirm which parameter name the site expects.
        url += self.buildUrl(product, 'origin={}', 'origin')
        self.location(url)

        assert self.is_on_page(ListingAutoPage)
        return self.page.iter_prices()
|
||||
|
||||
# def iter_prices(self, zipcode, product):
|
||||
# data = {'aff_param_0_0': '',
|
||||
# 'aff_param_0_1': 'les points de vente',
|
||||
# 'aff_param_0_3': zipcode,
|
||||
# 'changeNbPerPage': 'off',
|
||||
# 'toDelete': -1,
|
||||
# }
|
||||
# self.location('/index.php?module=dbgestion&action=search', urllib.urlencode(data))
|
||||
#
|
||||
# assert self.is_on_page(ComparisonResultsPage)
|
||||
# return self.page.iter_results(product)
|
||||
#
|
||||
# def get_shop_info(self, id):
|
||||
# data = {'pdv_id': id,
|
||||
# 'module': 'dbgestion',
|
||||
# 'action': 'getPopupInfo'}
|
||||
# self.location('/index.php?module=dbgestion&action=getPopupInfo', urllib.urlencode(data))
|
||||
#
|
||||
# assert self.is_on_page(ShopInfoPage)
|
||||
# return self.page.get_info()
|
||||
106
modules/lacentrale/pages.py
Normal file
106
modules/lacentrale/pages.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2012 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#from decimal import Decimal
|
||||
#import re
|
||||
|
||||
from weboob.tools.browser import BasePage, BrokenPageError
|
||||
from weboob.capabilities import NotAvailable
|
||||
from weboob.capabilities.pricecomparison import Product, Price, Shop
|
||||
import re
|
||||
|
||||
__all__ = ['MainPage','ListingAutoPage']
|
||||
|
||||
class MainPage(BasePage):
    """Main page of the site; source of the single searchable product."""

    def iter_products(self, criteria):
        """Yield one 'Occasion' product carrying the search criteria.

        :param criteria: search criteria, stored unchanged on the product as
                         ``_criteria``.
        """
        # TODO check if criteria exists in main page
        # and get the GET keyword to fill request ?
        occasion = Product(1)
        occasion._criteria = criteria
        occasion.name = u'Occasion'
        yield occasion
|
||||
|
||||
class ListingAutoPage(BasePage):
    """Search-result listing page; each vehicle row becomes a Price."""

    def _extract(self, tr, name):
        """Extract content from td element with class name.

        :param tr: table row element, or ``None``.
        :param name: CSS class of the ``td`` cell to read.
        :return: stripped text of the last link found in that cell, or ``''``
                 when the row is ``None`` or no such link exists.
        """
        if tr is None:
            # e.g. getnext() on the last row of the table
            return ''
        links = tr.cssselect('td.' + name + ' a')
        if not links:
            return ''
        return links[-1].text_content().strip()

    def iter_prices(self):
        """Yield one Price per listing row that has a brand and a price.

        The price message is built as
        ``brand, model, version, year, <mileage>km``. Rows without a brand
        or without any digit in the price cell are skipped.
        """
        rows = self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]')
        for tr in rows:
            brand = self._extract(tr, 'lcbrand')
            if not brand:
                continue

            # Drop every kind of whitespace (including a non-breaking space
            # used as thousands separator, if the site emits one) before
            # reading the amount.
            cost_text = re.sub(r'\s+', '', self._extract(tr, 'lcprice'),
                               flags=re.UNICODE)
            amount = re.search(r'\d+', cost_text)
            if amount is None:
                # no usable price for this row
                continue

            # the version is read from the row following the main one
            version = self._extract(tr.getnext(), 'lcversion')
            mileage = self._extract(tr, 'lcmileage') + 'km'
            title = ', '.join([
                brand,
                self._extract(tr, 'lcmodel'),
                version,
                self._extract(tr, 'lcyear'),
                mileage.replace(' ', ''),
            ])

            # the row id attribute carries a 3-character prefix before the id
            price = Price(tr.attrib['id'][3:])
            price.cost = int(amount.group(0))
            price.currency = u'€'
            price.message = unicode(title)

            price.set_empty_fields(NotAvailable)
            yield price
|
||||
|
||||
|
||||
#class ComparisonResultsPage(BasePage):
|
||||
#def get_product_name(self):
|
||||
#try:
|
||||
#div = self.parser.select(self.document.getroot(), 'div#moins_plus_ariane', 1)
|
||||
#except BrokenPageError:
|
||||
#return NotAvailable
|
||||
#else:
|
||||
#m = re.match('Carburant : ([\w\-]+) | .*', div.text)
|
||||
#return m.group(1)
|
||||
|
||||
#def iter_results(self, product=None):
|
||||
#price = None
|
||||
#product.name = self.get_product_name()
|
||||
#for tr in self.document.getroot().cssselect('table#tab_resultat tr'):
|
||||
#if tr.attrib.get('id', '').startswith('pdv'):
|
||||
#price = Price('%s.%s' % (product.id, tr.attrib['id'][3:]))
|
||||
|
||||
#price.product = product
|
||||
|
||||
#tds = tr.findall('td')
|
||||
#price.cost = Decimal(tds[4].text.replace(',', '.'))
|
||||
#price.currency = u'€'
|
||||
|
||||
#shop = Shop(price.id)
|
||||
#shop.name = unicode(tds[2].text.strip())
|
||||
#shop.location = unicode(tds[0].text.strip())
|
||||
|
||||
#price.shop = shop
|
||||
#price.set_empty_fields(NotAvailable)
|
||||
#yield price
|
||||
|
||||
|
||||
#class ShopInfoPage(BasePage):
|
||||
#def get_info(self):
|
||||
#return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.colg', 1))
|
||||
Loading…
Add table
Add a link
Reference in a new issue