From 03cc113851f4dde0b9e89fc970d2433848edc512 Mon Sep 17 00:00:00 2001 From: Vicnet Date: Tue, 18 Feb 2014 11:11:12 +0100 Subject: [PATCH] First implementation of lacentrale Signed-off-by: Vicnet --- modules/lacentrale/__init__.py | 3 + modules/lacentrale/backend.py | 92 ++++++++++++++++++++++++++++ modules/lacentrale/browser.py | 86 ++++++++++++++++++++++++++ modules/lacentrale/pages.py | 106 +++++++++++++++++++++++++++++++++ 4 files changed, 287 insertions(+) create mode 100644 modules/lacentrale/__init__.py create mode 100644 modules/lacentrale/backend.py create mode 100644 modules/lacentrale/browser.py create mode 100644 modules/lacentrale/pages.py diff --git a/modules/lacentrale/__init__.py b/modules/lacentrale/__init__.py new file mode 100644 index 00000000..a0da6f3c --- /dev/null +++ b/modules/lacentrale/__init__.py @@ -0,0 +1,3 @@ +from .backend import LaCentraleBackend + +__all__ = ['LaCentraleBackend'] diff --git a/modules/lacentrale/backend.py b/modules/lacentrale/backend.py new file mode 100644 index 00000000..66a40d7b --- /dev/null +++ b/modules/lacentrale/backend.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from weboob.capabilities.pricecomparison import ICapPriceComparison, Price, Product
from weboob.tools.backend import BaseBackend, BackendConfig
#from weboob.tools.value import Value

from .browser import LaCentraleBrowser


__all__ = ['LaCentraleBackend']


class LaCentraleBackend(BaseBackend, ICapPriceComparison):
    """Price-comparison backend for vehicle listings on LaCentrale.fr.

    A search string is turned into a criteria dict; the browser then uses
    those criteria to build the listing request.
    """

    NAME = 'lacentrale'
    MAINTAINER = u'Vicnet'
    EMAIL = 'vicnet@weboob.org'
    VERSION = '0.h'
    DESCRIPTION = 'Vehicule prices at LaCentrale.fr'
    LICENSE = 'AGPLv3+'
#    CONFIG = BackendConfig(Value('zipcode', label='Zipcode', regexp='\d+'))
    BROWSER = LaCentraleBrowser

    @staticmethod
    def _parse_criteria(patterns):
        """Translate a comma-separated search string into a criteria dict.

        Each token is lower-cased and checked against every rule below, so
        one token may set several keys:

        * contains ``€``   -> ``maxprice``: text before the euro sign
        * contains ``km``  -> ``maxdist``: text before ``km``
        * ends with ``p``  -> ``nbdoors``: text before the first ``p``
        * contains ``cit`` -> ``urban``: fixed query fragment
        * contains ``dep`` -> ``dept``: first run of digits in the token
        * contains ``pro`` -> ``origin`` = 1
        * contains ``part`` -> ``origin`` = 0

        :param patterns: search string; None or '' means "no criteria"
        :returns: dict mapping criteria names to their values
        """
        criteria = {}
        if not patterns:
            # The public signature defaults to None: treat it as "no filter"
            # instead of failing on None.split().
            return criteria

        for pattern in patterns.split(','):
            pattern = pattern.strip().lower()
            if not pattern:
                # Empty token ("a,,b" or a trailing comma): nothing to parse.
                continue
            if u'€' in pattern:
                criteria['maxprice'] = pattern[:pattern.find(u'€')].strip()
            if u'km' in pattern:
                criteria['maxdist'] = pattern[:pattern.find(u'km')].strip()
            if pattern.endswith(u'p'):
                criteria['nbdoors'] = pattern[:pattern.find(u'p')].strip()
            if u'cit' in pattern:
                criteria['urban'] = 'citadine&SS_CATEGORIE=40'
            if u'dep' in pattern:
                # Only set the department when the token really holds digits.
                digits = re.search(r'\d+', pattern)
                if digits:
                    criteria['dept'] = digits.group(0)
            if u'pro' in pattern:
                criteria['origin'] = 1
            if u'part' in pattern:
                criteria['origin'] = 0
        return criteria

    # inherited from ICapPriceComparison
    def search_products(self, patterns=None):
        """Yield the products returned by the browser for *patterns*.

        :param patterns: comma-separated criteria string (see
                         ``_parse_criteria``); None or '' searches without
                         any criterion
        """
        # convert pattern to criteria
        criteria = self._parse_criteria(patterns)
        # browse product
        with self.browser:
            for product in self.browser.iter_products(criteria):
                yield product

    def iter_prices(self, product):
        """Return the browser's price iterator for *product*.

        :param product: product previously yielded by ``search_products``
        """
        # inherited from ICapPriceComparison
        with self.browser:
            return self.browser.iter_prices(product)

#    def get_price(self, id):
        # inherited from ICapPriceComparison
#        with self.browser:
#            if isinstance(id, Price):
#                price = id
#            else:
#                p_id, s_id = id.split('.', 2)
#                product = Product(p_id)
#                for price in self.iter_prices(product):
#                    if price.id == id:
#                        break
#                else:
#                    return None

#            price.shop.info = self.browser.get_shop_info(price.id.split('.', 2)[-1])
#            return price

# def fill_price(self, price, fields): + # return self.get_price(price) + + # OBJECTS = {Price: fill_price, } diff --git a/modules/lacentrale/browser.py b/modules/lacentrale/browser.py new file mode 100644 index 00000000..ede62fbf --- /dev/null +++ b/modules/lacentrale/browser.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib

from weboob.tools.browser import BaseBrowser

from .pages import MainPage, ListingAutoPage


__all__ = ['LaCentraleBrowser']


class LaCentraleBrowser(BaseBrowser):
    """Browser for www.lacentrale.fr: the home page and the listing search."""

    PROTOCOL = 'http'
    DOMAIN = 'www.lacentrale.fr'
    ENCODING = 'windows-1252'
    PAGES = {
        'http://www.lacentrale.fr/': MainPage,
        'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage,
        }
    # Sample search URL, kept as the reference for the query parameter names
    # used in iter_prices():
#http://www.lacentrale.fr/listing_auto.php?witchSearch=0&SS_CATEGORIE=40&mo_comm=&Citadine=citadine=&km_maxi=120000&annee2=&conso=&co2=&opt=&version=&transmission=&couleur=&nbportes=%3D5&photo=&new_annonce=&cp=31&origine=1

    def iter_products(self, criteria):
        """Go to the home page if needed and return its product iterator.

        :param criteria: criteria dict forwarded to ``MainPage.iter_products``
        """
        if not self.is_on_page(MainPage):
            self.location('/')
        assert self.is_on_page(MainPage)
        return self.page.iter_products(criteria)

    def buildUrl(self, product, request, criteria):
        """Return the ``&name=value`` query fragment for one criterion.

        :param product: object whose ``_criteria`` dict holds the criteria
        :param request: format string receiving the value, e.g. ``'cp={}'``
        :param criteria: key to look up in ``product._criteria``
        :returns: the fragment prefixed with ``&``, or ``''`` when the key
                  is absent
        """
        # Membership test rather than truthiness: a stored value of 0 is
        # meaningful and must still be emitted.
        if criteria not in product._criteria:
            return ''
        return '&' + request.format(product._criteria[criteria])

    def iter_prices(self, product):
        """Load the listing page for *product* and return its price iterator.

        The request is only sent when the browser is not already on a listing
        page; otherwise the current page is reused as-is.

        :param product: product carrying the ``_criteria`` dict
        """
        if not self.is_on_page(ListingAutoPage):
            # (query template, criteria key), in the order they are appended.
            # 'origine' is the spelling used by the sample URL above.
            parameters = (
                ('Citadine={}', 'urban'),
                ('prix_maxi={}', 'maxprice'),
                ('km_maxi={}', 'maxdist'),
                ('nbportes=%3D{}', 'nbdoors'),
                ('cp={}', 'dept'),
                ('origine={}', 'origin'),
            )
            url = '/listing_auto.php?witchSearch=0'
            for request, key in parameters:
                url += self.buildUrl(product, request, key)
            self.location(url)

        assert self.is_on_page(ListingAutoPage)
        return self.page.iter_prices()

#    def iter_prices(self, zipcode, product):
#        data = {'aff_param_0_0': '',
#                'aff_param_0_1': 'les points de vente',
#                'aff_param_0_3': zipcode,
#                'changeNbPerPage': 'off',
#                'toDelete': -1,
#               }
#        self.location('/index.php?module=dbgestion&action=search', urllib.urlencode(data))
#
#        assert self.is_on_page(ComparisonResultsPage)
#        return self.page.iter_results(product)
#
#    def get_shop_info(self, id):
#        data = {'pdv_id': id,
#                'module': 'dbgestion',
#                'action': 'getPopupInfo'}
#        self.location('/index.php?module=dbgestion&action=getPopupInfo', urllib.urlencode(data))
#
#        assert self.is_on_page(ShopInfoPage)
#        return self.page.get_info()
#
# Patch metadata for pages.py, kept verbatim as comments:
# diff --git a/modules/lacentrale/pages.py b/modules/lacentrale/pages.py
# new file mode 100644
# index 00000000..1bf79db9
# --- /dev/null
# +++ b/modules/lacentrale/pages.py
# @@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


#from decimal import Decimal
#import re

from weboob.tools.browser import BasePage, BrokenPageError
from weboob.capabilities import NotAvailable
from weboob.capabilities.pricecomparison import Product, Price, Shop
import re

__all__ = ['MainPage','ListingAutoPage']


class MainPage(BasePage):
    """Site home page; the entry point of a search."""

    def iter_products(self, criteria):
        """Yield a single product that carries the search criteria.

        :param criteria: criteria dict, stored on the product as ``_criteria``
        """
        product = Product(1)
        # TODO check if criteria exists in main page
        # and get the GET keyword to fill request ?
        product.name = u'Occasion'
        product._criteria = criteria
        yield product


class ListingAutoPage(BasePage):
    """Result page of ``listing_auto.php``; one table row per listing."""

    def _extract(self, tr, name):
        """Return the stripped text of the last link in the ``td.<name>`` cell.

        :param tr: table row element, or None
        :param name: CSS class of the cell to read
        :returns: the text, or ``''`` when *tr* is None or has no such link
        """
        if tr is None:
            return ''
        links = tr.cssselect('td.' + name + ' a')
        if not links:
            return ''
        return links[-1].text_content().strip()

    def iter_prices(self):
        """Yield one ``Price`` per listing row of the page.

        Rows without a brand, or without any digits in the price cell, are
        skipped.
        """
        rows = self.document.getroot().cssselect('tr.lcline[id],tr.lclineJB[id]')
        for tr in rows:
            brand = self._extract(tr, 'lcbrand')
            if not brand:
                continue

            # The version is read from the row that follows this one.
            # getnext() returns None on the last row; _extract() copes with it.
            next_row = tr.getnext()
            mileage = self._extract(tr, 'lcmileage') + 'km'
            title = ', '.join([
                brand,
                self._extract(tr, 'lcmodel'),
                self._extract(next_row, 'lcversion'),
                self._extract(tr, 'lcyear'),
                mileage.replace(' ', ''),
            ])

            # Spaces are removed first so a grouped amount reads as one number.
            amounts = re.findall(r'\d+', self._extract(tr, 'lcprice').replace(' ', ''))
            if not amounts:
                # No usable amount: skip the row instead of raising IndexError.
                continue

            # The first three characters of the row id are dropped.
            price = Price(tr.attrib['id'][3:])
            price.cost = int(amounts[0])
            price.currency = u'€'
            price.message = unicode(title)

            price.set_empty_fields(NotAvailable)
            yield price


#class ComparisonResultsPage(BasePage):
    #def get_product_name(self):
        #try:
            #div = self.parser.select(self.document.getroot(), 'div#moins_plus_ariane', 1)
        #except BrokenPageError:
            #return NotAvailable
        #else:
            #m = re.match('Carburant : ([\w\-]+) | .*', div.text)
            #return m.group(1)

    #def iter_results(self, product=None):
        #price = None
        #product.name = self.get_product_name()
        #for tr in self.document.getroot().cssselect('table#tab_resultat tr'):
            #if tr.attrib.get('id', '').startswith('pdv'):
                #price = Price('%s.%s' % (product.id, tr.attrib['id'][3:]))

                #price.product = product

                #tds = tr.findall('td')
                #price.cost = Decimal(tds[4].text.replace(',', '.'))
                #price.currency = u'€'

                #shop = Shop(price.id)
                #shop.name = unicode(tds[2].text.strip())
                #shop.location = unicode(tds[0].text.strip())

                #price.shop = shop
                #price.set_empty_fields(NotAvailable)
                #yield price


#class ShopInfoPage(BasePage):
    #def get_info(self):
        #return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.colg', 1))