[prixcarburant] fix site changes

This commit is contained in:
Bezleputh 2013-12-10 19:23:06 +01:00 committed by Romain Bignon
commit fe94887947
3 changed files with 39 additions and 59 deletions

View file

@ -34,8 +34,8 @@ class PrixCarburantsBackend(BaseBackend, ICapPriceComparison):
VERSION = '0.h' VERSION = '0.h'
DESCRIPTION = 'French governement website to compare fuel prices' DESCRIPTION = 'French governement website to compare fuel prices'
LICENSE = 'AGPLv3+' LICENSE = 'AGPLv3+'
CONFIG = BackendConfig(Value('zipcode', label='Zipcode', regexp='\d+'))
BROWSER = PrixCarburantsBrowser BROWSER = PrixCarburantsBrowser
CONFIG = BackendConfig(Value('zipcode', label='Zipcode', regexp='\d+'))
def search_products(self, pattern=None): def search_products(self, pattern=None):
with self.browser: with self.browser:
@ -66,5 +66,4 @@ class PrixCarburantsBackend(BaseBackend, ICapPriceComparison):
def fill_price(self, price, fields): def fill_price(self, price, fields):
return self.get_price(price) return self.get_price(price)
OBJECTS = {Price: fill_price, OBJECTS = {Price: fill_price, }
}

View file

@ -29,62 +29,43 @@ __all__ = ['PrixCarburantsBrowser']
class PrixCarburantsBrowser(BaseBrowser): class PrixCarburantsBrowser(BaseBrowser):
TOKEN = None
PROTOCOL = 'http' PROTOCOL = 'http'
DOMAIN = 'www.prix-carburants.economie.gouv.fr' DOMAIN = 'www.prix-carburants.economie.gouv.fr'
ENCODING = 'iso-8859-15' ENCODING = 'iso-8859-15'
PAGES = { PAGES = {
'http://www\.prix-carburants\.economie\.gouv\.fr/index\.php': IndexPage, 'http://www.prix-carburants.economie.gouv.fr': IndexPage,
'http://www\.prix-carburants\.economie\.gouv\.fr/index\.php\?module=dbgestion\&action=search': ComparisonResultsPage, 'http://www.prix-carburants.economie.gouv.fr/recherche/': ComparisonResultsPage,
'http://www\.prix-carburants\.economie\.gouv\.fr/index\.php\?module=dbgestion\&action=getPopupInfo': ShopInfoPage, 'http://www.prix-carburants.economie.gouv.fr/itineraire/infos/\d+': ShopInfoPage, }
}
def iter_products(self): def iter_products(self):
if not self.is_on_page(IndexPage): if not self.is_on_page(IndexPage):
self.location('/index.php') self.location("%s://%s" % (self.PROTOCOL, self.DOMAIN))
assert self.is_on_page(IndexPage) assert self.is_on_page(IndexPage)
return self.page.iter_products() return self.page.iter_products()
def iter_prices(self, zipcode, product): def get_token(self):
data = {'aff_param_0_0': '', if not self.is_on_page(IndexPage):
'aff_param_0_1': 'les points de vente', self.location("%s://%s" % (self.PROTOCOL, self.DOMAIN))
'aff_param_0_2': '',
'aff_param_0_3': zipcode,
'changeNbPerPage': 'off',
'col*param*pdv_brand': 'Marque',
'col*param*pdv_city': 'Commune',
'col*param*pdv_name': 'Nom du point de vente',
'col*param*pdv_pop': '',
'col*param*price_fuel_%s' % product.id: 'GPL',
'col*param*price_lmdate_%s' % product.id: 'Mise a jour GPL',
'critere_contrainte': 'letters',
'critere_info': 'pdv_city*0',
'critere_txt': '',
'flag_contrainte': 'off',
'index_contrainte': 0,
'modeaffichage': 'list',
'nb_search_per_page': 100,
'orderBy': 'price_fuel_%s' % product.id,
'orderType': 'ASC',
'req_param_0_0': '',
'req_param_0_1': 'pdv_zipcode',
'req_param_0_2': 'ILIKE',
'req_param_0_3': '%s%%' % zipcode,
'seeFuel': product.id,
'thisPageLetter': 'Tous',
'thisPageNumber': 1,
'toDelete': -1,
}
self.location('/index.php?module=dbgestion&action=search', urllib.urlencode(data))
assert self.is_on_page(IndexPage)
self.TOKEN = self.page.get_token()
def iter_prices(self, zipcode, product):
if self.TOKEN is None:
self.get_token()
data = {
'_recherche_recherchertype[localisation]': '%s' % zipcode,
'_recherche_recherchertype[choix_carbu]': '%s' % product.id,
'_recherche_recherchertype[_token]': '%s' % self.TOKEN, }
self.location('%s://%s' % (self.PROTOCOL, self.DOMAIN), urllib.urlencode(data))
assert self.is_on_page(ComparisonResultsPage) assert self.is_on_page(ComparisonResultsPage)
return self.page.iter_results(product) return self.page.iter_results(product)
def get_shop_info(self, id): def get_shop_info(self, id):
data = {'pdv_id': id, self.location('%s://%s/itineraire/infos/%s' % (self.PROTOCOL, self.DOMAIN, id))
'module': 'dbgestion',
'action': 'getPopupInfo'}
self.location('/index.php?module=dbgestion&action=getPopupInfo', urllib.urlencode(data))
assert self.is_on_page(ShopInfoPage) assert self.is_on_page(ShopInfoPage)
return self.page.get_info() return self.page.get_info()

View file

@ -19,9 +19,8 @@
from decimal import Decimal from decimal import Decimal
import re
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable
from weboob.capabilities.pricecomparison import Product, Shop, Price from weboob.capabilities.pricecomparison import Product, Shop, Price
@ -30,6 +29,11 @@ __all__ = ['IndexPage', 'ComparisonResultsPage', 'ShopInfoPage']
class IndexPage(BasePage): class IndexPage(BasePage):
def get_token(self):
input = self.parser.select(self.document.getroot(), 'div#localisation input#recherche_recherchertype__token', 1)
return input.attrib['value']
def iter_products(self): def iter_products(self):
for li in self.parser.select(self.document.getroot(), 'div#choix_carbu ul li'): for li in self.parser.select(self.document.getroot(), 'div#choix_carbu ul li'):
input = li.find('input') input = li.find('input')
@ -47,30 +51,26 @@ class IndexPage(BasePage):
class ComparisonResultsPage(BasePage): class ComparisonResultsPage(BasePage):
def get_product_name(self): def get_product_name(self):
try: th = self.document.getroot().cssselect('table#tab_resultat tr th')
div = self.parser.select(self.document.getroot(), 'div#moins_plus_ariane', 1) if th and len(th) == 9:
except BrokenPageError: return u'%s' % th[5].find('a').text
return NotAvailable
else:
m = re.match('Carburant : ([\w\-]+) | .*', div.text)
return m.group(1)
def iter_results(self, product=None): def iter_results(self, product=None):
price = None price = None
product.name = self.get_product_name() product.name = self.get_product_name()
for tr in self.document.getroot().cssselect('table#tab_resultat tr'): for tr in self.document.getroot().cssselect('table#tab_resultat tr'):
if tr.attrib.get('id', '').startswith('pdv'): tds = self.parser.select(tr, 'td')
price = Price('%s.%s' % (product.id, tr.attrib['id'][3:])) if tds and len(tds) == 9 and product is not None:
price = Price('%s.%s' % (product.id, tr.attrib['id']))
price.product = product price.product = product
tds = tr.findall('td') price.cost = Decimal(tds[5].text.replace(',', '.'))
price.cost = Decimal(tds[4].text.replace(',', '.'))
price.currency = u'' price.currency = u''
shop = Shop(price.id) shop = Shop(price.id)
shop.name = unicode(tds[2].text.strip()) shop.name = unicode(tds[3].text.strip())
shop.location = unicode(tds[0].text.strip()) shop.location = unicode(tds[2].text.strip())
price.shop = shop price.shop = shop
price.set_empty_fields(NotAvailable) price.set_empty_fields(NotAvailable)
@ -79,4 +79,4 @@ class ComparisonResultsPage(BasePage):
class ShopInfoPage(BasePage): class ShopInfoPage(BasePage):
def get_info(self): def get_info(self):
return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.colg', 1)) return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.infos', 1))