Pep8 cleaning

This commit is contained in:
Florent 2014-04-08 10:55:42 +02:00
commit 3e9c168158
4 changed files with 58 additions and 29 deletions

View file

@ -41,7 +41,7 @@ class LaCentraleBackend(BaseBackend, ICapPriceComparison):
# inherited from ICapPriceComparison # inherited from ICapPriceComparison
def search_products(self, patternString=None): def search_products(self, patternString=None):
# convert pattern to criteria # convert pattern to criteria
criteria = { } criteria = {}
patterns = [] patterns = []
if patternString: if patternString:
patterns = patternString.split(',') patterns = patternString.split(',')
@ -56,7 +56,7 @@ class LaCentraleBackend(BaseBackend, ICapPriceComparison):
if u'cit' in pattern: if u'cit' in pattern:
criteria['urban'] = 'citadine&SS_CATEGORIE=40' criteria['urban'] = 'citadine&SS_CATEGORIE=40'
if u'dep' in pattern: if u'dep' in pattern:
criteria['dept'] = re.findall('\d+',pattern)[0] criteria['dept'] = re.findall('\d+', pattern)[0]
if u'pro' in pattern: if u'pro' in pattern:
criteria['origin'] = 1 criteria['origin'] = 1
if u'part' in pattern: if u'part' in pattern:

View file

@ -33,8 +33,7 @@ class LaCentraleBrowser(BaseBrowser):
PROTOCOL = 'http' PROTOCOL = 'http'
DOMAIN = 'www.lacentrale.fr' DOMAIN = 'www.lacentrale.fr'
ENCODING = 'windows-1252' ENCODING = 'windows-1252'
PAGES = { PAGES = {'http://www.lacentrale.fr/': MainPage,
'http://www.lacentrale.fr/': MainPage,
'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage, 'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage,
'http://www.lacentrale.fr/auto-occasion-annonce-.*': AnnoncePage, 'http://www.lacentrale.fr/auto-occasion-annonce-.*': AnnoncePage,
} }
@ -55,12 +54,12 @@ class LaCentraleBrowser(BaseBrowser):
if not self.is_on_page(ListingAutoPage): if not self.is_on_page(ListingAutoPage):
#TODO use urllib.urlencode(data) ? #TODO use urllib.urlencode(data) ?
url = '/listing_auto.php?num=1&witchSearch=0' url = '/listing_auto.php?num=1&witchSearch=0'
url += self._buildUrl(product, 'Citadine={}','urban') url += self._buildUrl(product, 'Citadine={}', 'urban')
url += self._buildUrl(product, 'prix_maxi={}','maxprice') url += self._buildUrl(product, 'prix_maxi={}', 'maxprice')
url += self._buildUrl(product, 'km_maxi={}','maxdist') url += self._buildUrl(product, 'km_maxi={}', 'maxdist')
url += self._buildUrl(product, 'nbportes=%3D{}','nbdoors') url += self._buildUrl(product, 'nbportes=%3D{}', 'nbdoors')
url += self._buildUrl(product, 'cp={}','dept') url += self._buildUrl(product, 'cp={}', 'dept')
url += self._buildUrl(product, 'origine={}','origin') url += self._buildUrl(product, 'origine={}', 'origin')
#print url #print url
self.location(url) self.location(url)
@ -76,7 +75,7 @@ class LaCentraleBrowser(BaseBrowser):
numpage = self.page.get_next() numpage = self.page.get_next()
if not numpage: if not numpage:
break break
url = re.sub('num=(\d+)','num={}'.format(numpage),url) url = re.sub('num=(\d+)', 'num={}'.format(numpage), url)
self.location(url) self.location(url)
assert self.is_on_page(ListingAutoPage) assert self.is_on_page(ListingAutoPage)

View file

@ -27,6 +27,7 @@ from weboob.capabilities.pricecomparison import Product, Price, Shop
__all__ = ['MainPage', 'ListingAutoPage', 'AnnoncePage'] __all__ = ['MainPage', 'ListingAutoPage', 'AnnoncePage']
# I manage main page, ie do nothing yet # I manage main page, ie do nothing yet
class MainPage(BasePage): class MainPage(BasePage):
def iter_products(self, criteria): def iter_products(self, criteria):
@ -37,14 +38,17 @@ class MainPage(BasePage):
product._criteria = criteria product._criteria = criteria
yield product yield product
def get_decimal(s):
    """Return the first run of digits found in ``s``, as a string.

    All whitespace is removed before searching, so a number written with
    grouping separators (``'12 345 km'``) is read as one value
    (``'12345'``).  Every kind of whitespace is dropped -- tabs and
    non-breaking spaces included -- not only the plain ASCII space.

    :param s: text containing a number
    :returns: the digits of the first number in ``s``, as a string
    :raises IndexError: if ``s`` contains no digit at all
    """
    # re.UNICODE makes \s cover non-ASCII spaces (e.g. U+00A0) on Python 2
    # as well; it is already the default for text patterns on Python 3.
    compact = re.sub(r'\s+', '', s, flags=re.UNICODE)
    return re.findall(r'\d+', compact)[0]
def new_shop(id):
    """Create a ``Shop`` for ``id`` and return it.

    ``set_empty_fields(NotLoaded)`` is called on the new object before it
    is handed back.

    :param id: identifier passed straight to the ``Shop`` constructor
    :returns: the freshly created ``Shop``
    """
    placeholder = Shop(id)
    placeholder.set_empty_fields(NotLoaded)
    return placeholder
def new_price(id, product, cost, title): def new_price(id, product, cost, title):
price = Price(id) price = Price(id)
price.product = product price.product = product
@ -55,6 +59,7 @@ def new_price(id, product, cost, title):
price.shop = new_shop(id) price.shop = new_shop(id)
return price return price
# I manage listing page and extract information # I manage listing page and extract information
class ListingAutoPage(BasePage): class ListingAutoPage(BasePage):
@ -67,10 +72,12 @@ class ListingAutoPage(BasePage):
def _extract_id(self, tr): def _extract_id(self, tr):
tdas = tr.cssselect('td.lcbrand a') tdas = tr.cssselect('td.lcbrand a')
if tdas is None or len(tdas)==0: return None if tdas is None or len(tdas) == 0:
return None
tda = tdas[0] tda = tdas[0]
m = re.search('annonce-(\d+)\.html', tda.get('href')) m = re.search('annonce-(\d+)\.html', tda.get('href'))
if not m: return None if not m:
return None
return m.group(1) return m.group(1)
def iter_prices(self, product, numpage): def iter_prices(self, product, numpage):
@ -84,7 +91,7 @@ class ListingAutoPage(BasePage):
title += ', ' + self._extract(ntr, 'lcversion') title += ', ' + self._extract(ntr, 'lcversion')
title += ', ' + self._extract(tr, 'lcyear') title += ', ' + self._extract(tr, 'lcyear')
dist = self._extract(tr, 'lcmileage') + 'km' dist = self._extract(tr, 'lcmileage') + 'km'
title += ', ' + dist.replace(' ','') title += ', ' + dist.replace(' ', '')
cost = ', ' + self._extract(tr, 'lcprice') cost = ', ' + self._extract(tr, 'lcprice')
@ -93,13 +100,14 @@ class ListingAutoPage(BasePage):
def get_next(self):
    """Return the number of the next result page, or None.

    Scans the ``a.page`` links of the document and uses the first one whose
    previous sibling is a ``span`` element; the page number is taken from
    the ``num=<digits>`` part of that link's ``href``.

    :returns: the page number as an int, or None when no link qualifies,
        or when the qualifying link has no ``href`` or no ``num=`` value
    """
    for anchor in self.document.getroot().cssselect('a.page'):
        before = anchor.getprevious()
        if before is None or before.tag != 'span':
            continue
        # A missing href comes back as None; searching None would raise
        # TypeError, so treat it like an href without a page number.
        found = re.search(r'num=(\d+)', anchor.get('href') or '')
        return int(found.group(1)) if found else None
    return None
# I manage one car page (annonce) and extract information # I manage one car page (annonce) and extract information
class AnnoncePage(BasePage): class AnnoncePage(BasePage):
@ -115,7 +123,8 @@ class AnnoncePage(BasePage):
for td in e.cssselect('td.InfoLib'): for td in e.cssselect('td.InfoLib'):
if name in td.text_content(): if name in td.text_content():
ntd = td.getnext() ntd = td.getnext()
if ntd is None: continue if ntd is None:
continue
return ntd.text_content().strip() return ntd.text_content().strip()
return None return None
@ -124,7 +133,8 @@ class AnnoncePage(BasePage):
for span in e.cssselect('span.VendeurLib'): for span in e.cssselect('span.VendeurLib'):
if name in span.text_content(): if name in span.text_content():
li = span.getparent() li = span.getparent()
if li is None: continue if li is None:
continue
# get all text # get all text
s = li.text_content() s = li.text_content()
# get text without header # get text without header
@ -138,7 +148,7 @@ class AnnoncePage(BasePage):
def get_shop(self, id): def get_shop(self, id):
shop = Shop(id) shop = Shop(id)
for e in self.document.getroot().cssselect('div#Vendeur'): for e in self.document.getroot().cssselect('div#Vendeur'):
shop.name = self._extract_vendor(e,'Nom') + '(' + self._extract_vendor(e,'Vendeur') + ')' shop.name = self._extract_vendor(e, 'Nom') + '(' + self._extract_vendor(e, 'Vendeur') + ')'
shop.location = '' shop.location = ''
for adr in self.document.getroot().cssselect('span#AdresseL1,span#AdresseL2'): for adr in self.document.getroot().cssselect('span#AdresseL1,span#AdresseL2'):
if shop.location: if shop.location:
@ -156,12 +166,12 @@ class AnnoncePage(BasePage):
for e in self.document.getroot().cssselect('div#DescBar'): for e in self.document.getroot().cssselect('div#DescBar'):
product = Product(1) product = Product(1)
product.name = unicode('Occasion') product.name = unicode('Occasion')
cost = self._extract(e,'PriceLc') cost = self._extract(e, 'PriceLc')
title = self._extract(e,'BrandLc') title = self._extract(e, 'BrandLc')
title += ', ' + self._extract(e,'modeleCom') title += ', ' + self._extract(e, 'modeleCom')
title += ', ' + self._extract_info(e,'Version') title += ', ' + self._extract_info(e, 'Version')
title += ', ' + self._extract_info(e,'Ann') title += ', ' + self._extract_info(e, 'Ann')
title += ', ' + get_decimal(self._extract_info(e,'Kilom')) + 'km' title += ', ' + get_decimal(self._extract_info(e, 'Kilom')) + 'km'
price = new_price(id, product, cost, title) price = new_price(id, product, cost, title)
price.shop = self.get_shop(id) price.shop = self.get_shop(id)
return price return price

View file

@ -1,8 +1,28 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2014 Vicnet
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
__all__ = ['LaCentraleTest'] __all__ = ['LaCentraleTest']
class LaCentraleTest(BackendTest): class LaCentraleTest(BackendTest):
BACKEND = 'lacentrale' BACKEND = 'lacentrale'