From 3e9c1681589ef3c8746590505b57f2d8bd3c2806 Mon Sep 17 00:00:00 2001 From: Florent Date: Tue, 8 Apr 2014 10:55:42 +0200 Subject: [PATCH] Pep8 cleaning --- modules/lacentrale/backend.py | 6 +++--- modules/lacentrale/browser.py | 23 ++++++++++----------- modules/lacentrale/pages.py | 38 ++++++++++++++++++++++------------- modules/lacentrale/test.py | 20 ++++++++++++++++++ 4 files changed, 58 insertions(+), 29 deletions(-) diff --git a/modules/lacentrale/backend.py b/modules/lacentrale/backend.py index 1829f750..fb22233a 100644 --- a/modules/lacentrale/backend.py +++ b/modules/lacentrale/backend.py @@ -41,7 +41,7 @@ class LaCentraleBackend(BaseBackend, ICapPriceComparison): # inherited from ICapPriceComparison def search_products(self, patternString=None): # convert pattern to criteria - criteria = { } + criteria = {} patterns = [] if patternString: patterns = patternString.split(',') @@ -51,12 +51,12 @@ class LaCentraleBackend(BaseBackend, ICapPriceComparison): criteria['maxprice'] = pattern[:pattern.find(u'€')].strip() if u'km' in pattern: criteria['maxdist'] = pattern[:pattern.find(u'km')].strip() - if u'p' in pattern[-1]: # last char = p + if u'p' in pattern[-1]: # last char = p criteria['nbdoors'] = pattern[:pattern.find(u'p')].strip() if u'cit' in pattern: criteria['urban'] = 'citadine&SS_CATEGORIE=40' if u'dep' in pattern: - criteria['dept'] = re.findall('\d+',pattern)[0] + criteria['dept'] = re.findall('\d+', pattern)[0] if u'pro' in pattern: criteria['origin'] = 1 if u'part' in pattern: diff --git a/modules/lacentrale/browser.py b/modules/lacentrale/browser.py index 58264550..0d6200c7 100644 --- a/modules/lacentrale/browser.py +++ b/modules/lacentrale/browser.py @@ -33,11 +33,10 @@ class LaCentraleBrowser(BaseBrowser): PROTOCOL = 'http' DOMAIN = 'www.lacentrale.fr' ENCODING = 'windows-1252' - PAGES = { - 'http://www.lacentrale.fr/': MainPage, - 'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage, - 
'http://www.lacentrale.fr/auto-occasion-annonce-.*': AnnoncePage, - } + PAGES = {'http://www.lacentrale.fr/': MainPage, + 'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage, + 'http://www.lacentrale.fr/auto-occasion-annonce-.*': AnnoncePage, + } def iter_products(self, criteria): if not self.is_on_page(MainPage): @@ -55,12 +54,12 @@ class LaCentraleBrowser(BaseBrowser): if not self.is_on_page(ListingAutoPage): #TODO use urllib.urlencode(data) ? url = '/listing_auto.php?num=1&witchSearch=0' - url += self._buildUrl(product, 'Citadine={}','urban') - url += self._buildUrl(product, 'prix_maxi={}','maxprice') - url += self._buildUrl(product, 'km_maxi={}','maxdist') - url += self._buildUrl(product, 'nbportes=%3D{}','nbdoors') - url += self._buildUrl(product, 'cp={}','dept') - url += self._buildUrl(product, 'origine={}','origin') + url += self._buildUrl(product, 'Citadine={}', 'urban') + url += self._buildUrl(product, 'prix_maxi={}', 'maxprice') + url += self._buildUrl(product, 'km_maxi={}', 'maxdist') + url += self._buildUrl(product, 'nbportes=%3D{}', 'nbdoors') + url += self._buildUrl(product, 'cp={}', 'dept') + url += self._buildUrl(product, 'origine={}', 'origin') #print url self.location(url) @@ -76,7 +75,7 @@ class LaCentraleBrowser(BaseBrowser): numpage = self.page.get_next() if not numpage: break - url = re.sub('num=(\d+)','num={}'.format(numpage),url) + url = re.sub('num=(\d+)', 'num={}'.format(numpage), url) self.location(url) assert self.is_on_page(ListingAutoPage) diff --git a/modules/lacentrale/pages.py b/modules/lacentrale/pages.py index da2d2eff..2361492d 100644 --- a/modules/lacentrale/pages.py +++ b/modules/lacentrale/pages.py @@ -27,6 +27,7 @@ from weboob.capabilities.pricecomparison import Product, Price, Shop __all__ = ['MainPage', 'ListingAutoPage', 'AnnoncePage'] + # I manage main page, ie do nothing yet class MainPage(BasePage): def iter_products(self, criteria): @@ -37,14 +38,17 @@ class MainPage(BasePage): product._criteria = criteria 
yield product + def get_decimal(s): - return re.findall(r'\d+', s.replace(' ',''))[0] + return re.findall(r'\d+', s.replace(' ', ''))[0] + def new_shop(id): shop = Shop(id) shop.set_empty_fields(NotLoaded) return shop + def new_price(id, product, cost, title): price = Price(id) price.product = product @@ -55,6 +59,7 @@ def new_price(id, product, cost, title): price.shop = new_shop(id) return price + # I manage listing page and extract information class ListingAutoPage(BasePage): @@ -67,10 +72,12 @@ class ListingAutoPage(BasePage): def _extract_id(self, tr): tdas = tr.cssselect('td.lcbrand a') - if tdas is None or len(tdas)==0: return None + if tdas is None or len(tdas) == 0: + return None tda = tdas[0] m = re.search('annonce-(\d+)\.html', tda.get('href')) - if not m: return None + if not m: + return None return m.group(1) def iter_prices(self, product, numpage): @@ -84,7 +91,7 @@ class ListingAutoPage(BasePage): title += ', ' + self._extract(ntr, 'lcversion') title += ', ' + self._extract(tr, 'lcyear') dist = self._extract(tr, 'lcmileage') + 'km' - title += ', ' + dist.replace(' ','') + title += ', ' + dist.replace(' ', '') cost = ', ' + self._extract(tr, 'lcprice') @@ -93,13 +100,14 @@ class ListingAutoPage(BasePage): def get_next(self): for a in self.document.getroot().cssselect('a.page'): s = a.getprevious() - if s is not None and s.tag=='span': + if s is not None and s.tag == 'span': m = re.search('num=(\d+)', a.get('href')) if not m: return None return int(m.group(1)) return None + # I manage one car page (annonce) )and extract information class AnnoncePage(BasePage): @@ -115,7 +123,8 @@ class AnnoncePage(BasePage): for td in e.cssselect('td.InfoLib'): if name in td.text_content(): ntd = td.getnext() - if ntd is None: continue + if ntd is None: + continue return ntd.text_content().strip() return None @@ -124,7 +133,8 @@ class AnnoncePage(BasePage): for span in e.cssselect('span.VendeurLib'): if name in span.text_content(): li = span.getparent() - if li is 
None: continue + if li is None: + continue # get all text s = li.text_content() # get text without header @@ -138,7 +148,7 @@ class AnnoncePage(BasePage): def get_shop(self, id): shop = Shop(id) for e in self.document.getroot().cssselect('div#Vendeur'): - shop.name = self._extract_vendor(e,'Nom') + '(' + self._extract_vendor(e,'Vendeur') + ')' + shop.name = self._extract_vendor(e, 'Nom') + '(' + self._extract_vendor(e, 'Vendeur') + ')' shop.location = '' for adr in self.document.getroot().cssselect('span#AdresseL1,span#AdresseL2'): if shop.location: @@ -156,12 +166,12 @@ class AnnoncePage(BasePage): for e in self.document.getroot().cssselect('div#DescBar'): product = Product(1) product.name = unicode('Occasion') - cost = self._extract(e,'PriceLc') - title = self._extract(e,'BrandLc') - title += ', ' + self._extract(e,'modeleCom') - title += ', ' + self._extract_info(e,'Version') - title += ', ' + self._extract_info(e,'Ann') - title += ', ' + get_decimal(self._extract_info(e,'Kilom')) + 'km' + cost = self._extract(e, 'PriceLc') + title = self._extract(e, 'BrandLc') + title += ', ' + self._extract(e, 'modeleCom') + title += ', ' + self._extract_info(e, 'Version') + title += ', ' + self._extract_info(e, 'Ann') + title += ', ' + get_decimal(self._extract_info(e, 'Kilom')) + 'km' price = new_price(id, product, cost, title) price.shop = self.get_shop(id) return price diff --git a/modules/lacentrale/test.py b/modules/lacentrale/test.py index 2d779190..5e6270b3 100644 --- a/modules/lacentrale/test.py +++ b/modules/lacentrale/test.py @@ -1,8 +1,28 @@ # -*- coding: utf-8 -*- + +# Copyright(C) 2014 Vicnet +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + from weboob.tools.test import BackendTest __all__ = ['LaCentraleTest'] + class LaCentraleTest(BackendTest): BACKEND = 'lacentrale'