120 lines
4.2 KiB
Python
120 lines
4.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2012 Romain Bignon
|
|
#
|
|
# This file is part of weboob.
|
|
#
|
|
# weboob is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# weboob is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
#from decimal import Decimal
|
|
#import re
|
|
|
|
from weboob.tools.browser import BasePage, BrokenPageError
|
|
from weboob.capabilities import NotAvailable
|
|
from weboob.capabilities.pricecomparison import Product, Price, Shop
|
|
import re
|
|
from decimal import Decimal
|
|
|
|
# Page classes exported by this module.
__all__ = [
    'MainPage',
    'ListingAutoPage',
]
|
|
|
|
class MainPage(BasePage):
    """Main page of the site; source of the single placeholder product."""

    def iter_products(self, criteria):
        """Yield one product named 'Occasion' that carries *criteria*.

        :param criteria: search criteria; stored untouched on the product
                         as ``_criteria``.
        :returns: generator yielding exactly one ``Product`` with id ``1``.
        """
        occasion = Product(1)
        # TODO check if criteria exists in main page
        # and get the GET keyword to fill request ?
        occasion.name = unicode('Occasion')
        occasion._criteria = criteria
        yield occasion
|
|
|
|
class ListingAutoPage(BasePage):
    """Results page whose table rows are turned into ``Price`` objects."""

    def _extract(self, tr, name):
        """Return the stripped text of the last link inside ``td.<name>``.

        :param tr: row element to search; ``None`` is accepted so callers
                   can pass the result of ``getnext()`` directly.
        :param name: CSS class of the ``td`` cell to read.
        :returns: the link text, or ``''`` when the row or the link is
                  missing.
        """
        if tr is None:
            return ''
        links = tr.cssselect('td.' + name + ' a')
        if not links:
            return ''
        return links[-1].text_content().strip()

    def iter_prices(self, product, numpage):
        """Yield one ``Price`` per usable row of the page.

        Rows without a brand or without a parsable price are skipped.

        :param product: product attached to every yielded price.
        :param numpage: page number, used as the prefix of each price id.
        :returns: generator of ``Price`` objects whose ``message`` is
                  "brand, model, version, year, mileage" and whose shop is
                  an empty ``Shop`` filled with ``NotAvailable``.
        """
        rows = self.document.getroot().cssselect(
            'tr.lcline[id],tr.lclineJB[id],tr.lclineJ[id]')
        for tr in rows:
            # The selector guarantees an ``id`` attribute; its first three
            # characters are dropped to build the price id.
            price_id = '{numpage}.{id}'.format(numpage=numpage,
                                               id=tr.attrib['id'][3:])

            brand = self._extract(tr, 'lcbrand')
            if not brand:
                continue

            # NOTE(review): only plain spaces are removed before parsing, so
            # with any other thousands separator just the first digit group
            # is kept -- confirm against the live page.
            digits = re.findall(
                r'\d+', self._extract(tr, 'lcprice').replace(' ', ''))
            if not digits:
                # No price to report: skip the row instead of letting an
                # IndexError abort the whole iteration.
                continue

            # The version cell is read from the following row; _extract
            # copes with a missing sibling (getnext() returning None).
            version = self._extract(tr.getnext(), 'lcversion')
            mileage = (self._extract(tr, 'lcmileage') + 'km').replace(' ', '')
            title = ', '.join([
                brand,
                self._extract(tr, 'lcmodel'),
                version,
                self._extract(tr, 'lcyear'),
                mileage,
            ])

            price = Price(price_id)
            price.product = product
            price.cost = Decimal(digits[0])
            price.currency = u'€'
            price.message = unicode(title)
            price.shop = Shop(price.id)
            price.shop.set_empty_fields(NotAvailable)

            price.set_empty_fields(NotAvailable)
            yield price

    def get_next(self):
        """Return the page number found in the next-page link, if any.

        The link used is the first ``a.page`` element whose previous sibling
        is a ``span`` (presumably the current-page marker -- confirm).

        :returns: the integer after ``num=`` in that link's ``href``, or
                  ``None`` when there is no such link or no number in it.
        """
        for link in self.document.getroot().cssselect('a.page'):
            previous = link.getprevious()
            if previous is None or previous.tag != 'span':
                continue
            # A link without href is treated like one without a page number.
            match = re.search(r'num=(\d+)', link.get('href', ''))
            if not match:
                return None
            return int(match.group(1))
        return None
|
|
|
|
#class ComparisonResultsPage(BasePage):
|
|
#def get_product_name(self):
|
|
#try:
|
|
#div = self.parser.select(self.document.getroot(), 'div#moins_plus_ariane', 1)
|
|
#except BrokenPageError:
|
|
#return NotAvailable
|
|
#else:
|
|
#m = re.match('Carburant : ([\w\-]+) | .*', div.text)
|
|
#return m.group(1)
|
|
|
|
#def iter_results(self, product=None):
|
|
#price = None
|
|
#product.name = self.get_product_name()
|
|
#for tr in self.document.getroot().cssselect('table#tab_resultat tr'):
|
|
#if tr.attrib.get('id', '').startswith('pdv'):
|
|
#price = Price('%s.%s' % (product.id, tr.attrib['id'][3:]))
|
|
|
|
#price.product = product
|
|
|
|
#tds = tr.findall('td')
|
|
#price.cost = Decimal(tds[4].text.replace(',', '.'))
|
|
#price.currency = u'€'
|
|
|
|
#shop = Shop(price.id)
|
|
#shop.name = unicode(tds[2].text.strip())
|
|
#shop.location = unicode(tds[0].text.strip())
|
|
|
|
#price.shop = shop
|
|
#price.set_empty_fields(NotAvailable)
|
|
#yield price
|
|
|
|
|
|
#class ShopInfoPage(BasePage):
|
|
#def get_info(self):
|
|
#return self.parser.tostring(self.parser.select(self.document.getroot(), 'div.colg', 1))
|