weboob.tools.browser -> weboob.deprecated.browser weboob.tools.parsers -> weboob.deprecated.browser.parsers weboob.tools.mech -> weboob.deprecated.mech weboob.browser2 -> weboob.browser weboob.core.exceptions -> weboob.exceptions Also, the new tree for browser2 is: weboob.browser: import weboob.browser.browsers.* and weboob.browser.url.* weboob.browser.browsers: all browsers (including PagesBrowser and LoginBrowser) weboob.browser.url: the URL class weboob.browser.profiles: all Profile classes weboob.browser.sessions: WeboobSession and FuturesSession weboob.browser.cookies: that's a cookies thing weboob.browser.pages: all Page and derived classes, and Form class weboob.browser.exceptions: specific browser exceptions weboob.browser.elements: AbstractElement classes, and 'method' decorator weboob.browser.filters.*: all filters
86 lines
3 KiB
Python
86 lines
3 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2014 Vicnet
|
|
#
|
|
# This file is part of weboob.
|
|
#
|
|
# weboob is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# weboob is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
import re
|
|
|
|
from weboob.deprecated.browser import Browser
|
|
|
|
from .pages import MainPage, ListingAutoPage, AnnoncePage
|
|
|
|
|
|
__all__ = ['LaCentraleBrowser']
|
|
|
|
|
|
# This browser manages URLs and page navigation, then hands off to the page objects.
|
|
class LaCentraleBrowser(Browser):
    """Browser for www.lacentrale.fr.

    Builds the URLs, moves between locations and then calls the matching
    method on ``self.page`` to do the actual extraction.
    """

    PROTOCOL = 'http'
    DOMAIN = 'www.lacentrale.fr'
    ENCODING = 'windows-1252'
    # Associates URL patterns with the page class handling them.
    # NOTE(review): if these keys are interpreted as regular expressions (the
    # trailing '.*' suggests so), the unescaped '?' in the listing pattern
    # makes the preceding 'p' optional instead of matching a literal '?'.
    # Left unchanged here; confirm against the Browser base class before
    # tightening it.
    PAGES = {'http://www.lacentrale.fr/': MainPage,
             'http://www.lacentrale.fr/listing_auto.php?.*': ListingAutoPage,
             'http://www.lacentrale.fr/auto-occasion-annonce-.*': AnnoncePage,
             }

    def iter_products(self, criteria):
        """Return what the main page's ``iter_products(criteria)`` returns.

        Goes to '/' first when the browser is not already on ``MainPage``.

        :raises AssertionError: if the browser is not on ``MainPage``
            after navigating.
        """
        if not self.is_on_page(MainPage):
            self.location('/')
        assert self.is_on_page(MainPage)
        return self.page.iter_products(criteria)

    def _buildUrl(self, product, request, criteria):
        """Return one '&'-prefixed query-string fragment, or '' if unset.

        :param product: object exposing a ``_criteria`` mapping
        :param request: ``str.format`` template with one placeholder,
            e.g. ``'cp={}'``
        :param criteria: key looked up in ``product._criteria``
        """
        if criteria in product._criteria:
            return '&' + request.format(product._criteria.get(criteria))
        return ''

    def iter_prices(self, product):
        """Yield the prices of every result page for ``product``'s search.

        The search URL is always rebuilt from ``product._criteria`` and
        loaded, even when the browser already sits on a listing page.
        Otherwise the listing left over from a previous search would be
        parsed, and ``url`` would be undefined when moving to the next page.

        :raises AssertionError: if a loaded location is not recognised as
            ``ListingAutoPage``.
        """
        # (query template, criterion key) pairs, appended in this order.
        listing_params = (
            ('Citadine={}', 'urban'),
            ('prix_maxi={}', 'maxprice'),
            ('km_maxi={}', 'maxdist'),
            ('nbportes=%3D{}', 'nbdoors'),
            ('cp={}', 'dept'),
            ('origine={}', 'origin'),
        )

        #TODO use urllib.urlencode(data) ?
        url = '/listing_auto.php?num=1&witchSearch=0'
        for request, criteria in listing_params:
            url += self._buildUrl(product, request, criteria)

        self.location(url)
        assert self.is_on_page(ListingAutoPage)

        numpage = 1
        while True:
            # parse the current page
            for price in self.page.iter_prices(product, numpage):
                yield price

            # a falsy value from get_next() means there is no further page
            numpage = self.page.get_next()
            if not numpage:
                break

            # same search, only the 'num' (page number) parameter changes
            url = re.sub(r'num=(\d+)', 'num={}'.format(numpage), url)
            self.location(url)
            assert self.is_on_page(ListingAutoPage)

    def get_price(self, id):
        """Load the advert page for ``id`` and return its ``get_price(id)``.

        :param id: advert identifier as a string; it is concatenated into
            the URL, e.g. '/auto-occasion-annonce-23440064.html'
        :raises AssertionError: if the loaded location is not recognised as
            ``AnnoncePage``.
        """
        self.location('/auto-occasion-annonce-'+id+'.html')
        assert self.is_on_page(AnnoncePage)
        return self.page.get_price(id)
|