[explorimmo] New CapHousing module explorimmo
This commit is contained in:
parent
2eb5507c3a
commit
015626b87e
5 changed files with 395 additions and 0 deletions
24
modules/explorimmo/__init__.py
Normal file
24
modules/explorimmo/__init__.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Bezleputh
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from .module import ExplorimmoModule
|
||||
|
||||
|
||||
# Names exported by a star-import of this package.
__all__ = ['ExplorimmoModule']
|
||||
79
modules/explorimmo/browser.py
Normal file
79
modules/explorimmo/browser.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Bezleputh
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import urllib
|
||||
from weboob.browser import PagesBrowser, URL
|
||||
from weboob.capabilities.housing import Query
|
||||
from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage
|
||||
|
||||
|
||||
class ExplorimmoBrowser(PagesBrowser):
    """Browser for the explorimmo.com housing listings.

    Declares the URL patterns handled by the module and exposes the
    operations used by the backend: city lookup, housing search, housing
    details, phone number retrieval and total result-page count.
    """

    BASEURL = 'http://www.explorimmo.com'

    # The URL patterns are regular expressions.  They are written as raw
    # strings so that regex escapes (the escaped question mark) are not
    # processed as -- invalid -- Python string escape sequences.
    cities = URL(r'rest/locations\?q=(?P<city>.*)', CitiesPage)
    search = URL(r'resultat/annonces.html\?(?P<query>.*)', SearchPage)
    housing_html = URL(r'annonce-(?P<_id>.*).html', HousingPage)
    phone = URL(r'rest/classifieds/(?P<_id>.*)/phone', PhonePage)
    housing = URL(r'rest/classifieds/(?P<_id>.*)',
                  r'rest/classifieds/\?(?P<js_datas>.*)', HousingPage2)

    # Query transaction type -> value of the "transaction" URL parameter.
    TYPES = {Query.TYPE_RENT: 'location',
             Query.TYPE_SALE: 'vente'}

    # Query house type -> value of the "type" URL parameter.
    RET = {Query.HOUSE_TYPES.HOUSE: 'Maison',
           Query.HOUSE_TYPES.APART: 'Appartement',
           Query.HOUSE_TYPES.LAND: 'Terrain',
           Query.HOUSE_TYPES.PARKING: 'Parking',
           Query.HOUSE_TYPES.OTHER: 'Divers'}

    def get_cities(self, pattern):
        """Open the cities URL for ``pattern`` and return the page's cities."""
        return self.cities.open(city=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Go to the search page matching the criteria and iterate its housings.

        :param type: transaction type; a value missing from ``TYPES`` falls
                     back to ``'location'``
        :param cities: city identifiers, joined with commas in the query
        :param nb_rooms: minimum number of rooms (falsy values are sent empty)
        :param area_min: minimum area (falsy values are sent empty)
        :param area_max: maximum area (falsy values are sent empty)
        :param cost_min: minimum price (falsy values are sent empty)
        :param cost_max: maximum price (falsy values are sent empty)
        :param house_types: house types; values missing from ``RET`` are ignored
        :return: the result of ``iter_housings()`` on the search page
        """
        # Keep only the house types the website knows about.
        ret = [self.RET[house_type] for house_type in house_types
               if house_type in self.RET]

        data = {'location': ','.join(cities),
                'areaMin': area_min or '',
                'areaMax': area_max or '',
                'priceMin': cost_min or '',
                'priceMax': cost_max or '',
                'transaction': self.TYPES.get(type, 'location'),
                'recherche': '',
                'mode': '',
                'proximity': '0',
                'roomMin': nb_rooms or '',
                'page': '1'
                }

        # The "type" parameter is repeated once per selected label, so it is
        # appended by hand after the url-encoded single-valued parameters.
        params = '%s%s%s' % (urllib.urlencode(data), '&type=', '&type='.join(ret))

        return self.search.go(query=params).iter_housings()

    def get_housing(self, _id, housing=None):
        """Go to the housing URL for ``_id`` and fill ``housing`` (or a new object)."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Go to the phone URL for ``_id`` and return the page's phone number."""
        return self.phone.go(_id=_id).get_phone()

    def get_total_page(self, js_datas):
        """Open the housing URL built from ``js_datas`` and return its total page count."""
        return self.housing.open(js_datas=js_datas).get_total_page()
|
||||
77
modules/explorimmo/module.py
Normal file
77
modules/explorimmo/module.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Bezleputh
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.backend import Module
|
||||
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
|
||||
|
||||
from .browser import ExplorimmoBrowser
|
||||
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = ['ExplorimmoModule']
|
||||
|
||||
|
||||
class ExplorimmoModule(Module, CapHousing):
    """Housing backend for the explorimmo website.

    Every operation is delegated to :class:`ExplorimmoBrowser`.
    """

    NAME = 'explorimmo'
    DESCRIPTION = u'explorimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '1.1'

    BROWSER = ExplorimmoBrowser

    def get_housing(self, housing):
        """Return a housing with its details and phone number loaded.

        ``housing`` is either a ``Housing`` object, which is filled in, or a
        housing identifier, in which case the browser is given no object to
        fill.
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            housing_id, housing = housing, None

        housing = self.browser.get_housing(housing_id, housing)
        housing.phone = self.browser.get_phone(housing_id)
        return housing

    def search_city(self, pattern):
        """Return the cities found by the browser for ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search the housings matching ``query``.

        Only the cities of the query whose backend is this one are used;
        without any such city an empty list is returned.
        """
        cities = ['%s' % c.id for c in query.cities if c.backend == self.name]
        if not cities:
            return []

        return self.browser.search_housings(
            query.type, cities, query.nb_rooms,
            query.area_min, query.area_max,
            query.cost_min, query.cost_max,
            query.house_types)

    def fill_housing(self, housing, fields):
        """Reload the details of ``housing``; the phone only when requested."""
        self.browser.get_housing(housing.id, housing)

        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)

        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when it is requested and still missing."""
        wanted = 'data' in fields
        if wanted and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    # Object type -> method used to fill objects of that type.
    OBJECTS = {
        Housing: fill_housing,
        HousingPhoto: fill_photo,
    }
|
||||
175
modules/explorimmo/pages.py
Normal file
175
modules/explorimmo/pages.py
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Bezleputh
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
from decimal import Decimal
|
||||
from datetime import datetime
|
||||
from weboob.browser.filters.json import Dict
|
||||
from weboob.browser.elements import ItemElement, ListElement, method
|
||||
from weboob.browser.pages import JsonPage, HTMLPage, pagination
|
||||
from weboob.browser.filters.standard import CleanText, CleanDecimal, Regexp, Env, BrowserURL, Filter, Format
|
||||
from weboob.browser.filters.html import CleanHTML, XPath
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.capabilities.housing import Housing, HousingPhoto, City
|
||||
|
||||
|
||||
class DictElement(ListElement):
    """List element whose items are read from a dictionary entry.

    ``item_xpath`` is used as a key looked up in ``self.el[0]`` rather than
    as an XPath expression.
    """

    def find_elements(self):
        """Yield every item stored under the ``item_xpath`` key."""
        # NOTE(review): self.el[0] is presumably the decoded JSON document.
        entries = self.el[0].get(self.item_xpath)
        for entry in entries:
            yield entry
|
||||
|
||||
|
||||
class CitiesPage(JsonPage):
    """JSON page returned by the city lookup service."""

    @method
    class get_cities(DictElement):
        # The cities are listed under the "locations" key of the document.
        item_xpath = 'locations'

        class item(ItemElement):
            klass = City

            # The label is used both as identifier and as display name.
            obj_id = Dict('label')
            obj_name = Dict('label')
|
||||
|
||||
|
||||
class SearchPage(HTMLPage):
    """HTML page listing the results of a housing search."""

    @pagination
    @method
    class iter_housings(ListElement):
        # One housing per result block of the page.
        item_xpath = '//div[starts-with(@id, "bloc-vue-")]'

        def next_page(self):
            """Return the URL of the next result page.

            The total number of pages is requested through the browser from
            the search request stored in the page.  ``None`` is returned
            when the current URL carries no page number or when the last
            page has been reached.
            """
            js_datas = CleanText('//div[@id="js-data"]/@data-rest-search-request')(self)
            total_page = self.page.browser.get_total_page(js_datas.split('?')[-1])

            # The page number may have several digits: capturing a single
            # digit would turn "page=10" into "page=20" on replacement.
            m = re.match(r".*page=(\d+)(?:&.*)?", self.page.url)
            if m:
                current_page = int(m.group(1))
                next_page = current_page + 1
                if next_page <= total_page:
                    return self.page.url.replace('page=%d' % current_page, 'page=%d' % next_page)

        class item(ItemElement):
            klass = Housing

            obj_id = CleanText('./@data-classified-id')
            obj_title = CleanText('./div/h2[@itemprop="name"]/a')
            # "@class" tests the attribute; a bare "class" would test a child
            # element named "class" instead.
            obj_location = CleanText('./div/h2[@itemprop="name"]/span[@class="item-localisation"]')
            obj_cost = CleanDecimal('./div/div/span[@class="price-label"]')
            # Currency symbol found in the price label, euro when there is none.
            obj_currency = Regexp(CleanText('./div/div/span[@class="price-label"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
            obj_text = CleanText('./div/div/div[@itemprop="description"]')
            # The area is the number preceding " m2" in the title.
            obj_area = CleanDecimal(Regexp(CleanText('./div/h2[@itemprop="name"]/a'),
                                           r'(.*?)(\d*) m2(.*?)', '\\2', default=None),
                                    default=NotAvailable)
            obj_phone = CleanText('./div/div/ul/li/span[@class="js-clickphone"]',
                                  replace=[(u'Téléphoner : ', u'')],
                                  default=NotAvailable)

            def obj_photos(self):
                """Return a one-element list holding the thumbnail of the result."""
                url = CleanText('./div/div/a/img[@itemprop="image"]/@src')(self)
                return [HousingPhoto(url)]
|
||||
|
||||
|
||||
class TypeDecimal(Filter):
    """Filter converting the selected value to a ``Decimal``."""

    def filter(self, el):
        """Return ``el`` converted with the ``Decimal`` constructor."""
        value = Decimal(el)
        return value
|
||||
|
||||
|
||||
class FromTimestamp(Filter):
    """Filter converting a timestamp in milliseconds to a ``datetime``."""

    def filter(self, el):
        """Return the ``datetime`` for ``el``, scaled from milliseconds to seconds."""
        seconds = el / 1000.0
        return datetime.fromtimestamp(seconds)
|
||||
|
||||
|
||||
class PhonePage(JsonPage):
    """JSON page holding the phone number of a classified."""

    def get_phone(self):
        """Return the ``phoneNumber`` entry of the document (``None`` if absent)."""
        phone_number = self.doc.get('phoneNumber')
        return phone_number
|
||||
|
||||
|
||||
class HousingPage2(JsonPage):
    """JSON page describing a classified; also carries pagination data."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = Dict('characteristics/titleWithTransaction')
        # Address, postal code and city label separated by spaces.
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/postalCode'), Dict('location/cityLabel'))
        obj_cost = TypeDecimal(Dict('characteristics/price'))
        obj_currency = u'€'
        obj_text = CleanHTML(Dict('characteristics/description'))
        # Public URL of the HTML version of the classified.
        obj_url = BrowserURL('housing_html', _id=Env('_id'))
        obj_area = TypeDecimal(Dict('characteristics/area'))
        obj_date = FromTimestamp(Dict('characteristics/date'))

        def obj_photos(self):
            """Return the photos whose address contains a photos.ubiflow.net URL."""
            found = (re.search('.*(http://photos.ubiflow.net.*)', image)
                     for image in Dict('characteristics/images')(self))
            # Images that do not match the pattern are skipped.
            return [HousingPhoto(match.group(1)) for match in found if match]

        def obj_details(self):
            """Return the extra characteristics of the classified as a dict."""
            return {
                'fees': Dict('characteristics/fees')(self),
                'bedrooms': Dict('characteristics/bedroomCount')(self),
                'energy': Dict('characteristics/energyConsumptionCategory')(self),
                # Only the first element of roomCount is kept.
                'rooms': Dict('characteristics/roomCount')(self)[0],
                'available': Dict('characteristics/available')(self),
            }

    def get_total_page(self):
        """Return the ``total`` entry of the ``pagination`` section."""
        paging = self.doc.get('pagination')
        return paging.get('total')
|
||||
|
||||
|
||||
class HousingPage(HTMLPage):
    """HTML page describing a classified."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        # Currency symbol found in the price, euro when there is none.
        obj_currency = Regexp(CleanText('//span[@itemprop="price"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # The area is the number preceding " m2" in the title.  The inner
        # default mirrors the same extraction done on the search results, so
        # that a title without area gives NotAvailable instead of an error.
        obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'),
                                       r'(.*?)(\d*) m2(.*?)', '\\2', default=None),
                                default=NotAvailable)

        def obj_photos(self):
            """Return the photos whose source contains a photos.ubiflow.net URL."""
            photos = []
            for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self):
                # Images hosted elsewhere are skipped, as the JSON page does,
                # rather than failing the whole housing.
                url = Regexp(CleanText('./@src'), '.*(http://photos.ubiflow.net.*)',
                             default=None)(img)
                if url:
                    photos.append(HousingPhoto(url))
            return photos

        def obj_details(self):
            """Return the features listed on the page as a name -> value dict."""
            details = {}
            for item in XPath('//div[@class="features clearfix"]/ul/li')(self):
                key = CleanText('./span[@class="name"]')(item)
                value = CleanText('./span[@class="value"]')(item)
                # Incomplete rows are ignored.
                if value and key:
                    details[key] = value

            # The energy consumption block is stored under its own title.
            key = CleanText('//div[@class="title-dpe clearfix"]')(self)
            value = CleanText('//div[@class="energy-consumption"]')(self)
            if value and key:
                details[key] = value
            return details
|
||||
40
modules/explorimmo/test.py
Normal file
40
modules/explorimmo/test.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Bezleputh
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import itertools
|
||||
from weboob.capabilities.housing import Query
|
||||
from weboob.tools.test import BackendTest
|
||||
|
||||
|
||||
class ExplorimmoTest(BackendTest):
    """Functional test of the explorimmo backend."""

    MODULE = 'explorimmo'

    def test_explorimmo(self):
        """Search housings in the cities matching 'paris' and fill the first one."""
        backend = self.backend

        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.cities = []
        for found_city in backend.search_city('paris'):
            # The module only searches the cities attached to its own backend.
            found_city.backend = backend.name
            query.cities.append(found_city)

        # Look at the first 20 results at most.
        housings = backend.search_housings(query)
        results = list(itertools.islice(housings, 0, 20))
        self.assertTrue(len(results) > 0)

        backend.fillobj(results[0], 'phone')
|
||||
Loading…
Add table
Add a link
Reference in a new issue