diff --git a/modules/explorimmo/__init__.py b/modules/explorimmo/__init__.py new file mode 100644 index 00000000..1ea252a5 --- /dev/null +++ b/modules/explorimmo/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from .module import ExplorimmoModule + + +__all__ = ['ExplorimmoModule'] diff --git a/modules/explorimmo/browser.py b/modules/explorimmo/browser.py new file mode 100644 index 00000000..78dc226a --- /dev/null +++ b/modules/explorimmo/browser.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import Query
from .pages import CitiesPage, SearchPage, HousingPage, HousingPage2, PhonePage


class ExplorimmoBrowser(PagesBrowser):
    """Browser for www.explorimmo.com.

    Search results are read from an HTML page; city lookup, housing details,
    phone numbers and the pagination total are read from JSON pages.
    """

    BASEURL = 'http://www.explorimmo.com'

    # Each named group is filled from the keyword argument of the same name
    # passed to ``URL.open()`` / ``URL.go()`` by the methods below.
    cities = URL(r'rest/locations\?q=(?P<city>.*)', CitiesPage)
    search = URL(r'resultat/annonces.html\?(?P<query>.*)', SearchPage)
    housing_html = URL(r'annonce-(?P<_id>.*).html', HousingPage)
    phone = URL(r'rest/classifieds/(?P<_id>.*)/phone', PhonePage)
    housing = URL(r'rest/classifieds/(?P<_id>.*)',
                  r'rest/classifieds/\?(?P<js_datas>.*)', HousingPage2)

    # Query.type -> value of the ``transaction`` request parameter.
    TYPES = {Query.TYPE_RENT: 'location',
             Query.TYPE_SALE: 'vente'}

    # Query.HOUSE_TYPES -> value of the repeated ``type`` request parameter.
    RET = {Query.HOUSE_TYPES.HOUSE: 'Maison',
           Query.HOUSE_TYPES.APART: 'Appartement',
           Query.HOUSE_TYPES.LAND: 'Terrain',
           Query.HOUSE_TYPES.PARKING: 'Parking',
           Query.HOUSE_TYPES.OTHER: 'Divers'}

    def get_cities(self, pattern):
        """Return the cities the ``cities`` endpoint yields for ``pattern``."""
        return self.cities.open(city=pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Go to the first result page of a search and iterate its housings.

        :param type: a ``Query.TYPE_*`` value; anything not in ``TYPES`` is
                     searched as ``'location'``
        :param cities: city identifiers, joined with ``,`` in the request
        :param nb_rooms: minimum number of rooms (sent as ``roomMin``)
        :param area_min: minimum area
        :param area_max: maximum area
        :param cost_min: minimum price
        :param cost_max: maximum price
        :param house_types: ``Query.HOUSE_TYPES`` values; those missing from
                            ``RET`` are ignored
        :return: the result of ``SearchPage.iter_housings()``

        Falsy numeric criteria (``None``, ``0``) are sent as empty strings.
        """
        type_labels = [self.RET[house_type]
                       for house_type in house_types
                       if house_type in self.RET]

        data = {'location': ','.join(cities),
                'areaMin': area_min or '',
                'areaMax': area_max or '',
                'priceMin': cost_min or '',
                'priceMax': cost_max or '',
                'transaction': self.TYPES.get(type, 'location'),
                'recherche': '',
                'mode': '',
                'proximity': '0',
                'roomMin': nb_rooms or '',
                'page': '1',
                }

        # ``type`` is a repeated parameter, so it cannot live in ``data``.
        # An empty ``type=`` is still sent when no house type was selected.
        params = '%s&type=%s' % (urllib.urlencode(data), '&type='.join(type_labels))

        return self.search.go(query=params).iter_housings()

    def get_housing(self, _id, housing=None):
        """Load the JSON page of housing ``_id`` and build (or fill) a Housing.

        :param housing: optional existing object handed to the page as ``obj``
        """
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the phone number exposed by the phone endpoint for ``_id``."""
        return self.phone.go(_id=_id).get_phone()

    def get_total_page(self, js_datas):
        """Open the classifieds endpoint with the query string ``js_datas``
        and return the total reported by the resulting page."""
        return self.housing.open(js_datas=js_datas).get_total_page()


# (patch metadata kept from the collapsed source line)
# diff --git a/modules/explorimmo/module.py b/modules/explorimmo/module.py new
# file mode 100644 index 00000000..ea76c578 --- /dev/null +++ b/modules/explorimmo/module.py @@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto

from .browser import ExplorimmoBrowser


__all__ = ['ExplorimmoModule']


class ExplorimmoModule(Module, CapHousing):
    """weboob housing backend for the explorimmo website."""

    NAME = 'explorimmo'
    DESCRIPTION = u'explorimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '1.1'

    BROWSER = ExplorimmoBrowser

    def get_housing(self, housing):
        """Return a housing with its details and phone number loaded.

        :param housing: either a ``Housing`` (filled in place by the browser)
                        or a bare housing identifier
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            # A bare identifier was given: let the browser build the object.
            housing_id, housing = housing, None

        housing = self.browser.get_housing(housing_id, housing)
        housing.phone = self.browser.get_phone(housing_id)
        return housing

    def search_city(self, pattern):
        """Return the cities the browser finds for ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings for ``query``.

        Only the cities of ``query`` whose ``backend`` is this backend are
        used; when there are none, an empty list is returned and no request
        is made.
        """
        # '%s' % ... (not str()) so unicode identifiers are kept as they are.
        cities = ['%s' % c.id for c in query.cities if c.backend == self.name]
        if not cities:
            return []

        return self.browser.search_housings(query.type, cities, query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types)

    def fill_housing(self, housing, fields):
        """Reload ``housing`` from the site; the phone number is fetched only
        when ``'phone'`` is in ``fields``."""
        self.browser.get_housing(housing.id, housing)
        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)

        return housing

    def fill_photo(self, photo, fields):
        """Download the content of ``photo`` when ``'data'`` is requested, the
        photo has a URL and its data is not already set."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    # Object class -> method used to fill objects of that class.
    OBJECTS = {Housing: fill_housing,
               HousingPhoto: fill_photo,
               }


# (patch metadata kept from the collapsed source line)
# diff --git a/modules/explorimmo/pages.py b/modules/explorimmo/pages.py new file mode 100644 index 00000000..08c024af --- /dev/null +++ b/modules/explorimmo/pages.py @@ -0,0 +1,175 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from decimal import Decimal
from datetime import datetime
from weboob.browser.filters.json import Dict
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.pages import JsonPage, HTMLPage, pagination
from weboob.browser.filters.standard import CleanText, CleanDecimal, Regexp, Env, BrowserURL, Filter, Format
from weboob.browser.filters.html import CleanHTML, XPath
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.housing import Housing, HousingPhoto, City


class DictElement(ListElement):
    """ListElement whose items are the entries stored under the key
    ``item_xpath`` of ``self.el[0]``."""

    def find_elements(self):
        for el in self.el[0].get(self.item_xpath):
            yield el


class CitiesPage(JsonPage):
    """JSON answer of the city lookup."""

    @method
    class get_cities(DictElement):
        item_xpath = 'locations'

        class item(ItemElement):
            klass = City

            # The label is used both as identifier and as display name.
            obj_id = Dict('label')
            obj_name = Dict('label')


class SearchPage(HTMLPage):
    """HTML page listing the results of a search."""

    @pagination
    @method
    class iter_housings(ListElement):
        item_xpath = '//div[starts-with(@id, "bloc-vue-")]'

        def next_page(self):
            """Return the URL of the next result page, or None when the
            current URL has no ``page`` parameter or the last page is reached.
            """
            url = self.page.url
            # ``\d+`` reads the whole page number; a single optional digit
            # would read page 10 as page 1 and build a wrong next URL.
            m = re.search(r'([?&]page=)(\d+)', url)
            if m is None:
                return None

            # The total is only asked for once a page number has been found.
            js_datas = CleanText('//div[@id="js-data"]/@data-rest-search-request')(self)
            total_page = self.page.browser.get_total_page(js_datas.split('?')[-1])

            next_page = int(m.group(2)) + 1
            if next_page <= total_page:
                # Replace exactly the matched digits, nothing else in the URL.
                return '%s%d%s' % (url[:m.start(2)], next_page, url[m.end(2):])
            return None

        class item(ItemElement):
            klass = Housing

            obj_id = CleanText('./@data-classified-id')
            obj_title = CleanText('./div/h2[@itemprop="name"]/a')
            # ``@class`` tests the attribute; a bare ``class`` would look for
            # a child element named "class" and never match.
            obj_location = CleanText('./div/h2[@itemprop="name"]/span[@class="item-localisation"]')
            obj_cost = CleanDecimal('./div/div/span[@class="price-label"]')
            obj_currency = Regexp(CleanText('./div/div/span[@class="price-label"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
            obj_text = CleanText('./div/div/div[@itemprop="description"]')
            # The area is the number placed just before " m2" in the title.
            obj_area = CleanDecimal(Regexp(CleanText('./div/h2[@itemprop="name"]/a'),
                                           r'(.*?)(\d*) m2(.*?)', '\\2', default=None),
                                    default=NotAvailable)
            obj_phone = CleanText('./div/div/ul/li/span[@class="js-clickphone"]',
                                  replace=[(u'Téléphoner : ', u'')],
                                  default=NotAvailable)

            def obj_photos(self):
                url = CleanText('./div/div/a/img[@itemprop="image"]/@src')(self)
                return [HousingPhoto(url)]


class TypeDecimal(Filter):
    """Filter converting the selected value with ``Decimal()``."""

    def filter(self, el):
        return Decimal(el)


class FromTimestamp(Filter):
    """Filter turning a timestamp in milliseconds into a ``datetime``."""

    def filter(self, el):
        # datetime.fromtimestamp() expects seconds.
        return datetime.fromtimestamp(el / 1000.0)


class PhonePage(JsonPage):
    """JSON answer of the phone endpoint."""

    def get_phone(self):
        """Return the value stored under ``phoneNumber`` (None if absent)."""
        return self.doc.get('phoneNumber')


class HousingPage2(JsonPage):
    """JSON description of a classified; also used for the pagination total."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = Dict('characteristics/titleWithTransaction')
        obj_location = Format('%s %s %s', Dict('location/address'),
                              Dict('location/postalCode'), Dict('location/cityLabel'))
        obj_cost = TypeDecimal(Dict('characteristics/price'))
        obj_currency = u'€'
        obj_text = CleanHTML(Dict('characteristics/description'))
        obj_url = BrowserURL('housing_html', _id=Env('_id'))
        obj_area = TypeDecimal(Dict('characteristics/area'))
        obj_date = FromTimestamp(Dict('characteristics/date'))

        def obj_photos(self):
            photos = []
            for img in Dict('characteristics/images')(self):
                # Keep only entries embedding a photos.ubiflow.net URL, and
                # keep the part of the entry starting at that URL.
                m = re.search('.*(http://photos.ubiflow.net.*)', img)
                if m:
                    photos.append(HousingPhoto(m.group(1)))
            return photos

        def obj_details(self):
            details = {}
            details['fees'] = Dict('characteristics/fees')(self)
            details['bedrooms'] = Dict('characteristics/bedroomCount')(self)
            details['energy'] = Dict('characteristics/energyConsumptionCategory')(self)
            # NOTE(review): roomCount is indexed, so it is presumably a
            # sequence whose first entry is wanted -- confirm against the API.
            details['rooms'] = Dict('characteristics/roomCount')(self)[0]
            details['available'] = Dict('characteristics/available')(self)
            return details

    def get_total_page(self):
        """Return ``pagination.total`` from the JSON document."""
        return self.doc.get('pagination').get('total')


class HousingPage(HTMLPage):
    """HTML page of a single classified."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@itemprop="name"]')
        obj_location = CleanText('//span[@class="informations-localisation"]')
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        obj_currency = Regexp(CleanText('//span[@itemprop="price"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_text = CleanHTML('//div[@itemprop="description"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))
        # The area is the number placed just before " m2" in the title.
        obj_area = CleanDecimal(Regexp(CleanText('//h1[@itemprop="name"]'),
                                       r'(.*?)(\d*) m2(.*?)', '\\2'), default=NotAvailable)

        def obj_photos(self):
            photos = []
            for img in XPath('//a[@class="thumbnail-link"]/img[@itemprop="image"]')(self):
                url = Regexp(CleanText('./@src'), '.*(http://photos.ubiflow.net.*)')(img)
                photos.append(HousingPhoto(url))
            return photos

        def obj_details(self):
            details = dict()
            # One entry per feature that has both a name and a value.
            for item in XPath('//div[@class="features clearfix"]/ul/li')(self):
                key = CleanText('./span[@class="name"]')(item)
                value = CleanText('./span[@class="value"]')(item)
                if value and key:
                    details[key] = value

            # The energy block is stored under its own title text.
            key = CleanText('//div[@class="title-dpe clearfix"]')(self)
            value = CleanText('//div[@class="energy-consumption"]')(self)
            if value and key:
                details[key] = value
            return details


# (patch metadata kept from the collapsed source line)
# diff --git a/modules/explorimmo/test.py b/modules/explorimmo/test.py new file mode 100644 index 00000000..032c6c65 --- /dev/null +++ b/modules/explorimmo/test.py @@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

import itertools
from weboob.capabilities.housing import Query
from weboob.tools.test import BackendTest


class ExplorimmoTest(BackendTest):
    """Test of the explorimmo backend: city lookup, search, then phone fill."""

    MODULE = 'explorimmo'

    def test_explorimmo(self):
        backend = self.backend

        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.cities = []
        # Tag every city found with this backend's name before searching.
        for found_city in backend.search_city('paris'):
            found_city.backend = backend.name
            query.cities.append(found_city)

        # Look at no more than the first 20 results.
        first_results = itertools.islice(backend.search_housings(query), 0, 20)
        results = list(first_results)
        self.assertTrue(len(results) > 0)

        backend.fillobj(results[0], 'phone')