From 0359bb4a3da4f6a6a5e1229c679a22bc2aaca393 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Thu, 30 Oct 2014 15:38:07 +0100 Subject: [PATCH] [logicimmo] New module logicimmo --- modules/logicimmo/__init__.py | 24 +++++++ modules/logicimmo/browser.py | 84 +++++++++++++++++++++++ modules/logicimmo/module.py | 90 +++++++++++++++++++++++++ modules/logicimmo/pages.py | 121 ++++++++++++++++++++++++++++++++++ modules/logicimmo/test.py | 42 ++++++++++++ 5 files changed, 361 insertions(+) create mode 100644 modules/logicimmo/__init__.py create mode 100644 modules/logicimmo/browser.py create mode 100644 modules/logicimmo/module.py create mode 100644 modules/logicimmo/pages.py create mode 100644 modules/logicimmo/test.py diff --git a/modules/logicimmo/__init__.py b/modules/logicimmo/__init__.py new file mode 100644 index 00000000..71c4747a --- /dev/null +++ b/modules/logicimmo/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from .module import LogicimmoModule + + +__all__ = ['LogicimmoModule'] diff --git a/modules/logicimmo/browser.py b/modules/logicimmo/browser.py new file mode 100644 index 00000000..2a616cf6 --- /dev/null +++ b/modules/logicimmo/browser.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. 
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.browser import PagesBrowser, URL
from weboob.capabilities.housing import Query
from .pages import CitiesPage, SearchPage, HousingPage, PhonePage


class LogicimmoBrowser(PagesBrowser):
    """Browser for the logic-immo.com site (city lookup, search, detail, phone)."""

    BASEURL = 'http://www.logic-immo.com'

    # The named groups of each pattern match the keyword arguments that the
    # methods below pass to ``.go()`` / ``.stay_or_go()``.
    city = URL(r'asset/t9/t9_district/fr/(?P<size>\d*)/(?P<first_letter>\w)/(?P<pattern>.*)\.txt\?json=%22(?P<pattern2>.*)%22',
               CitiesPage)
    search = URL(r'(?P<type>location|vente)-immobilier-(?P<cities>.*)/options/(?P<options>.*)', SearchPage)
    housing = URL(r'detail-(?P<_id>.*).htm', HousingPage)
    phone = URL(r'(?P<urlcontact>.*)', PhonePage)

    # Query type -> first path component of the search URL.
    TYPES = {Query.TYPE_RENT: 'location',
             Query.TYPE_SALE: 'vente'}

    # House type -> identifier sent in the ``groupprptypesids`` option.
    RET = {Query.HOUSE_TYPES.HOUSE: '2',
           Query.HOUSE_TYPES.APART: '1',
           Query.HOUSE_TYPES.LAND: '3',
           Query.HOUSE_TYPES.PARKING: '10',
           Query.HOUSE_TYPES.OTHER: '14'}

    def get_cities(self, pattern):
        """Return the cities matching ``pattern``.

        An empty or missing pattern yields an empty list instead of issuing
        a request, so callers can always iterate over the result.
        """
        if not pattern:
            return []

        upper_pattern = pattern.upper()
        return self.city.go(size=len(pattern),
                            first_letter=pattern[0].upper(),
                            pattern=upper_pattern,
                            pattern2=upper_pattern).get_cities()

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Open the search page built from the criteria and iterate its housings.

        ``type`` is looked up in ``TYPES`` (falling back to ``'location'``);
        ``house_types`` entries that are not in ``RET`` are ignored.  The
        options are joined with ``/`` to form the last part of the URL.
        """
        options = []

        type_ids = [self.RET[house_type] for house_type in house_types if house_type in self.RET]
        if type_ids:
            options.append('groupprptypesids=%s' % ','.join(type_ids))

        # Minimum price and area are always sent, defaulting to '0'.
        options.append('pricemin=%s' % (cost_min or '0'))
        if cost_max:
            options.append('pricemax=%s' % cost_max)

        options.append('areamin=%s' % (area_min or '0'))
        if area_max:
            options.append('areamax=%s' % area_max)

        if nb_rooms:
            options.append('nbrooms=%s' % nb_rooms)

        return self.search.go(type=self.TYPES.get(type, 'location'),
                              cities=cities,
                              options='/'.join(options)).iter_housings()

    def get_housing(self, _id, housing=None):
        """Open the detail page of ``_id`` and fill (or create) the housing object."""
        return self.housing.go(_id=_id).get_housing(obj=housing)

    def get_phone(self, _id):
        """Return the phone number of housing ``_id``.

        The contact URL and its parameters are read from the detail page,
        then the phone page is requested with them.
        """
        urlcontact, params = self.housing.stay_or_go(_id=_id).get_phone_url_datas()
        return self.phone.go(urlcontact=urlcontact, params=params).get_phone()


# diff --git a/modules/logicimmo/module.py b/modules/logicimmo/module.py
# new file mode 100644
# index 00000000..e2582084
# --- /dev/null
# +++ b/modules/logicimmo/module.py
# @@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
from weboob.capabilities.base import UserError
from .browser import LogicimmoBrowser


__all__ = ['LogicimmoModule']


class LogicImmoCitiesError(UserError):
    """
    Raised when more than 3 cities are selected
    """
    def __init__(self, msg='You cannot select more than three cities'):
        UserError.__init__(self, msg)


class LogicimmoModule(Module, CapHousing):
    """Housing backend that delegates its requests to ``LogicimmoBrowser``."""

    NAME = 'logicimmo'
    DESCRIPTION = u'logicimmo website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '1.1'

    BROWSER = LogicimmoBrowser

    def get_housing(self, housing):
        """Return a filled housing, including its phone number.

        ``housing`` is either a ``Housing`` object (filled in place) or a
        housing id, in which case the browser creates the object.
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            housing_id, housing = housing, None

        housing = self.browser.get_housing(housing_id, housing)
        housing.phone = self.browser.get_phone(housing_id)
        return housing

    def search_city(self, pattern):
        """Return the cities the browser finds for ``pattern``."""
        return self.browser.get_cities(pattern)

    def search_housings(self, query):
        """Search housings for the cities of ``query`` that belong to this backend.

        Returns an empty list when no city belongs to this backend and
        raises ``LogicImmoCitiesError`` when more than three do.
        """
        own_cities = [c for c in query.cities if c.backend == self.name]
        if not own_cities:
            return []

        if len(own_cities) > 3:
            raise LogicImmoCitiesError()

        # The search URL takes the dash-separated names followed by the ids.
        cities_names = ['%s' % c.name.replace(' ', '-') for c in own_cities]
        cities_ids = ['%s' % c.id for c in own_cities]
        cities = ','.join(cities_names + cities_ids)

        return self.browser.search_housings(query.type, cities.lower(), query.nb_rooms,
                                            query.area_min, query.area_max,
                                            query.cost_min, query.cost_max,
                                            query.house_types)

    def fill_housing(self, housing, fields):
        """Reload ``housing`` from its detail page; fetch the phone if requested."""
        self.browser.get_housing(housing.id, housing)
        if 'phone' in fields:
            housing.phone = self.browser.get_phone(housing.id)
        return housing

    def fill_photo(self, photo, fields):
        """Download the photo content when 'data' is requested and not yet loaded."""
        if 'data' in fields and photo.url and not photo.data:
            photo.data = self.browser.open(photo.url).content
        return photo

    # Object type -> method used to fill objects of that type.
    OBJECTS = {Housing: fill_housing,
               HousingPhoto: fill_photo,
               }


# diff --git
# a/modules/logicimmo/pages.py b/modules/logicimmo/pages.py
# new file mode 100644
# index 00000000..2fb09a83
# --- /dev/null
# +++ b/modules/logicimmo/pages.py
# @@ -0,0 +1,121 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from decimal import Decimal

from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.json import Dict
from weboob.browser.filters.standard import Format, CleanText, Regexp, CleanDecimal, Date, Env, BrowserURL
from weboob.browser.filters.html import XPath
from weboob.capabilities.housing import Housing, HousingPhoto, City
from weboob.capabilities.base import NotAvailable


class DictElement(ListElement):
    """ListElement whose elements are the members of ``self.el`` itself."""

    def find_elements(self):
        for el in self.el:
            yield el


class CitiesPage(JsonPage):
    """JSON answer of the city lookup."""

    @method
    class get_cities(DictElement):
        item_xpath = ''

        class item(ItemElement):
            klass = City

            # id is "<lct_id>_<lct_level>", name is "<lct_name> <lct_post_code>".
            obj_id = Format('%s_%s', Dict('lct_id'), Dict('lct_level'))
            obj_name = Format('%s %s', Dict('lct_name'), Dict('lct_post_code'))


class PhonePage(HTMLPage):
    """Page returned by the contact URL of a housing."""

    def get_phone(self):
        """Return the cleaned text of the element carrying the ``phone`` class."""
        return CleanText('//div[has-class("phone")]', childs=False)(self.doc)


class HousingPage(HTMLPage):
    """Detail page of a single housing."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//meta[@itemprop="name"]/@content')
        # The area is the number preceding " m²" in the title.
        obj_area = CleanDecimal(Regexp(CleanText('//meta[@itemprop="name"]/@content'),
                                       '(.*?)(\\d*) m\xb2(.*?)', '\\2'), default=NotAvailable)
        obj_cost = CleanDecimal('//span[@itemprop="price"]')
        obj_currency = Regexp(CleanText('//span[@itemprop="price"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        obj_date = Date(Regexp(CleanText('//p[@class="size_11 darkergrey"]'),
                               u'.* Mis à jour : (\\d{2}/\\d{2}/\\d{4}).*'))
        obj_text = CleanText('//div[@class="columns offer-description alpha"]')
        obj_location = CleanText('//span[@itemprop="address"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_photos(self):
            """Return one ``HousingPhoto`` per image source found in the carousel."""
            sources = XPath('//div[@class="carousel"]/ul/li/a/img/@src')(self)
            return [HousingPhoto(u'%s' % src) for src in sources]

        def obj_details(self):
            """Return a one-entry dict built from the "label:value" paragraph.

            The value is followed by the letter extracted from the energy
            pyramid image URL, in parentheses, when that image is present.
            An empty dict is returned when the paragraph is missing.
            """
            details = {}
            text = CleanText('//div[@class="box box-noborder"]/p[@class="size_13 darkergrey bold"]')(self)
            if text:
                parts = text.split(':')
                # Tolerate a paragraph without ':' instead of raising IndexError.
                value = parts[1] if len(parts) > 1 else ''
                dpe = Regexp(CleanText('//div[@id="energy-pyramid"]/img/@src'),
                             r'http://mmf.logic-immo.com/mmf/fr/static/dpe/dpe_(\w)_b.gif',
                             '(\\1)', default="")(self)
                details[parts[0]] = '%s %s' % (value, dpe)
            return details

    def get_phone_url_datas(self):
        """Return ``(urlcontact, params)`` read from the first phone link.

        Both come from the ``data-*`` attributes of the first
        ``a.phone-link`` element; ``IndexError`` is raised when the page
        has no such link.
        """
        link = XPath('//a[has-class("phone-link")]')(self.doc)[0]
        urlcontact = CleanText('./@data-urlcontact')(link)

        params = {}
        for key in ('univers', 'pushcontact', 'mapper', 'offerid',
                    'offerflag', 'campaign', 'xtpage'):
            params[key] = CleanText('./@data-%s' % key)(link)
        return urlcontact, params


class SearchPage(HTMLPage):
    """Result page of a housing search."""

    @method
    class iter_housings(ListElement):
        item_xpath = '//article'

        class item(ItemElement):
            klass = Housing

            # id is "<type>-<offer id>", the offer id being the header id
            # without its "header-offer-" prefix.
            obj_id = Format('%s-%s', Env('type'),
                            CleanText('./div/header/@id', replace=[('header-offer-', '')]))
            obj_title = CleanText('./div/header/section/p[@class="property-type"]/span/@title')
            obj_area = CleanDecimal(Regexp(CleanText('./div/header/section/p[@class="property-type"]/span/@title'),
                                           '(.*?)(\\d*) m\xb2(.*?)', '\\2'), default=NotAvailable)
            obj_cost = CleanDecimal(CleanText('./div/header/section/p[@class="price"]'),
                                    replace_dots=(',', '.'), default=Decimal(0))
            obj_currency = Regexp(CleanText('./div/header/section/p[@class="price"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
            obj_date = Date(Regexp(CleanText('./div/header/section/p[has-class("update-date")]'),
                                   r".*(\d{2}/\d{2}/\d{4}).*"))
            # NOTE(review): "@intemprop" / "adress" look like misspellings of
            # "@itemprop" / "address" (HousingPage uses @itemprop="address"), and
            # text/location may be swapped -- confirm against the site's markup
            # before changing these selectors.
            obj_text = CleanText('./div/div[@class="content-offer"]/section[has-class("content-desc")]/p/span[@intemprop="adress"]')
            obj_location = CleanText('./div/div[@class="content-offer"]/section[has-class("content-desc")]/p/span[not(@intemprop)]')


# diff --git a/modules/logicimmo/test.py b/modules/logicimmo/test.py
# new file mode 100644
# index 00000000..872f1ce1
# --- /dev/null
# +++ b/modules/logicimmo/test.py
# @@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import Query
from weboob.tools.test import BackendTest


class LogicimmoTest(BackendTest):
    """Live test of the logicimmo backend."""

    MODULE = 'logicimmo'

    def test_logicimmo(self):
        """Search with up to three 'paris' cities, then fill the first result's phone."""
        search = Query()
        search.area_min, search.cost_max = 20, 900
        search.cities = []

        for candidate in self.backend.search_city('paris'):
            # Only the first three cities returned by the lookup are used.
            if len(search.cities) >= 3:
                break

            candidate.backend = self.backend.name
            search.cities.append(candidate)

        housings = list(self.backend.search_housings(search))

        self.assertTrue(len(housings) > 0)
        self.backend.fillobj(housings[0], 'phone')