From 66eb02902a1bd5797c82e5321397876422c58851 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Mon, 18 May 2015 21:26:55 +0200 Subject: [PATCH] [entreparticuliers] new module entreparticuliers.com --- modules/entreparticuliers/__init__.py | 24 ++++++ modules/entreparticuliers/browser.py | 118 ++++++++++++++++++++++++++ modules/entreparticuliers/module.py | 64 ++++++++++++++ modules/entreparticuliers/pages.py | 99 +++++++++++++++++++++ modules/entreparticuliers/test.py | 40 +++++++++ 5 files changed, 345 insertions(+) create mode 100644 modules/entreparticuliers/__init__.py create mode 100644 modules/entreparticuliers/browser.py create mode 100644 modules/entreparticuliers/module.py create mode 100644 modules/entreparticuliers/pages.py create mode 100644 modules/entreparticuliers/test.py diff --git a/modules/entreparticuliers/__init__.py b/modules/entreparticuliers/__init__.py new file mode 100644 index 00000000..638821c3 --- /dev/null +++ b/modules/entreparticuliers/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2015 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
+ + +from .module import EntreparticuliersModule + + +__all__ = ['EntreparticuliersModule'] diff --git a/modules/entreparticuliers/browser.py b/modules/entreparticuliers/browser.py new file mode 100644 index 00000000..88baee86 --- /dev/null +++ b/modules/entreparticuliers/browser.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2015 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
from weboob.tools.json import json
from weboob.capabilities.housing import Query
from weboob.browser import PagesBrowser, URL

from .pages import CitiesPage, SearchPage, HousingPage


class EntreparticuliersBrowser(PagesBrowser):
    """Browser for the entreparticuliers.com housing adverts website.

    It exposes city lookup, housing search and housing detail retrieval,
    each one delegating the parsing to a page class from ``.pages``.
    """

    BASEURL = 'http://www.entreparticuliers.com'

    # The named group must be called ``pattern``: search_city() builds this
    # URL with ``pattern=...``.  Raw string so that ``\?`` reaches the regex
    # engine without relying on an unrecognised string escape.
    cities = URL(r'/HTTPHandlers/LocalisationsAutocompleteHandler.ashx\?q=(?P<pattern>.*)', CitiesPage)
    search = URL('/Default.aspx/CreateSearchParams')
    form_item = URL('/Default.aspx/GetElementsMoteur')
    search_result = URL('/annonces-immobilieres/vente/resultats-de-recherche-ergo', SearchPage)
    housing = URL('/(?P<_id>.*).html', HousingPage)

    # Value sent as 'rubrique' for each kind of transaction.
    TYPES = {Query.TYPE_RENT: '1',
             Query.TYPE_SALE: '4'
             }

    # For each kind of transaction, the identifier that is compared with the
    # 'Idchoix' field of the entries returned by the `form_item` endpoint.
    # An empty string means that no identifier is defined for that house type.
    RET = {Query.TYPE_RENT: {Query.HOUSE_TYPES.HOUSE: '2',
                             Query.HOUSE_TYPES.APART: '1',
                             Query.HOUSE_TYPES.LAND: '',
                             Query.HOUSE_TYPES.PARKING: '4',
                             Query.HOUSE_TYPES.OTHER: '6'},
           Query.TYPE_SALE: {Query.HOUSE_TYPES.HOUSE: '2',
                             Query.HOUSE_TYPES.APART: '1',
                             Query.HOUSE_TYPES.LAND: '5',
                             Query.HOUSE_TYPES.PARKING: '6',
                             Query.HOUSE_TYPES.OTHER: '9'}
           }

    def search_city(self, pattern):
        """Iterate over the cities returned by the autocomplete handler for `pattern`."""
        return self.cities.open(pattern=pattern).iter_cities()

    def _build_search_params(self, type, id_choix, biens, cities, nb_rooms, area_min, cost_min, cost_max):
        """Build the dict serialized as 'p_SearchParams' for one house type.

        :param type: transaction type, a key of TYPES
        :param id_choix: integer identifier matched against each bien's 'Idchoix'
        :param biens: entries decoded from the `form_item` answer
        :param cities: city identifiers, joined with commas into 'lstLocalisationId'
        :param nb_rooms: minimum number of rooms (values below 2 are treated as 1)
        :param area_min: minimum area, sent as None when falsy
        :param cost_min: minimum price, sent as None when falsy or not positive
        :param cost_max: maximum price, sent as None when falsy or not positive
        :return: the parameters, in the order the keys are inserted below
        """
        data = {}
        data['rubrique'] = self.TYPES.get(type)
        data['ach_id'] = None
        data['FromMoteur'] = "true"

        # When several entries share the identifier, the last one wins.
        for bien in biens:
            if bien['Idchoix'] == id_choix:
                data['lstSSTbien'] = bien['SsTypebien']
                data['lstTbien'] = bien['TypeBien']
                data['caracteristique'] = bien['Idchoix']

        data['OrigineAlerte'] = "SaveSearchMoteurHome"
        data['pays'] = "fra"
        data['prix_min'] = cost_min if cost_min and cost_min > 0 else None
        data['prix_max'] = cost_max if cost_max and cost_max > 0 else None
        data['lstThemes'] = ""

        min_rooms = nb_rooms if nb_rooms and nb_rooms > 1 else 1
        max_rooms = 5
        if min_rooms == 1 and max_rooms == 5:
            # The whole 1..5 range is requested: 0 is sent instead of a list.
            data['lstNbPieces'] = 0
        else:
            data['lstNbPieces'] = ','.join('%s' % n for n in range(min_rooms, max_rooms + 1))

        data['lstNbChambres'] = None
        data['surface_min'] = area_min if area_min else None
        # var modes = { "all": -1, "ville": 5, "region": 2, "departement": 4, "pays": 1, "regionUsuelle": 3 };
        data['localisationType'] = 5
        data['reference'] = ''
        data['nbpiecesMin'] = min_rooms
        data['nbpiecesMax'] = max_rooms
        data['rayon'] = 0
        data['localisation_id_rayon'] = None
        data['lstLocalisationId'] = ','.join(cities)
        data['photos'] = 0
        data['colocation'] = ''
        data['meuble'] = ''
        data['pageNumber'] = 1
        data['order_by'] = 5
        data['sort_order'] = 1
        data['top'] = 25
        data['SaveSearch'] = "false"
        data['EmailUser'] = ""
        data['GSMUser'] = ""
        return data

    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        """Yield the housings matching the criteria, one search per house type.

        For each entry of `house_types` the search parameters are posted to
        the `search` endpoint, then the `search_result` page is loaded and
        its housings are yielded.

        `area_max` is accepted but is not part of the parameters that are
        sent.  House types whose identifier in RET is empty are skipped,
        since no identifier can be matched for them.
        """
        referer = "http://www.entreparticuliers.com/annonces-immobilieres/vente/resultats-de-recherche-ergo"
        self.session.headers.update({"X-Requested-With": "XMLHttpRequest",
                                     "Referer": referer,
                                     "Content-Type": "application/json; charset=utf-8",
                                     "Accept": "application/json, text/javascript, */*; q=0.01"})

        result = self.form_item.open(data="{'rubrique': '%s'}" % self.TYPES.get(type))
        # The answer is a JSON object whose 'd' member is itself a JSON string.
        biens = json.loads(json.loads(result.content)['d'])

        for house_type in house_types:
            id_type = self.RET[type].get(house_type, '1')
            if not id_type:
                # int('') would raise ValueError; there is nothing to search for.
                continue

            data = self._build_search_params(type, int(id_type), biens, cities,
                                             nb_rooms, area_min, cost_min, cost_max)

            self.search.go(data="{'p_SearchParams':'%s'}" % json.dumps(data))
            for item in self.search_result.go().iter_housings():
                yield item

    def get_housing(self, _id, obj=None):
        """Load the page of housing `_id` and return the parsed housing, filling `obj` when given."""
        return self.housing.go(_id=_id).get_housing(obj=obj)
from weboob.tools.backend import Module
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto

from .browser import EntreparticuliersBrowser


__all__ = ['EntreparticuliersModule']


class EntreparticuliersModule(Module, CapHousing):
    """Housing backend that forwards every request to EntreparticuliersBrowser."""

    NAME = 'entreparticuliers'
    DESCRIPTION = u'entreparticuliers.com website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '1.1'

    BROWSER = EntreparticuliersBrowser

    def search_city(self, pattern):
        """Return the cities found by the browser for `pattern`."""
        return self.browser.search_city(pattern)

    def search_housings(self, query):
        """Search housings for the cities of `query` that belong to this backend.

        An empty list is returned when none of the query's cities has this
        backend's name as its `backend`.
        """
        own_city_ids = [city.id for city in query.cities if city.backend == self.name]
        if not own_city_ids:
            return []

        return self.browser.search_housings(
            query.type,
            own_city_ids,
            query.nb_rooms,
            query.area_min,
            query.area_max,
            query.cost_min,
            query.cost_max,
            query.house_types,
        )

    def get_housing(self, _id):
        """Return the housing identified by `_id`."""
        return self.browser.get_housing(_id)

    def fill_housing(self, housing, fields):
        """Complete `housing` from its detail page; `fields` is not consulted."""
        return self.browser.get_housing(housing.id, housing)

    def fill_photo(self, photo, fields):
        """Download the content of `photo` when 'data' is requested and not loaded yet."""
        if 'data' not in fields:
            return photo
        if not photo.url or photo.data:
            return photo

        response = self.browser.open(photo.url)
        photo.data = response.content
        return photo

    OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo}
# -*- coding: utf-8 -*-

from decimal import Decimal
from datetime import datetime

from weboob.browser.pages import JsonPage, HTMLPage
from weboob.browser.elements import ItemElement, ListElement, DictElement, method
from weboob.browser.filters.json import Dict
from weboob.browser.filters.html import CleanHTML
from weboob.browser.filters.standard import CleanText, CleanDecimal, Regexp, Env, BrowserURL
from weboob.capabilities.housing import Housing, HousingPhoto, City


class CitiesPage(JsonPage):
    """JSON page listing cities."""

    @method
    class iter_cities(DictElement):
        class item(ItemElement):
            klass = City

            def condition(self):
                # Entries without an 'id' are not turned into cities.
                return Dict('id', default=None)(self)

            obj_id = Dict('id')
            obj_name = Dict('libelle')


class SearchPage(HTMLPage):
    """HTML page holding the list of search results."""

    @method
    class iter_housings(ListElement):
        item_xpath = '//ul[@class="results rview"]/li'

        class item(ItemElement):
            klass = Housing

            def condition(self):
                # Only list items that carry a link to an advert are kept.
                return CleanText('./div/span[@class="infos"]/a[@class="titre"]/@href')(self)

            obj_id = Regexp(CleanText('./div/span[@class="infos"]/a[@class="titre"]/@href'),
                            '/(.*).html')
            obj_title = CleanText('./div/span[@class="infos"]/a[@class="titre"]')
            obj_cost = CleanDecimal(Regexp(CleanText('./div/span[@class="infos"]/span[@id="prix"]'),
                                           '(.*) [%s%s%s].*' % (u'€', u'$', u'£'),
                                           default=''),
                                    replace_dots=(',', '.'),
                                    default=Decimal(0))
            obj_currency = Regexp(CleanText('./div/span[@class="infos"]/span[@id="prix"]'),
                                  '.*([%s%s%s]).*' % (u'€', u'$', u'£'), default=u'€')
            obj_text = CleanText('./div/span[@class="infos"]')
            obj_date = datetime.now


class HousingPage(HTMLPage):
    """HTML page describing a single housing advert."""

    @method
    class get_housing(ItemElement):
        klass = Housing

        obj_id = Env('_id')
        # '@class' selects the attribute; a bare 'class' would test a child
        # element named <class> and never match the heading's section.
        obj_title = CleanText('//section[@class="wctrl"]/h1')
        obj_cost = CleanDecimal('//span[@class="i prix"]', replace_dots=(',', '.'), default=Decimal(0))

        obj_currency = Regexp(CleanText('//span[@class="i prix"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default='')
        obj_text = CleanHTML('//article[@class="bloc description"]/p')
        obj_location = CleanText('//span[@class="i ville"]')
        obj_area = CleanDecimal(Regexp(CleanText('//span[@class="i"]'), '.*/(.*) m.*'))
        obj_url = BrowserURL('housing', _id=Env('_id'))
        obj_phone = CleanText('//input[@id="hftel"]/@value')
        obj_date = datetime.now

        def obj_details(self):
            """Return a dict built from the 'label:value' spans of the page.

            Spans that have no text or no ':' separator are ignored.  The
            value is everything after the first ':', so a value that itself
            contains ':' is kept whole.
            """
            details = {}
            for detail in self.el.xpath('//span[@class="i small"]'):
                label, separator, value = (detail.text or '').partition(':')
                if separator:
                    details[label] = value
            return details

        def obj_photos(self):
            """Return one HousingPhoto per image of the photo list, with an absolute URL."""
            return [HousingPhoto('http://www.entreparticuliers.com/%s' % img)
                    for img in self.el.xpath('//ul[@id="ulPhotos"]/li/img/@src')]
import itertools
from weboob.tools.test import BackendTest
from weboob.capabilities.housing import Query


class EntreparticuliersTest(BackendTest):
    """Live test of the entreparticuliers backend: city lookup, search, then fill."""

    MODULE = 'entreparticuliers'

    def test_entreparticuliers(self):
        """Search sales in the cities matching 'lille' and check the first result's area."""
        backend = self.backend

        query = Query()
        query.type = Query.TYPE_SALE
        query.cities = []
        for found_city in backend.search_city('lille'):
            # Cities are tagged with the backend name before being added to the query.
            found_city.backend = backend.name
            query.cities.append(found_city)

        # Only the first 20 results of the search are consumed.
        housings = backend.search_housings(query)
        results = list(itertools.islice(housings, 20))
        self.assertTrue(len(results) > 0)

        first = backend.fillobj(results[0])
        self.assertTrue(first.area is not None, 'Area for "%s"' % first.id)