diff --git a/modules/seloger/__init__.py b/modules/seloger/__init__.py new file mode 100644 index 00000000..10540bdb --- /dev/null +++ b/modules/seloger/__init__.py @@ -0,0 +1,3 @@ +from .backend import SeLogerBackend + +__all__ = ['SeLogerBackend'] diff --git a/modules/seloger/backend.py b/modules/seloger/backend.py new file mode 100644 index 00000000..30de0f79 --- /dev/null +++ b/modules/seloger/backend.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
class SeLogerBackend(BaseBackend, ICapHousing):
    """Housing backend for the seloger.com website.

    Every operation is delegated to the browser (``SeLogerBrowser``) and is
    performed inside a ``with self.browser`` block.
    """

    NAME = 'seloger'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '0.b'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'
    ICON = 'http://static.poliris.com/z/portail/svx/portals/sv6_gen/favicon.png'
    BROWSER = SeLogerBrowser

    def search_housings(self, query):
        """Iterate over the housings matching *query*.

        Only the cities of ``query.cities`` whose ``backend`` attribute equals
        this backend's name are used; their ids are handed to the browser
        together with the area and cost bounds of the query.
        """
        city_ids = [city.id for city in query.cities
                    if city.backend == self.name]
        with self.browser:
            for housing in self.browser.search_housings(
                    city_ids,
                    query.area_min, query.area_max,
                    query.cost_min, query.cost_max):
                yield housing

    def get_housing(self, housing):
        """Fetch the details of one housing.

        :param housing: either a ``Housing`` object, which is forwarded to the
                        browser along with its id, or a bare id
        :returns: the value returned by the browser's ``get_housing``
        """
        if isinstance(housing, Housing):
            housing_id = housing.id
        else:
            housing_id = housing
            housing = None

        with self.browser:
            return self.browser.get_housing(housing_id, housing)

    def search_city(self, pattern):
        """Iterate over the cities returned by the browser for *pattern*.

        Only the category labelled ``'Villes'`` is considered, and entries
        without a ``'value'`` key are ignored.
        """
        with self.browser:
            for category in self.browser.search_geo(pattern):
                if category['label'] != 'Villes':
                    continue
                for entry in category['values']:
                    if 'value' not in entry:
                        continue
                    city = City(entry['value'])
                    city.name = entry['label']
                    yield city

    def fill_housing(self, housing, fields):
        """Complete *housing* from its detail page.

        The object is forwarded to the browser, exactly as ``get_housing``
        does, so that the caller's object is the one that gets filled.
        *fields* is not used.
        """
        with self.browser:
            return self.browser.get_housing(housing.id, housing)

    OBJECTS = {Housing: fill_housing,
               }
class SeLogerBrowser(BaseBrowser):
    """Browser for www.seloger.com: city lookup, search and advert pages."""

    PROTOCOL = 'http'
    DOMAIN = 'www.seloger.com'
    ENCODING = 'utf-8'
    PAGES = {
        'http://www.seloger.com/(pre)?recherche.htm.*': SearchResultsPage,
        'http://www.seloger.com/annonces.htm.*': SearchResultsPage,
        'http://www.seloger.com/annonces/.*': HousingPage,
    }

    def search_geo(self, pattern):
        """Query the city lookup endpoint and return its decoded JSON body."""
        url = self.buildurl(
            'http://www.seloger.com/js,ajax,villequery_v3.htm',
            ville=pattern,
            mode=1)
        response = self.openurl(url)
        return json.load(response)

    def search_housings(self, cities, area_min, area_max, cost_min, cost_max):
        """Run a search and return the housings iterator of the result page.

        :param cities: city ids, joined with commas into the ``ci`` parameter
        :param area_min, area_max, cost_min, cost_max: bounds; a false value
               is sent as an empty string
        """
        params = {'ci': ','.join(cities),
                  'idtt': 1,  # location
                  'idtypebien': 1,  # appart
                  'org': 'advanced_search',
                  'px_loyermax': cost_max or '',
                  'px_loyermin': cost_min or '',
                  'surfacemax': area_max or '',
                  'surfacemin': area_min or '',
                  }
        target = self.buildurl('/prerecherche.htm', **params)

        self.location(target)
        assert self.is_on_page(SearchResultsPage)

        return self.page.iter_housings()

    def get_housing(self, id, obj=None):
        """Open the detail page of advert *id* and parse it.

        *obj* is handed unchanged to the page's ``get_housing``.
        """
        path = '/%d/detail_new.htm' % int(id)
        self.location(path)

        assert self.is_on_page(HousingPage)
        return self.page.get_housing(obj)
def sanitarize_cost(t):
    """Turn a price label scraped from the site into an integer amount.

    Blanks, euro signs and the letter ``c`` are stripped from both ends,
    plain and non-breaking spaces inside the number are removed, and a
    decimal comma is accepted. The fractional part is dropped.

    :param t: price text
    :returns: the amount as an ``int``
    :raises ValueError: if what is left is not a number
    """
    cleaned = t.strip(u' \t\u20ac\xa0c\n\r')
    # Either kind of space may separate thousands ("1 200").
    cleaned = cleaned.replace(u'\xa0', u'').replace(u' ', u'')
    return int(float(cleaned.replace(u',', u'.')))


def _parse_date(text):
    """Build a ``date`` from a day/month/year string separated by slashes.

    Blanks around the numbers are tolerated because ``int`` ignores them.
    """
    return date(*[int(part) for part in reversed(text.split(u'/'))])


class SearchResultsPage(BasePage):
    """Result list of a search."""

    def iter_housings(self):
        """Yield one partially filled ``Housing`` per advert of the page.

        The id, title, cost, currency, text, location and date are set.
        """
        for div in self.document.getroot().cssselect('div.ann_ann_border'):
            # The first three characters of the anchor id are not part of
            # the advert id.
            housing = Housing(div.find('a').attrib['id'][3:])

            head = div.cssselect('div.rech_headerann')[0]
            housing.title = head.xpath('.//span[@class="mea1"]/a')[0].text.strip()

            parts = head.xpath('.//span[@class="mea2"]')[0].text.strip().split('+')
            housing.cost = sanitarize_cost(parts[0])
            if len(parts) > 1:
                # A '+' in the price means charges are listed separately;
                # add them to the cost.
                for strong in head.xpath('.//span[@class="addprixfr"]/span/strong'):
                    if (strong.text or u'').strip() == u'Charges\xa0:':
                        housing.cost += sanitarize_cost(strong.tail)
            housing.currency = u'€'

            sub = div.xpath('.//div[@class="rech_desc_right_photo"]')[0]
            summary = sub.xpath('./span[@class="mea7"]')
            if summary:
                text = '%s - %s' % (summary[0].text.strip(),
                                    summary[0].tail.strip())
            else:
                text = div.xpath('.//div[@class="rech_ville"]')[0].tail.strip()
            housing.text = text.replace('\r\n', ' ')
            housing.location = sub.xpath('.//div[@class="rech_ville"]/strong')[0].text.strip()

            # The strip set must be a unicode literal: it holds a non-ASCII
            # character and is applied to text coming from lxml.
            updated = sub.xpath('.//div[@class="rech_majref"]/strong')[0].tail
            housing.date = _parse_date(updated.strip(u'- \xa0\r\t\n'))
            yield housing


class HousingPage(BasePage):
    """Detail page of one advert."""

    # <digits><nbsp><euro>, then "+ch" or "cc", optionally followed by
    # "Charges: <digits><euro>".
    _PRICE_RE = re.compile(u'(\\d+)\xa0\u20ac(\\+ch|cc)(Charges: (\\d+)\u20ac)?')

    @staticmethod
    def _id_from_url(url):
        """Return the last path segment of *url* without its ``.htm`` suffix."""
        name = url.partition('?')[0].split('/')[-1]
        # Remove the exact suffix; str.rstrip would strip a set of characters.
        if name.endswith('.htm'):
            name = name[:-len('.htm')]
        return name

    @staticmethod
    def _first_or_na(nodes, attr):
        """Return the stripped ``text`` or ``tail`` of the first node.

        ``NotAvailable`` is returned when there is no node or when the
        requested attribute is ``None``.
        """
        if not nodes:
            return NotAvailable
        value = getattr(nodes[0], attr)
        if value is None:
            return NotAvailable
        return value.strip()

    def get_housing(self, housing=None):
        """Fill *housing* from the page and return it.

        :param housing: object to complete; when ``None`` a new ``Housing``
                        is created with an id taken from the page URL
        :returns: the filled ``Housing``
        :raises BrokenPageError: if the price cannot be parsed
        """
        doc = self.document.getroot()
        if housing is None:
            housing = Housing(self._id_from_url(self.url))

        housing.title = doc.xpath('//head/title')[0].text
        housing.text = doc.xpath('//head/meta[@name="description"]')[0].attrib['content']

        txt = doc.xpath('//meta[@itemprop="price"]')[0].attrib['content'].strip()
        m = self._PRICE_RE.match(txt)
        if not m:
            raise BrokenPageError('Unable to parse price %r' % txt)

        housing.cost = sanitarize_cost(m.group(1))
        if m.group(4):
            # Charges given separately are added to the cost.
            housing.cost += sanitarize_cost(m.group(4))
        housing.currency = u'€'

        housing.date = _parse_date(doc.xpath('//span[@class="maj"]/b')[0].text)
        housing.area = int(doc.xpath('//li[@title="Surface"]/b')[0].text.strip(u'\xa0m\xb2'))

        housing.station = self._first_or_na(doc.xpath('//dd[@class="metro_paris"]'), 'text')
        housing.phone = self._first_or_na(doc.xpath('//div[@class="tel"]'), 'text')

        location = self._first_or_na(doc.xpath('//div[@class="adresse"]/b'), 'tail')
        if location is not NotAvailable:
            location = location.replace('\r\n', ' ')
        housing.location = location

        return housing
class SeLogerTest(BackendTest):
    """Exercise city search, housing search and object filling together."""

    BACKEND = 'seloger'

    def test_seloger(self):
        """Search housings in the cities found for 'paris', then fill one."""
        backend = self.backend

        query = Query()
        query.area_min = 20
        query.cost_max = 1000
        query.cities = []
        for found in backend.search_city('paris'):
            # search_housings only keeps cities tagged with this backend.
            found.backend = backend.name
            query.cities.append(found)

        housings = list(backend.search_housings(query))
        self.assertTrue(len(housings) > 0)

        first = housings[0]
        backend.fillobj(first, 'phone')