diff --git a/modules/pap/__init__.py b/modules/pap/__init__.py new file mode 100644 index 00000000..04880498 --- /dev/null +++ b/modules/pap/__init__.py @@ -0,0 +1,3 @@ +from .backend import PapBackend + +__all__ = ['PapBackend'] diff --git a/modules/pap/backend.py b/modules/pap/backend.py new file mode 100644 index 00000000..f0e8e0e3 --- /dev/null +++ b/modules/pap/backend.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import ICapHousing, City, Housing
from weboob.tools.backend import BaseBackend

from .browser import PapBrowser


__all__ = ['PapBackend']


class PapBackend(BaseBackend, ICapHousing):
    """Housing backend whose page access is delegated to PapBrowser."""

    NAME = 'pap'
    MAINTAINER = u'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '0.b'
    DESCRIPTION = 'French housing website'
    LICENSE = 'AGPLv3+'
    BROWSER = PapBrowser

    def search_housings(self, query):
        """Yield the housings found by the browser for *query*.

        Only the cities of ``query.cities`` whose ``backend`` attribute equals
        this backend's name are searched; their ids are passed to the browser
        together with the area and cost bounds of the query.

        Nothing is yielded (and no request is made) when no city of the query
        belongs to this backend.
        """
        cities = [c.id for c in query.cities if c.backend == self.name]
        if not cities:
            # With an empty list the request would go out with an empty
            # geographic filter, i.e. not restricted to what was asked for.
            return

        with self.browser:
            for housing in self.browser.search_housings(cities,
                                                        query.area_min,
                                                        query.area_max,
                                                        query.cost_min,
                                                        query.cost_max):
                yield housing

    def get_housing(self, housing):
        """Return the housing designated by *housing*.

        *housing* is either a Housing object (its ``id`` is used) or the
        identifier itself.
        """
        housing_id = housing.id if isinstance(housing, Housing) else housing

        with self.browser:
            return self.browser.get_housing(housing_id)

    def search_city(self, pattern):
        """Yield a City (id and name) per entry returned by the browser's
        ``search_geo(pattern)``."""
        with self.browser:
            for entry in self.browser.search_geo(pattern):
                city = City(entry['id'])
                city.name = entry['name']
                yield city

    def fill_housing(self, housing, fields):
        """Return a freshly fetched housing with the same id as *housing*.

        *fields* is not consulted: the housing is always fetched again in
        full through the browser.
        """
        with self.browser:
            return self.browser.get_housing(housing.id)

    OBJECTS = {Housing: fill_housing,
               }
import urllib
import json

from weboob.tools.browser import BaseBrowser

from .pages import SearchResultsPage, HousingPage


__all__ = ['PapBrowser']


class PapBrowser(BaseBrowser):
    """Browser for www.pap.fr: city lookup, housing search, housing page."""

    PROTOCOL = 'http'
    DOMAIN = 'www.pap.fr'
    ENCODING = 'utf-8'
    # URL pattern -> page class handling it.
    PAGES = {
        'http://www.pap.fr/annonce/.*': SearchResultsPage,
        'http://www.pap.fr/annonces/.*': HousingPage,
    }

    def search_geo(self, pattern):
        """Return the decoded JSON answer of the 'ac-geo' endpoint queried
        with *pattern* as its ``q`` parameter."""
        url = self.buildurl('http://www.pap.fr/index/ac-geo', q=pattern)
        response = self.openurl(url)
        return json.load(response)

    def search_housings(self, cities, area_min, area_max, cost_min, cost_max):
        """Submit the search form and return the result page's housing iterator.

        *cities* is a sequence of id strings, joined with commas. Any bound
        that is falsy is sent as an empty string.
        """
        params = {'geo_objets_ids': ','.join(cities),
                  'surface[min]': area_min or '',
                  'surface[max]': area_max or '',
                  'prix[min]': cost_min or '',
                  'prix[max]': cost_max or '',
                  'produit': 'location',
                  'recherche': 1,
                  'nb_resultats_par_page': 40,
                  'submit': 'rechercher',
                  'typesbien[]': 'appartement',
                  }
        encoded = urllib.urlencode(params)
        self.location('/annonce/', encoded)

        assert self.is_on_page(SearchResultsPage)
        return self.page.iter_housings()

    def get_housing(self, housing):
        """Open the page of the housing whose id is *housing* and return the
        Housing object parsed from it."""
        path = '/annonces/%s' % urllib.quote(housing)
        self.location(path)

        assert self.is_on_page(HousingPage)
        return self.page.get_housing()
import re
from dateutil.parser import parse as parse_date

from weboob.tools.browser import BasePage
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.housing import Housing


__all__ = ['SearchResultsPage', 'HousingPage']


# Characters that may surround a displayed price: blanks, no-break space and
# the euro sign.
_COST_PADDING = u' \t\u20ac\xa0\n\r'


def _parse_cost(text):
    """Return the integer amount of a displayed price.

    Surrounding blanks and the euro sign are stripped, then digit-grouping
    separators are removed so that a grouped amount no longer makes int()
    fail.

    NOTE(review): '.', ' ' and no-break space are presumed to be the grouping
    separators used by the site -- confirm against live pages.

    Raises ValueError if what remains is not an integer literal.
    """
    amount = text.strip(_COST_PADDING)
    for separator in (u'.', u' ', u'\xa0'):
        amount = amount.replace(separator, u'')
    return int(amount)


def _tail_text(element):
    """Return the stripped text following *element*, or u'' if there is none."""
    return (element.tail or u'').strip()


class SearchResultsPage(BasePage):
    """Page listing the results of a housing search."""

    DATE_RE = re.compile(r'Annonce \w+ du (.*)')
    # French month names -> English ones, so the date can be given to dateutil.
    MONTHS = {u'janvier': 'january',
              u'février': 'february',
              u'mars': 'march',
              u'avril': 'april',
              u'mai': 'may',
              u'juin': 'june',
              u'juillet': 'july',
              u'août': 'august',
              u'septembre': 'september',
              u'octobre': 'october',
              u'novembre': 'november',
              u'décembre': 'december',
              }

    def iter_housings(self):
        """Yield one Housing per 'div.annonce-resume' element of the page.

        id, title, cost, currency and text are always set. date is set only
        when the publication line matches DATE_RE, station falls back to
        NotAvailable, and location is set only when the summary has a <b>.
        """
        for div in self.document.getroot().cssselect('div.annonce-resume'):
            a = div.cssselect('td.lien-annonce')[0].find('a')
            # The id is whatever follows the last '-' of the link target.
            housing = Housing(a.attrib['href'].split('-')[-1])
            housing.title = a.text.strip()
            housing.cost = _parse_cost(div.cssselect('td.prix')[0].text)
            housing.currency = u'€'

            published = div.cssselect('p.date-publication')[0].text.strip()
            m = self.DATE_RE.match(published)
            if m:
                date = m.group(1)
                for fr, en in self.MONTHS.items():
                    date = date.replace(fr, en)
                housing.date = parse_date(date)

            metro = div.cssselect('p.metro')
            if metro:
                housing.station = metro[0].text.strip()
            else:
                housing.station = NotAvailable

            p = div.cssselect('p.annonce-resume-texte')[0]
            b = p.findall('b')
            if b:
                # The first <b> holds the location; the description follows it.
                housing.text = _tail_text(b[0])
                housing.location = b[0].text
            else:
                housing.text = p.text.strip()

            yield housing


class HousingPage(BasePage):
    """Page describing a single housing."""

    # Title shape: "<word> <words> <area><nbsp>m<superscript two> <rest>".
    # re.UNICODE lets \w and \s follow the Unicode database, so accented
    # letters in the title no longer prevent the area from being found.
    AREA_RE = re.compile(u'(\\w+) ([\\w\\s]+) (\\d+)\xa0m\xb2 (.*)',
                         re.UNICODE)

    def get_housing(self):
        """Return the Housing parsed from the 'div#annonce_detail' element.

        The <h1> is expected to read '<title> - <cost>'. area is set only
        when the title matches AREA_RE. date is always NotAvailable; station,
        location and phone are NotAvailable unless found on the page.
        """
        div = self.parser.select(self.document.getroot(),
                                 'div#annonce_detail', 1)
        # The id is whatever follows the last '-' of the page URL.
        housing = Housing(self.url.split('-')[-1])

        parts = div.find('h1').text.split(' - ')
        housing.title = parts[0].strip()
        housing.cost = _parse_cost(parts[1])
        housing.currency = u'€'

        m = self.AREA_RE.match(housing.title)
        if m:
            housing.area = int(m.group(3))

        housing.date = housing.station = NotAvailable
        housing.location = housing.phone = NotAvailable

        metro = div.cssselect('p.metro')
        if metro:
            housing.station = metro[0].text.strip()

        p = div.cssselect('p.annonce-detail-texte')[0]
        b = p.findall('b')
        if b:
            # First <b>: location (description follows it); second: phone.
            housing.text = _tail_text(b[0])
            housing.location = b[0].text
            if len(b) > 1:
                housing.phone = b[1].text
        else:
            housing.text = p.text.strip()

        return housing
from weboob.capabilities.housing import Query
from weboob.tools.test import BackendTest


__all__ = ['PapTest']


class PapTest(BackendTest):
    """Live test of the 'pap' backend: city lookup, search, then fill."""

    BACKEND = 'pap'

    def test_pap(self):
        """Search housings in the cities matching 'paris' and fill the first
        result's phone field."""
        query = Query()
        query.area_min = 20
        query.cost_max = 900
        query.cities = []
        for city in self.backend.search_city('paris'):
            # search_housings() keeps only the cities whose backend is this
            # one, and search_city() called directly does not set it.
            city.backend = self.backend.name
            query.cities.append(city)

        results = list(self.backend.search_housings(query))
        self.assertTrue(len(results) > 0)

        self.backend.fillobj(results[0], 'phone')