From 0b74d5a8c41be6bcd1d9bc1d75ffb271cdfbf5f6 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Thu, 16 Feb 2012 17:04:54 +0100 Subject: [PATCH] use mobile API to search housings too --- modules/seloger/backend.py | 19 +++++++++++++----- modules/seloger/browser.py | 7 ++++--- modules/seloger/pages.py | 41 +++++++++++++++----------------------- 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/modules/seloger/backend.py b/modules/seloger/backend.py index 065c1d78..fdda9c8e 100644 --- a/modules/seloger/backend.py +++ b/modules/seloger/backend.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. -from weboob.capabilities.housing import ICapHousing, City, Housing +from weboob.capabilities.housing import ICapHousing, City, Housing, HousingPhoto from weboob.tools.backend import BaseBackend from .browser import SeLogerBrowser @@ -38,11 +38,13 @@ class SeLogerBackend(BaseBackend, ICapHousing): def search_housings(self, query): cities = [c.id for c in query.cities if c.backend == self.name] + if len(cities) == 0: + return list([]) + with self.browser: - for housing in self.browser.search_housings(cities, - query.area_min, query.area_max, - query.cost_min, query.cost_max): - yield housing + return self.browser.search_housings(cities, + query.area_min, query.area_max, + query.cost_min, query.cost_max) def get_housing(self, housing): if isinstance(housing, Housing): @@ -76,5 +78,12 @@ class SeLogerBackend(BaseBackend, ICapHousing): photo.data = self.browser.readurl(photo.url) return housing + def fill_photo(self, photo, fields): + with self.browser: + if 'data' in fields and photo.url and not photo.data: + photo.data = self.browser.readurl(photo.url) + return photo + OBJECTS = {Housing: fill_housing, + HousingPhoto: fill_photo, } diff --git a/modules/seloger/browser.py b/modules/seloger/browser.py index 8480a9e6..06d7d5d0 100644 --- a/modules/seloger/browser.py +++ 
b/modules/seloger/browser.py @@ -34,8 +34,7 @@ class SeLogerBrowser(BaseBrowser): ENCODING = 'utf-8' USER_AGENT = BaseBrowser.USER_AGENTS['android'] PAGES = { - 'http://www.seloger.com/(pre)?recherche.htm.*': SearchResultsPage, - 'http://www.seloger.com/annonces.htm.*': SearchResultsPage, + 'http://ws.seloger.com/search.xml.*': SearchResultsPage, 'http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(\d+)(&noAudiotel=\d)?': HousingPage, } @@ -52,9 +51,11 @@ class SeLogerBrowser(BaseBrowser): 'px_loyermin': cost_min or '', 'surfacemax': area_max or '', 'surfacemin': area_min or '', + 'tri': 'd_dt_crea', } - self.location(self.buildurl('/prerecherche.htm', **data)) + self.location(self.buildurl('http://ws.seloger.com/search.xml', **data)) + assert self.is_on_page(SearchResultsPage) return self.page.iter_housings() diff --git a/modules/seloger/pages.py b/modules/seloger/pages.py index e2755771..eb815cc8 100644 --- a/modules/seloger/pages.py +++ b/modules/seloger/pages.py @@ -30,35 +30,26 @@ __all__ = ['SearchResultsPage', 'HousingPage'] class SearchResultsPage(BasePage): - def sanitarize_cost(self, t): - return int(float(t.strip(u' \t\u20ac\xa0c€\n\r').replace(u'\xa0', u'').replace(',', '.'))) - def iter_housings(self): - for div in self.document.getroot().cssselect('div.ann_ann_border'): - id = div.find('a').attrib['id'][3:] - housing = Housing(id) - - head = div.cssselect('div.rech_headerann')[0] - housing.title = head.xpath('.//span[@class="mea1"]/a')[0].text.strip() - - parts = head.xpath('.//span[@class="mea2"]')[0].text.strip().split('+') - housing.cost = self.sanitarize_cost(parts[0]) - if len(parts) > 1: - for span in head.xpath('.//span[@class="addprixfr"]/span/strong'): - if span.text.strip() == u'Charges\xa0:': - housing.cost += self.sanitarize_cost(span.tail) + for a in self.document.getroot().xpath('//annonce'): + housing = Housing(a.find('idannonce').text) + housing.title = a.find('titre').text + housing.date = parse_date(a.find('dtfraicheur').text) + 
housing.cost = float(a.find('prix').text) housing.currency = u'€' + housing.area = float(a.find('surface').text) + housing.text = a.find('descriptif').text.strip() + housing.location = a.find('ville').text + try: + housing.station = a.find('proximite').text + except AttributeError: + housing.station = NotAvailable - sub = div.xpath('.//div[@class="rech_desc_right_photo"]')[0] - span = sub.xpath('./span[@class="mea7"]') - if len(span) > 0: - housing.text = '%s - %s' % (span[0].text.strip(), span[0].tail.strip()) - else: - housing.text = div.xpath('.//div[@class="rech_ville"]')[0].tail.strip() - housing.text = housing.text.replace('\r\n', ' ') - housing.location = sub.xpath('.//div[@class="rech_ville"]/strong')[0].text.strip() + housing.photos = [] + for photo in a.xpath('./photos/photo'): + url = photo.find('stdurl').text + housing.photos.append(HousingPhoto(url)) - housing.date = date(*map(int, reversed(sub.xpath('.//div[@class="rech_majref"]/strong')[0].tail.strip('- \xa0\r\t\n').split('/')))) yield housing class HousingPage(BasePage):