From 8ac7bbb4348b0be5fab7411e9664f56e66c291de Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Sat, 11 Oct 2014 13:47:24 +0200 Subject: [PATCH] [seloger] adapt to browser2 --- modules/seloger/browser.py | 67 +++++++--------- modules/seloger/module.py | 44 ++++------- modules/seloger/pages.py | 152 ++++++++++++++++++++----------------- modules/seloger/test.py | 4 +- 4 files changed, 130 insertions(+), 137 deletions(-) diff --git a/modules/seloger/browser.py b/modules/seloger/browser.py index ce03d8a6..d4beb882 100644 --- a/modules/seloger/browser.py +++ b/modules/seloger/browser.py @@ -17,45 +17,45 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . +import urllib -from weboob.tools.json import json - -from weboob.deprecated.browser import Browser from weboob.capabilities.housing import Query -from .pages import SearchResultsPage, HousingPage - +from weboob.browser import PagesBrowser, URL +from .pages import SearchResultsPage, HousingPage, CitiesPage +from weboob.browser.profiles import Android __all__ = ['SeLogerBrowser'] -class SeLogerBrowser(Browser): - PROTOCOL = 'http' - DOMAIN = 'www.seloger.com' - ENCODING = 'utf-8' - USER_AGENT = Browser.USER_AGENTS['android'] - PAGES = { - 'http://ws.seloger.com/search.xml.*': SearchResultsPage, - 'http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(\d+)(&noAudiotel=\d)?': HousingPage, - } +class SeLogerBrowser(PagesBrowser): + BASEURL = 'http://www.seloger.com' + PROFILE = Android() + cities = URL('js,ajax,villequery_v3.htm\?ville=(?P.*)', CitiesPage) + search = URL('http://ws.seloger.com/search.xml\?(?P.*)', SearchResultsPage) + housing = URL('http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(?P<_id>\d+)&noAudiotel=(?P\d)', HousingPage) def search_geo(self, pattern): - fp = self.openurl(self.buildurl('http://www.seloger.com/js,ajax,villequery_v3.htm', ville=pattern.encode('utf-8'), mode=1)) - return json.load(fp) + return self.cities.open(pattern=pattern.encode('utf-8')).iter_cities() TYPES = {Query.TYPE_RENT: 1, Query.TYPE_SALE: 2 - } + } - def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max): + RET = {Query.HOUSE_TYPES.HOUSE: '2', + Query.HOUSE_TYPES.APART: '1', + Query.HOUSE_TYPES.LAND: '4', + Query.HOUSE_TYPES.PARKING: '3', + Query.HOUSE_TYPES.OTHER: '10'} + + def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types): data = {'ci': ','.join(cities), 'idtt': self.TYPES.get(type, 1), - 'idtypebien': 1, #appart 'org': 'advanced_search', 'surfacemax': area_max or '', 'surfacemin': area_min or '', 'tri': 'd_dt_crea', - } + } if type == Query.TYPE_SALE: data['pxmax'] = cost_max or '' @@ -67,24 +67,15 @@ class SeLogerBrowser(Browser): if nb_rooms: data['nb_pieces'] = nb_rooms - self.location(self.buildurl('http://ws.seloger.com/search.xml', **data)) + ret = [] + for house_type in house_types: + if house_type in self.RET: + ret.append(self.RET.get(house_type)) - while True: - assert self.is_on_page(SearchResultsPage) + if ret: + data['idtypebien'] = ','.join(ret) - for housing in self.page.iter_housings(): - yield housing + return self.search.go(request=urllib.urlencode(data)).iter_housings() - url = self.page.next_page_url() - if url is None: - return - - self.location(url) - - def get_housing(self, id, obj=None): - self.location(self.buildurl('http://ws.seloger.com/annonceDetail.xml', idAnnonce=id, noAudiotel=1)) - - assert self.is_on_page(HousingPage) - housing = self.page.get_housing(obj) - - return housing + def get_housing(self, _id, obj=None): + return self.housing.go(_id=_id, noAudiotel=1).get_housing(obj) diff --git a/modules/seloger/module.py b/modules/seloger/module.py index 39baca15..6f451e05 100644 --- a/modules/seloger/module.py +++ b/modules/seloger/module.py @@ -18,7 +18,7 @@ # along with weboob. If not, see . -from weboob.capabilities.housing import CapHousing, City, Housing, HousingPhoto +from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto from weboob.tools.backend import Module from .browser import SeLogerBrowser @@ -42,10 +42,10 @@ class SeLogerModule(Module, CapHousing): if len(cities) == 0: return list([]) - with self.browser: - return self.browser.search_housings(query.type, cities, query.nb_rooms, - query.area_min, query.area_max, - query.cost_min, query.cost_max) + return self.browser.search_housings(query.type, cities, query.nb_rooms, + query.area_min, query.area_max, + query.cost_min, query.cost_max, + query.house_types) def get_housing(self, housing): if isinstance(housing, Housing): @@ -54,37 +54,25 @@ class SeLogerModule(Module, CapHousing): id = housing housing = None - with self.browser: - return self.browser.get_housing(id, housing) + return self.browser.get_housing(id, housing) def search_city(self, pattern): - with self.browser: - for categories in self.browser.search_geo(pattern): - if categories['label'] != 'Villes': - continue - for city in categories['values']: - if 'value' not in city: - continue - c = City(city['value']) - c.name = unicode(city['label']) - yield c + return self.browser.search_geo(pattern) def fill_housing(self, housing, fields): - with self.browser: - if fields != ['photos'] or not housing.photos: - housing = self.browser.get_housing(housing.id) - if 'photos' in fields: - for photo in housing.photos: - if not photo.data: - photo.data = self.browser.readurl(photo.url) + if fields != ['photos'] or not housing.photos: + housing = self.browser.get_housing(housing.id) + if 'photos' in fields: + for photo in housing.photos: + if not photo.data: + photo.data = self.browser.open(photo.url) return housing def fill_photo(self, photo, fields): - with self.browser: - if 'data' in fields and photo.url and not photo.data: - photo.data = self.browser.readurl(photo.url) + if 'data' in fields and photo.url and not photo.data: + photo.data = self.browser.open(photo.url).content return photo OBJECTS = {Housing: fill_housing, HousingPhoto: fill_photo, - } + } diff --git a/modules/seloger/pages.py b/modules/seloger/pages.py index 093150d4..5b63917a 100644 --- a/modules/seloger/pages.py +++ b/modules/seloger/pages.py @@ -18,85 +18,99 @@ # along with weboob. If not, see . -from decimal import Decimal -from dateutil.parser import parse as parse_date - -from weboob.deprecated.browser import Page +from weboob.browser.pages import XMLPage, JsonPage, pagination +from weboob.browser.elements import ItemElement, ListElement, method +from weboob.browser.filters.json import Dict +from weboob.browser.filters.html import XPath +from weboob.browser.filters.standard import CleanText, CleanDecimal, DateTime from weboob.capabilities.base import NotAvailable -from weboob.capabilities.housing import Housing, HousingPhoto +from weboob.capabilities.housing import Housing, HousingPhoto, City -class SearchResultsPage(Page): - def next_page_url(self): - urls = self.document.getroot().xpath('//pagesuivante') - if len(urls) == 0: - return None - else: - return urls[0].text - - def iter_housings(self): - for a in self.document.getroot().xpath('//annonce'): - housing = Housing(a.find('idannonce').text) - housing.title = unicode(a.find('titre').text) - housing.date = parse_date(a.find('dtfraicheur').text) - housing.cost = Decimal(a.find('prix').text) - housing.currency = u'€' - housing.area = Decimal(a.find('surface').text) - housing.text = unicode(a.find('descriptif').text.strip()) - housing.location = unicode(a.find('ville').text) - try: - housing.station = unicode(a.find('proximite').text) - except AttributeError: - housing.station = NotAvailable - - housing.photos = [] - for photo in a.xpath('./photos/photo'): - url = unicode(photo.find('stdurl').text) - housing.photos.append(HousingPhoto(url)) - - yield housing +class DictElement(ListElement): + def find_elements(self): + for el in self.el: + if el.get('label') == 'Villes': + for item in el.get('values'): + if 'value' in item: + yield item -class HousingPage(Page): - def get_housing(self, housing=None): - if housing is None: - housing = Housing(self.groups[0]) +class CitiesPage(JsonPage): + @method + class iter_cities(DictElement): + class item(ItemElement): + klass = City - details = self.document.getroot().xpath('//detailannonce')[0] - if details.find('titre') is None: - return None + obj_id = Dict('value') + obj_name = Dict('label') - housing.title = unicode(details.find('titre').text) - housing.text = details.find('descriptif').text.strip() - housing.cost = Decimal(details.find('prix').text) - housing.currency = u'€' - housing.date = parse_date(details.find('dtfraicheur').text) - housing.area = Decimal(details.find('surface').text) - housing.phone = unicode(details.find('contact').find('telephone').text) - try: - housing.station = unicode(details.find('proximite').text) - except AttributeError: - housing.station = NotAvailable +class SeLogerItem(ItemElement): + klass = Housing - housing.location = details.find('adresse').text - if not housing.location and details.find('quartier') is not None: - housing.location = unicode(details.find('quartier').text) - if not housing.location: - housing.location = NotAvailable + obj_id = CleanText('idAnnonce') + obj_title = CleanText('titre') + obj_date = DateTime(CleanText('dtFraicheur')) + obj_cost = CleanDecimal('prix') + obj_currency = CleanText('prixUnite') + obj_area = CleanDecimal('surface') + obj_text = CleanText('descriptif') + obj_location = CleanText('ville') + obj_station = CleanText('proximite', default=NotAvailable) + obj_url = CleanText('permaLien') - housing.photos = [] - for photo in details.xpath('./photos/photo'): - if photo.find('bigurl').text: - url = photo.find('bigurl').text - else: - url = photo.find('stdurl').text - housing.photos.append(HousingPhoto(unicode(url))) - housing.details = {} - for detail in details.xpath('./details/detail'): - housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A' +class SearchResultsPage(XMLPage): + @pagination + @method + class iter_housings(ListElement): + item_xpath = "//annonce" - housing.details['Reference'] = details.find('reference').text + def next_page(self): + return CleanText('//pagesuivante', default=None)(self) - return housing + class item(SeLogerItem): + def obj_photos(self): + photos = [] + + for photo in XPath('./photos/photo/stdurl')(self): + photos.append(HousingPhoto(photo)) + + return photos + + +class HousingPage(XMLPage): + @method + class get_housing(SeLogerItem): + + def obj_photos(self): + photos = [] + + for photo in XPath('./photos/photo')(self): + url = CleanText('bigUrl', default=None)(photo) + if not url: + url = CleanText('stdUrl', default=None)(photo) + photos.append(HousingPhoto(url)) + return photos + + def condition(self): + return CleanText('//detailAnnonce/titre', default=None)(self) + + def obj_location(self): + location = CleanText('//detailAnnonce/adresse')(self) + quartier = CleanText('//detailAnnonce/quartier', default=None)(self) + if not location and quartier is not None: + location = quartier + ville = CleanText('ville')(self) + return u'%s %s' % (location, ville) + + def obj_details(self): + details = {} + for detail in XPath('//detailAnnonce/details/detail')(self): + details[CleanText('libelle')(detail)] = CleanText('valeur', default='N/A')(detail) + + details['Reference'] = CleanText('//detailAnnonce/reference')(self) + return details + + obj_phone = CleanText('//contact/telephone') diff --git a/modules/seloger/test.py b/modules/seloger/test.py index 23bf48bf..b9b658c2 100644 --- a/modules/seloger/test.py +++ b/modules/seloger/test.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . - +import itertools from weboob.capabilities.housing import Query from weboob.tools.test import BackendTest @@ -34,7 +34,7 @@ class SeLogerTest(BackendTest): city.backend = self.backend.name query.cities.append(city) - results = list(self.backend.search_housings(query)) + results = list(itertools.islice(self.backend.search_housings(query), 0, 20)) self.assertTrue(len(results) > 0) self.backend.fillobj(results[0], 'phone')