[seloger] adapt to browser2

This commit is contained in:
Bezleputh 2014-10-11 13:47:24 +02:00 committed by Florent Fourcot
commit 8ac7bbb434
4 changed files with 142 additions and 149 deletions

View file

@ -17,40 +17,40 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.json import json
from weboob.deprecated.browser import Browser
from weboob.capabilities.housing import Query from weboob.capabilities.housing import Query
from .pages import SearchResultsPage, HousingPage from weboob.browser import PagesBrowser, URL
from .pages import SearchResultsPage, HousingPage, CitiesPage
from weboob.browser.profiles import Android
__all__ = ['SeLogerBrowser'] __all__ = ['SeLogerBrowser']
class SeLogerBrowser(Browser): class SeLogerBrowser(PagesBrowser):
PROTOCOL = 'http' BASEURL = 'http://www.seloger.com'
DOMAIN = 'www.seloger.com' PROFILE = Android()
ENCODING = 'utf-8' cities = URL('js,ajax,villequery_v3.htm\?ville=(?P<pattern>.*)', CitiesPage)
USER_AGENT = Browser.USER_AGENTS['android'] search = URL('http://ws.seloger.com/search.xml\?(?P<request>.*)', SearchResultsPage)
PAGES = { housing = URL('http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(?P<_id>\d+)&noAudiotel=(?P<noAudiotel>\d)', HousingPage)
'http://ws.seloger.com/search.xml.*': SearchResultsPage,
'http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(\d+)(&noAudiotel=\d)?': HousingPage,
}
def search_geo(self, pattern): def search_geo(self, pattern):
fp = self.openurl(self.buildurl('http://www.seloger.com/js,ajax,villequery_v3.htm', ville=pattern.encode('utf-8'), mode=1)) return self.cities.open(pattern=pattern.encode('utf-8')).iter_cities()
return json.load(fp)
TYPES = {Query.TYPE_RENT: 1, TYPES = {Query.TYPE_RENT: 1,
Query.TYPE_SALE: 2 Query.TYPE_SALE: 2
} }
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max): RET = {Query.HOUSE_TYPES.HOUSE: '2',
Query.HOUSE_TYPES.APART: '1',
Query.HOUSE_TYPES.LAND: '4',
Query.HOUSE_TYPES.PARKING: '3',
Query.HOUSE_TYPES.OTHER: '10'}
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
data = {'ci': ','.join(cities), data = {'ci': ','.join(cities),
'idtt': self.TYPES.get(type, 1), 'idtt': self.TYPES.get(type, 1),
'idtypebien': 1, #appart
'org': 'advanced_search', 'org': 'advanced_search',
'surfacemax': area_max or '', 'surfacemax': area_max or '',
'surfacemin': area_min or '', 'surfacemin': area_min or '',
@ -67,24 +67,15 @@ class SeLogerBrowser(Browser):
if nb_rooms: if nb_rooms:
data['nb_pieces'] = nb_rooms data['nb_pieces'] = nb_rooms
self.location(self.buildurl('http://ws.seloger.com/search.xml', **data)) ret = []
for house_type in house_types:
if house_type in self.RET:
ret.append(self.RET.get(house_type))
while True: if ret:
assert self.is_on_page(SearchResultsPage) data['idtypebien'] = ','.join(ret)
for housing in self.page.iter_housings(): return self.search.go(request=urllib.urlencode(data)).iter_housings()
yield housing
url = self.page.next_page_url() def get_housing(self, _id, obj=None):
if url is None: return self.housing.go(_id=_id, noAudiotel=1).get_housing(obj)
return
self.location(url)
def get_housing(self, id, obj=None):
self.location(self.buildurl('http://ws.seloger.com/annonceDetail.xml', idAnnonce=id, noAudiotel=1))
assert self.is_on_page(HousingPage)
housing = self.page.get_housing(obj)
return housing

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import CapHousing, City, Housing, HousingPhoto from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
from weboob.tools.backend import Module from weboob.tools.backend import Module
from .browser import SeLogerBrowser from .browser import SeLogerBrowser
@ -42,10 +42,10 @@ class SeLogerModule(Module, CapHousing):
if len(cities) == 0: if len(cities) == 0:
return list([]) return list([])
with self.browser:
return self.browser.search_housings(query.type, cities, query.nb_rooms, return self.browser.search_housings(query.type, cities, query.nb_rooms,
query.area_min, query.area_max, query.area_min, query.area_max,
query.cost_min, query.cost_max) query.cost_min, query.cost_max,
query.house_types)
def get_housing(self, housing): def get_housing(self, housing):
if isinstance(housing, Housing): if isinstance(housing, Housing):
@ -54,35 +54,23 @@ class SeLogerModule(Module, CapHousing):
id = housing id = housing
housing = None housing = None
with self.browser:
return self.browser.get_housing(id, housing) return self.browser.get_housing(id, housing)
def search_city(self, pattern): def search_city(self, pattern):
with self.browser: return self.browser.search_geo(pattern)
for categories in self.browser.search_geo(pattern):
if categories['label'] != 'Villes':
continue
for city in categories['values']:
if 'value' not in city:
continue
c = City(city['value'])
c.name = unicode(city['label'])
yield c
def fill_housing(self, housing, fields): def fill_housing(self, housing, fields):
with self.browser:
if fields != ['photos'] or not housing.photos: if fields != ['photos'] or not housing.photos:
housing = self.browser.get_housing(housing.id) housing = self.browser.get_housing(housing.id)
if 'photos' in fields: if 'photos' in fields:
for photo in housing.photos: for photo in housing.photos:
if not photo.data: if not photo.data:
photo.data = self.browser.readurl(photo.url) photo.data = self.browser.open(photo.url)
return housing return housing
def fill_photo(self, photo, fields): def fill_photo(self, photo, fields):
with self.browser:
if 'data' in fields and photo.url and not photo.data: if 'data' in fields and photo.url and not photo.data:
photo.data = self.browser.readurl(photo.url) photo.data = self.browser.open(photo.url).content
return photo return photo
OBJECTS = {Housing: fill_housing, OBJECTS = {Housing: fill_housing,

View file

@ -18,85 +18,99 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from decimal import Decimal from weboob.browser.pages import XMLPage, JsonPage, pagination
from dateutil.parser import parse as parse_date from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.json import Dict
from weboob.deprecated.browser import Page from weboob.browser.filters.html import XPath
from weboob.browser.filters.standard import CleanText, CleanDecimal, DateTime
from weboob.capabilities.base import NotAvailable from weboob.capabilities.base import NotAvailable
from weboob.capabilities.housing import Housing, HousingPhoto from weboob.capabilities.housing import Housing, HousingPhoto, City
class SearchResultsPage(Page): class DictElement(ListElement):
def next_page_url(self): def find_elements(self):
urls = self.document.getroot().xpath('//pagesuivante') for el in self.el:
if len(urls) == 0: if el.get('label') == 'Villes':
return None for item in el.get('values'):
else: if 'value' in item:
return urls[0].text yield item
def iter_housings(self):
for a in self.document.getroot().xpath('//annonce'):
housing = Housing(a.find('idannonce').text)
housing.title = unicode(a.find('titre').text)
housing.date = parse_date(a.find('dtfraicheur').text)
housing.cost = Decimal(a.find('prix').text)
housing.currency = u''
housing.area = Decimal(a.find('surface').text)
housing.text = unicode(a.find('descriptif').text.strip())
housing.location = unicode(a.find('ville').text)
try:
housing.station = unicode(a.find('proximite').text)
except AttributeError:
housing.station = NotAvailable
housing.photos = []
for photo in a.xpath('./photos/photo'):
url = unicode(photo.find('stdurl').text)
housing.photos.append(HousingPhoto(url))
yield housing
class HousingPage(Page): class CitiesPage(JsonPage):
def get_housing(self, housing=None): @method
if housing is None: class iter_cities(DictElement):
housing = Housing(self.groups[0]) class item(ItemElement):
klass = City
details = self.document.getroot().xpath('//detailannonce')[0] obj_id = Dict('value')
if details.find('titre') is None: obj_name = Dict('label')
return None
housing.title = unicode(details.find('titre').text)
housing.text = details.find('descriptif').text.strip()
housing.cost = Decimal(details.find('prix').text)
housing.currency = u''
housing.date = parse_date(details.find('dtfraicheur').text)
housing.area = Decimal(details.find('surface').text)
housing.phone = unicode(details.find('contact').find('telephone').text)
try: class SeLogerItem(ItemElement):
housing.station = unicode(details.find('proximite').text) klass = Housing
except AttributeError:
housing.station = NotAvailable
housing.location = details.find('adresse').text obj_id = CleanText('idAnnonce')
if not housing.location and details.find('quartier') is not None: obj_title = CleanText('titre')
housing.location = unicode(details.find('quartier').text) obj_date = DateTime(CleanText('dtFraicheur'))
if not housing.location: obj_cost = CleanDecimal('prix')
housing.location = NotAvailable obj_currency = CleanText('prixUnite')
obj_area = CleanDecimal('surface')
obj_text = CleanText('descriptif')
obj_location = CleanText('ville')
obj_station = CleanText('proximite', default=NotAvailable)
obj_url = CleanText('permaLien')
housing.photos = []
for photo in details.xpath('./photos/photo'):
if photo.find('bigurl').text:
url = photo.find('bigurl').text
else:
url = photo.find('stdurl').text
housing.photos.append(HousingPhoto(unicode(url)))
housing.details = {} class SearchResultsPage(XMLPage):
for detail in details.xpath('./details/detail'): @pagination
housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A' @method
class iter_housings(ListElement):
item_xpath = "//annonce"
housing.details['Reference'] = details.find('reference').text def next_page(self):
return CleanText('//pagesuivante', default=None)(self)
return housing class item(SeLogerItem):
def obj_photos(self):
photos = []
for photo in XPath('./photos/photo/stdurl')(self):
photos.append(HousingPhoto(photo))
return photos
class HousingPage(XMLPage):
@method
class get_housing(SeLogerItem):
def obj_photos(self):
photos = []
for photo in XPath('./photos/photo')(self):
url = CleanText('bigUrl', default=None)(photo)
if not url:
url = CleanText('stdUrl', default=None)(photo)
photos.append(HousingPhoto(url))
return photos
def condition(self):
return CleanText('//detailAnnonce/titre', default=None)(self)
def obj_location(self):
location = CleanText('//detailAnnonce/adresse')(self)
quartier = CleanText('//detailAnnonce/quartier', default=None)(self)
if not location and quartier is not None:
location = quartier
ville = CleanText('ville')(self)
return u'%s %s' % (location, ville)
def obj_details(self):
details = {}
for detail in XPath('//detailAnnonce/details/detail')(self):
details[CleanText('libelle')(detail)] = CleanText('valeur', default='N/A')(detail)
details['Reference'] = CleanText('//detailAnnonce/reference')(self)
return details
obj_phone = CleanText('//contact/telephone')

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import itertools
from weboob.capabilities.housing import Query from weboob.capabilities.housing import Query
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
@ -34,7 +34,7 @@ class SeLogerTest(BackendTest):
city.backend = self.backend.name city.backend = self.backend.name
query.cities.append(city) query.cities.append(city)
results = list(self.backend.search_housings(query)) results = list(itertools.islice(self.backend.search_housings(query), 0, 20))
self.assertTrue(len(results) > 0) self.assertTrue(len(results) > 0)
self.backend.fillobj(results[0], 'phone') self.backend.fillobj(results[0], 'phone')