[seloger] adapt to browser2

This commit is contained in:
Bezleputh 2014-10-11 13:47:24 +02:00 committed by Florent Fourcot
commit 8ac7bbb434
4 changed files with 142 additions and 149 deletions

View file

@ -17,45 +17,45 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.json import json
from weboob.deprecated.browser import Browser
from weboob.capabilities.housing import Query
from .pages import SearchResultsPage, HousingPage
from weboob.browser import PagesBrowser, URL
from .pages import SearchResultsPage, HousingPage, CitiesPage
from weboob.browser.profiles import Android
__all__ = ['SeLogerBrowser']
class SeLogerBrowser(Browser):
PROTOCOL = 'http'
DOMAIN = 'www.seloger.com'
ENCODING = 'utf-8'
USER_AGENT = Browser.USER_AGENTS['android']
PAGES = {
'http://ws.seloger.com/search.xml.*': SearchResultsPage,
'http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(\d+)(&noAudiotel=\d)?': HousingPage,
}
class SeLogerBrowser(PagesBrowser):
BASEURL = 'http://www.seloger.com'
PROFILE = Android()
cities = URL('js,ajax,villequery_v3.htm\?ville=(?P<pattern>.*)', CitiesPage)
search = URL('http://ws.seloger.com/search.xml\?(?P<request>.*)', SearchResultsPage)
housing = URL('http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(?P<_id>\d+)&noAudiotel=(?P<noAudiotel>\d)', HousingPage)
def search_geo(self, pattern):
fp = self.openurl(self.buildurl('http://www.seloger.com/js,ajax,villequery_v3.htm', ville=pattern.encode('utf-8'), mode=1))
return json.load(fp)
return self.cities.open(pattern=pattern.encode('utf-8')).iter_cities()
TYPES = {Query.TYPE_RENT: 1,
Query.TYPE_SALE: 2
}
}
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max):
RET = {Query.HOUSE_TYPES.HOUSE: '2',
Query.HOUSE_TYPES.APART: '1',
Query.HOUSE_TYPES.LAND: '4',
Query.HOUSE_TYPES.PARKING: '3',
Query.HOUSE_TYPES.OTHER: '10'}
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
data = {'ci': ','.join(cities),
'idtt': self.TYPES.get(type, 1),
'idtypebien': 1, #appart
'org': 'advanced_search',
'surfacemax': area_max or '',
'surfacemin': area_min or '',
'tri': 'd_dt_crea',
}
}
if type == Query.TYPE_SALE:
data['pxmax'] = cost_max or ''
@ -67,24 +67,15 @@ class SeLogerBrowser(Browser):
if nb_rooms:
data['nb_pieces'] = nb_rooms
self.location(self.buildurl('http://ws.seloger.com/search.xml', **data))
ret = []
for house_type in house_types:
if house_type in self.RET:
ret.append(self.RET.get(house_type))
while True:
assert self.is_on_page(SearchResultsPage)
if ret:
data['idtypebien'] = ','.join(ret)
for housing in self.page.iter_housings():
yield housing
return self.search.go(request=urllib.urlencode(data)).iter_housings()
url = self.page.next_page_url()
if url is None:
return
self.location(url)
def get_housing(self, id, obj=None):
self.location(self.buildurl('http://ws.seloger.com/annonceDetail.xml', idAnnonce=id, noAudiotel=1))
assert self.is_on_page(HousingPage)
housing = self.page.get_housing(obj)
return housing
def get_housing(self, _id, obj=None):
return self.housing.go(_id=_id, noAudiotel=1).get_housing(obj)

View file

@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.housing import CapHousing, City, Housing, HousingPhoto
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
from weboob.tools.backend import Module
from .browser import SeLogerBrowser
@ -42,10 +42,10 @@ class SeLogerModule(Module, CapHousing):
if len(cities) == 0:
return list([])
with self.browser:
return self.browser.search_housings(query.type, cities, query.nb_rooms,
query.area_min, query.area_max,
query.cost_min, query.cost_max)
return self.browser.search_housings(query.type, cities, query.nb_rooms,
query.area_min, query.area_max,
query.cost_min, query.cost_max,
query.house_types)
def get_housing(self, housing):
if isinstance(housing, Housing):
@ -54,37 +54,25 @@ class SeLogerModule(Module, CapHousing):
id = housing
housing = None
with self.browser:
return self.browser.get_housing(id, housing)
return self.browser.get_housing(id, housing)
def search_city(self, pattern):
with self.browser:
for categories in self.browser.search_geo(pattern):
if categories['label'] != 'Villes':
continue
for city in categories['values']:
if 'value' not in city:
continue
c = City(city['value'])
c.name = unicode(city['label'])
yield c
return self.browser.search_geo(pattern)
def fill_housing(self, housing, fields):
with self.browser:
if fields != ['photos'] or not housing.photos:
housing = self.browser.get_housing(housing.id)
if 'photos' in fields:
for photo in housing.photos:
if not photo.data:
photo.data = self.browser.readurl(photo.url)
if fields != ['photos'] or not housing.photos:
housing = self.browser.get_housing(housing.id)
if 'photos' in fields:
for photo in housing.photos:
if not photo.data:
photo.data = self.browser.open(photo.url)
return housing
def fill_photo(self, photo, fields):
with self.browser:
if 'data' in fields and photo.url and not photo.data:
photo.data = self.browser.readurl(photo.url)
if 'data' in fields and photo.url and not photo.data:
photo.data = self.browser.open(photo.url).content
return photo
OBJECTS = {Housing: fill_housing,
HousingPhoto: fill_photo,
}
}

View file

@ -18,85 +18,99 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from decimal import Decimal
from dateutil.parser import parse as parse_date
from weboob.deprecated.browser import Page
from weboob.browser.pages import XMLPage, JsonPage, pagination
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.json import Dict
from weboob.browser.filters.html import XPath
from weboob.browser.filters.standard import CleanText, CleanDecimal, DateTime
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.housing import Housing, HousingPhoto
from weboob.capabilities.housing import Housing, HousingPhoto, City
class SearchResultsPage(Page):
def next_page_url(self):
urls = self.document.getroot().xpath('//pagesuivante')
if len(urls) == 0:
return None
else:
return urls[0].text
def iter_housings(self):
for a in self.document.getroot().xpath('//annonce'):
housing = Housing(a.find('idannonce').text)
housing.title = unicode(a.find('titre').text)
housing.date = parse_date(a.find('dtfraicheur').text)
housing.cost = Decimal(a.find('prix').text)
housing.currency = u''
housing.area = Decimal(a.find('surface').text)
housing.text = unicode(a.find('descriptif').text.strip())
housing.location = unicode(a.find('ville').text)
try:
housing.station = unicode(a.find('proximite').text)
except AttributeError:
housing.station = NotAvailable
housing.photos = []
for photo in a.xpath('./photos/photo'):
url = unicode(photo.find('stdurl').text)
housing.photos.append(HousingPhoto(url))
yield housing
class DictElement(ListElement):
def find_elements(self):
for el in self.el:
if el.get('label') == 'Villes':
for item in el.get('values'):
if 'value' in item:
yield item
class HousingPage(Page):
def get_housing(self, housing=None):
if housing is None:
housing = Housing(self.groups[0])
class CitiesPage(JsonPage):
@method
class iter_cities(DictElement):
class item(ItemElement):
klass = City
details = self.document.getroot().xpath('//detailannonce')[0]
if details.find('titre') is None:
return None
obj_id = Dict('value')
obj_name = Dict('label')
housing.title = unicode(details.find('titre').text)
housing.text = details.find('descriptif').text.strip()
housing.cost = Decimal(details.find('prix').text)
housing.currency = u''
housing.date = parse_date(details.find('dtfraicheur').text)
housing.area = Decimal(details.find('surface').text)
housing.phone = unicode(details.find('contact').find('telephone').text)
try:
housing.station = unicode(details.find('proximite').text)
except AttributeError:
housing.station = NotAvailable
class SeLogerItem(ItemElement):
klass = Housing
housing.location = details.find('adresse').text
if not housing.location and details.find('quartier') is not None:
housing.location = unicode(details.find('quartier').text)
if not housing.location:
housing.location = NotAvailable
obj_id = CleanText('idAnnonce')
obj_title = CleanText('titre')
obj_date = DateTime(CleanText('dtFraicheur'))
obj_cost = CleanDecimal('prix')
obj_currency = CleanText('prixUnite')
obj_area = CleanDecimal('surface')
obj_text = CleanText('descriptif')
obj_location = CleanText('ville')
obj_station = CleanText('proximite', default=NotAvailable)
obj_url = CleanText('permaLien')
housing.photos = []
for photo in details.xpath('./photos/photo'):
if photo.find('bigurl').text:
url = photo.find('bigurl').text
else:
url = photo.find('stdurl').text
housing.photos.append(HousingPhoto(unicode(url)))
housing.details = {}
for detail in details.xpath('./details/detail'):
housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A'
class SearchResultsPage(XMLPage):
@pagination
@method
class iter_housings(ListElement):
item_xpath = "//annonce"
housing.details['Reference'] = details.find('reference').text
def next_page(self):
return CleanText('//pagesuivante', default=None)(self)
return housing
class item(SeLogerItem):
def obj_photos(self):
photos = []
for photo in XPath('./photos/photo/stdurl')(self):
photos.append(HousingPhoto(photo))
return photos
class HousingPage(XMLPage):
@method
class get_housing(SeLogerItem):
def obj_photos(self):
photos = []
for photo in XPath('./photos/photo')(self):
url = CleanText('bigUrl', default=None)(photo)
if not url:
url = CleanText('stdUrl', default=None)(photo)
photos.append(HousingPhoto(url))
return photos
def condition(self):
return CleanText('//detailAnnonce/titre', default=None)(self)
def obj_location(self):
location = CleanText('//detailAnnonce/adresse')(self)
quartier = CleanText('//detailAnnonce/quartier', default=None)(self)
if not location and quartier is not None:
location = quartier
ville = CleanText('ville')(self)
return u'%s %s' % (location, ville)
def obj_details(self):
details = {}
for detail in XPath('//detailAnnonce/details/detail')(self):
details[CleanText('libelle')(detail)] = CleanText('valeur', default='N/A')(detail)
details['Reference'] = CleanText('//detailAnnonce/reference')(self)
return details
obj_phone = CleanText('//contact/telephone')

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import itertools
from weboob.capabilities.housing import Query
from weboob.tools.test import BackendTest
@ -34,7 +34,7 @@ class SeLogerTest(BackendTest):
city.backend = self.backend.name
query.cities.append(city)
results = list(self.backend.search_housings(query))
results = list(itertools.islice(self.backend.search_housings(query), 0, 20))
self.assertTrue(len(results) > 0)
self.backend.fillobj(results[0], 'phone')