[pap] adapt to browser2
This commit is contained in:
parent
c3c2ed404a
commit
f00c9233c3
3 changed files with 144 additions and 145 deletions
|
|
@ -20,35 +20,37 @@
|
||||||
|
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
from weboob.tools.json import json
|
from weboob.browser import PagesBrowser, URL
|
||||||
|
|
||||||
from weboob.deprecated.browser import Browser
|
|
||||||
from weboob.capabilities.housing import Query
|
from weboob.capabilities.housing import Query
|
||||||
|
|
||||||
from .pages import SearchResultsPage, HousingPage
|
from .pages import SearchResultsPage, HousingPage, CitiesPage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['PapBrowser']
|
__all__ = ['PapBrowser']
|
||||||
|
|
||||||
|
|
||||||
class PapBrowser(Browser):
|
class PapBrowser(PagesBrowser):
|
||||||
PROTOCOL = 'http'
|
|
||||||
DOMAIN = 'www.pap.fr'
|
BASEURL = 'http://www.pap.fr'
|
||||||
ENCODING = 'utf-8'
|
search_page = URL('annonce/.*', SearchResultsPage)
|
||||||
PAGES = {
|
housing = URL('annonces/(?P<_id>.*)', HousingPage)
|
||||||
'http://www.pap.fr/annonce/.*': SearchResultsPage,
|
cities = URL('index/ac-geo2\?q=(?P<pattern>.*)', CitiesPage)
|
||||||
'http://www.pap.fr/annonces/.*': HousingPage,
|
|
||||||
}
|
|
||||||
|
|
||||||
def search_geo(self, pattern):
|
def search_geo(self, pattern):
|
||||||
fp = self.openurl(self.buildurl('http://www.pap.fr/index/ac-geo', q=pattern.encode('utf-8')))
|
return self.cities.open(pattern=pattern).iter_cities()
|
||||||
return json.load(fp)
|
|
||||||
|
|
||||||
TYPES = {Query.TYPE_RENT: 'location',
|
TYPES = {Query.TYPE_RENT: 'location',
|
||||||
Query.TYPE_SALE: 'vente',
|
Query.TYPE_SALE: 'vente'}
|
||||||
}
|
|
||||||
|
RET = {Query.HOUSE_TYPES.HOUSE: 'maison',
|
||||||
|
Query.HOUSE_TYPES.APART: 'appartement',
|
||||||
|
Query.HOUSE_TYPES.LAND: 'terrain',
|
||||||
|
Query.HOUSE_TYPES.PARKING: 'garage-parking',
|
||||||
|
Query.HOUSE_TYPES.OTHER: 'divers'}
|
||||||
|
|
||||||
|
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
|
||||||
|
self.session.headers.update({'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'})
|
||||||
|
|
||||||
def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max):
|
|
||||||
data = {'geo_objets_ids': ','.join(cities),
|
data = {'geo_objets_ids': ','.join(cities),
|
||||||
'surface[min]': area_min or '',
|
'surface[min]': area_min or '',
|
||||||
'surface[max]': area_max or '',
|
'surface[max]': area_max or '',
|
||||||
|
|
@ -57,21 +59,19 @@ class PapBrowser(Browser):
|
||||||
'produit': self.TYPES.get(type, 'location'),
|
'produit': self.TYPES.get(type, 'location'),
|
||||||
'recherche': 1,
|
'recherche': 1,
|
||||||
'nb_resultats_par_page': 40,
|
'nb_resultats_par_page': 40,
|
||||||
'submit': 'rechercher',
|
}
|
||||||
'typesbien[]': 'appartement',
|
|
||||||
}
|
|
||||||
|
|
||||||
if nb_rooms:
|
if nb_rooms:
|
||||||
data['nb_pieces[min]'] = nb_rooms
|
data['nb_pieces[min]'] = nb_rooms
|
||||||
data['nb_pieces[max]'] = nb_rooms
|
data['nb_pieces[max]'] = nb_rooms
|
||||||
|
|
||||||
self.location('/annonce/', urllib.urlencode(data))
|
ret = []
|
||||||
assert self.is_on_page(SearchResultsPage)
|
for house_type in house_types:
|
||||||
|
if house_type in self.RET:
|
||||||
|
ret.append(self.RET.get(house_type))
|
||||||
|
|
||||||
return self.page.iter_housings()
|
_data = '%s%s%s' % (urllib.urlencode(data), '&typesbien%5B%5D=', '&typesbien%5B%5D='.join(ret))
|
||||||
|
return self.search_page.go(data=_data).iter_housings()
|
||||||
|
|
||||||
def get_housing(self, housing):
|
def get_housing(self, _id, housing=None):
|
||||||
self.location('/annonces/%s' % urllib.quote(housing))
|
return self.housing.go(_id=_id).get_housing(obj=housing)
|
||||||
|
|
||||||
assert self.is_on_page(HousingPage)
|
|
||||||
return self.page.get_housing()
|
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,7 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
|
||||||
from weboob.capabilities.housing import CapHousing, City, Housing, HousingPhoto
|
|
||||||
from weboob.tools.backend import Module
|
from weboob.tools.backend import Module
|
||||||
|
|
||||||
from .browser import PapBrowser
|
from .browser import PapBrowser
|
||||||
|
|
@ -38,41 +37,35 @@ class PapModule(Module, CapHousing):
|
||||||
BROWSER = PapBrowser
|
BROWSER = PapBrowser
|
||||||
|
|
||||||
def search_housings(self, query):
|
def search_housings(self, query):
|
||||||
cities = [c.id for c in query.cities if c.backend == self.name]
|
cities = ['%s' % c.id for c in query.cities if c.backend == self.name]
|
||||||
if len(cities) == 0:
|
if len(cities) == 0:
|
||||||
return list()
|
return list()
|
||||||
|
|
||||||
with self.browser:
|
return self.browser.search_housings(query.type, cities, query.nb_rooms,
|
||||||
return self.browser.search_housings(query.type, cities, query.nb_rooms,
|
query.area_min, query.area_max,
|
||||||
query.area_min, query.area_max,
|
query.cost_min, query.cost_max,
|
||||||
query.cost_min, query.cost_max)
|
query.house_types)
|
||||||
|
|
||||||
def get_housing(self, housing):
|
def get_housing(self, housing):
|
||||||
if isinstance(housing, Housing):
|
if isinstance(housing, Housing):
|
||||||
id = housing.id
|
id = housing.id
|
||||||
else:
|
else:
|
||||||
id = housing
|
id = housing
|
||||||
|
housing = None
|
||||||
|
|
||||||
with self.browser:
|
return self.browser.get_housing(id, housing)
|
||||||
return self.browser.get_housing(id)
|
|
||||||
|
|
||||||
def search_city(self, pattern):
|
def search_city(self, pattern):
|
||||||
with self.browser:
|
return self.browser.search_geo(pattern)
|
||||||
for city in self.browser.search_geo(pattern):
|
|
||||||
c = City(city['id'])
|
|
||||||
c.name = unicode(city['name'])
|
|
||||||
yield c
|
|
||||||
|
|
||||||
def fill_housing(self, housing, fields):
|
def fill_housing(self, housing, fields):
|
||||||
with self.browser:
|
return self.browser.get_housing(housing.id, housing)
|
||||||
return self.browser.get_housing(housing.id)
|
|
||||||
|
|
||||||
def fill_photo(self, photo, fields):
|
def fill_photo(self, photo, fields):
|
||||||
with self.browser:
|
if 'data' in fields and photo.url and not photo.data:
|
||||||
if 'data' in fields and photo.url and not photo.data:
|
photo.data = self.browser.readurl(photo.url)
|
||||||
photo.data = self.browser.readurl(photo.url)
|
|
||||||
return photo
|
return photo
|
||||||
|
|
||||||
OBJECTS = {Housing: fill_housing,
|
OBJECTS = {Housing: fill_housing,
|
||||||
HousingPhoto: fill_photo,
|
HousingPhoto: fill_photo,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,105 +18,111 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
import re
|
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
from dateutil.parser import parse as parse_date
|
|
||||||
|
|
||||||
from weboob.deprecated.browser import Page
|
from weboob.tools.date import parse_french_date
|
||||||
|
from weboob.browser.pages import HTMLPage, JsonPage, pagination
|
||||||
|
from weboob.browser.elements import ItemElement, ListElement, method
|
||||||
|
from weboob.browser.filters.standard import CleanText, CleanDecimal, Regexp, Env, BrowserURL, Format
|
||||||
|
from weboob.browser.filters.html import Link, XPath, CleanHTML
|
||||||
|
from weboob.browser.filters.json import Dict
|
||||||
from weboob.capabilities.base import NotAvailable
|
from weboob.capabilities.base import NotAvailable
|
||||||
from weboob.capabilities.housing import Housing
|
from weboob.capabilities.housing import Housing, City, HousingPhoto
|
||||||
|
|
||||||
|
|
||||||
class SearchResultsPage(Page):
|
class DictElement(ListElement):
|
||||||
DATE_RE = re.compile('Annonce \w+ du (.*)')
|
def find_elements(self):
|
||||||
MONTHS = {u'janvier': 'january',
|
if self.item_xpath is not None:
|
||||||
u'février': 'february',
|
for el in self.el:
|
||||||
u'mars': 'march',
|
yield el
|
||||||
u'avril': 'april',
|
|
||||||
u'mai': 'may',
|
|
||||||
u'juin': 'june',
|
|
||||||
u'juillet': 'july',
|
|
||||||
u'août': 'august',
|
|
||||||
u'septembre': 'september',
|
|
||||||
u'octobre': 'october',
|
|
||||||
u'novembre': 'november',
|
|
||||||
u'décembre': 'december',
|
|
||||||
}
|
|
||||||
|
|
||||||
def iter_housings(self):
|
|
||||||
for div in self.document.getroot().cssselect('div.annonce-resume'):
|
|
||||||
a = div.cssselect('td.lien-annonce')[0].find('a')
|
|
||||||
if a is None:
|
|
||||||
# not a real announce.
|
|
||||||
continue
|
|
||||||
|
|
||||||
id = a.attrib['href'].split('-')[-1]
|
|
||||||
housing = Housing(id)
|
|
||||||
housing.title = a.text.strip()
|
|
||||||
m = re.match('(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
|
|
||||||
if m:
|
|
||||||
housing.area = Decimal(m.group(3))
|
|
||||||
|
|
||||||
housing.cost = Decimal(div.cssselect('td.prix')[0].text.strip(u' \t\u20ac\xa0€\n\r').replace('.', '').replace(',', '.'))
|
|
||||||
housing.currency = u'€'
|
|
||||||
|
|
||||||
m = self.DATE_RE.match(div.cssselect('p.date-publication')[0].text.strip())
|
|
||||||
if m:
|
|
||||||
date = m.group(1)
|
|
||||||
for fr, en in self.MONTHS.iteritems():
|
|
||||||
date = date.replace(fr, en)
|
|
||||||
housing.date = parse_date(date)
|
|
||||||
|
|
||||||
metro = div.cssselect('p.metro')
|
|
||||||
if len(metro) > 0:
|
|
||||||
housing.station = unicode(metro[0].text.strip())
|
|
||||||
else:
|
|
||||||
housing.station = NotAvailable
|
|
||||||
|
|
||||||
p = div.cssselect('p.annonce-resume-texte')[0]
|
|
||||||
b = p.findall('b')
|
|
||||||
if len(b) > 0:
|
|
||||||
housing.text = b[0].tail.strip()
|
|
||||||
housing.location = unicode(b[0].text)
|
|
||||||
else:
|
|
||||||
housing.text = p.text.strip()
|
|
||||||
|
|
||||||
housing.photos = NotAvailable
|
|
||||||
|
|
||||||
yield housing
|
|
||||||
|
|
||||||
|
|
||||||
class HousingPage(Page):
|
|
||||||
def get_housing(self):
|
|
||||||
div = self.parser.select(self.document.getroot(), 'div#annonce_detail', 1)
|
|
||||||
housing = Housing(self.url.split('-')[-1])
|
|
||||||
|
|
||||||
parts = div.find('h1').text.split(' - ')
|
|
||||||
housing.title = parts[0].strip()
|
|
||||||
housing.cost = Decimal(parts[1].strip(u' \t\u20ac\xa0€\n\r').replace('.', '').replace(',', '.'))
|
|
||||||
housing.currency = u'€'
|
|
||||||
|
|
||||||
m = re.match('(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
|
|
||||||
if m:
|
|
||||||
housing.area = Decimal(m.group(3))
|
|
||||||
|
|
||||||
housing.date = housing.station = housing.location = housing.phone = NotAvailable
|
|
||||||
|
|
||||||
metro = div.cssselect('p.metro')
|
|
||||||
if len(metro) > 0:
|
|
||||||
housing.station = metro[0].text.strip()
|
|
||||||
|
|
||||||
p = div.cssselect('p.annonce-detail-texte')[0]
|
|
||||||
b = p.findall('b')
|
|
||||||
if len(b) > 0:
|
|
||||||
housing.text = b[0].tail.strip()
|
|
||||||
housing.location = unicode(b[0].text)
|
|
||||||
if len(b) > 1:
|
|
||||||
housing.phone = b[1].text
|
|
||||||
else:
|
else:
|
||||||
housing.text = p.text.strip()
|
yield self.el
|
||||||
|
|
||||||
housing.details = NotAvailable
|
|
||||||
housing.photos = NotAvailable
|
|
||||||
|
|
||||||
return housing
|
class CitiesPage(JsonPage):
    """JSON answer of the city auto-completion URL (``index/ac-geo2``)."""

    @method
    class iter_cities(DictElement):
        """Build one City per entry of the decoded JSON document."""

        # DictElement.find_elements only checks that item_xpath is not None:
        # any value makes it iterate over the document's entries instead of
        # yielding the whole document as a single element.
        item_xpath = '.'

        class item(ItemElement):
            """Map the 'id' and 'name' keys of an entry onto a City."""

            klass = City

            obj_id = Dict('id')
            obj_name = Dict('name')
|
||||||
|
|
||||||
|
|
||||||
|
class SearchResultsPage(HTMLPage):
    """Search results listing; each ``<li class="annonce">`` is one housing."""

    @pagination
    @method
    class iter_housings(ListElement):
        """Yield a Housing per listing, following the pagination links."""

        item_xpath = '//li[@class="annonce"]'

        def next_page(self):
            """Return the URL behind the "next" entry of the pagination bar."""
            return Link('//ul[@class="pagination"]/li[@class="next"]/a')(self)

        class item(ItemElement):
            """Fill a Housing from a single ``<li class="annonce">`` element.

            Every selector is relative to that element so that a listing only
            picks up its own data.
            """

            klass = Housing

            obj_id = Regexp(Link('./div[@class="header-annonce"]/a'), '/annonces/(.*)')
            obj_title = CleanText('./div[@class="header-annonce"]/a')
            # The area is the number placed right before " m²" in the description.
            obj_area = CleanDecimal(Regexp(CleanText('./div[@class="header-annonce"]/a/span[@class="desc"]'),
                                           '(.*?)(\d*) m\xb2(.*?)', '\\2'), default=NotAvailable)
            obj_cost = CleanDecimal(CleanText('./div[@class="header-annonce"]/a/span[@class="prix"]'),
                                    replace_dots=(',', '.'), default=Decimal(0))
            # Currency symbol found in the price text; falls back to the euro.
            obj_currency = Regexp(CleanText('./div[@class="header-annonce"]/a/span[@class="prix"]'),
                                  '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')

            def obj_date(self):
                """Parse the French-formatted publication date of the listing."""
                _date = CleanText('./div[@class="header-annonce"]/span[@class="date"]')(self)
                return parse_french_date(_date)

            # Fixed: the predicate used to read [@cladd=metro], which compares
            # a non-existent attribute with a child node and never matches.
            obj_station = CleanText('./div/div/div[@class="metro"]', default=NotAvailable)
            obj_location = CleanText('./div[@class="clearfix"]/div/a/span/img/@alt')
            obj_text = CleanText('./div[@class="clearfix"]/div[@class="description clearfix"]/p')

            def obj_photos(self):
                """Return the thumbnails belonging to this listing only."""
                # Fixed: the path used to start with '//', which searches the
                # whole document and attached every thumbnail of the page to
                # each housing. './/' restricts the search to this element.
                return [HousingPhoto(u'%s' % img)
                        for img in XPath('.//div[@class="vignette-annonce"]/a/span/img/@src')(self)]
|
||||||
|
|
||||||
|
|
||||||
|
class HousingPage(HTMLPage):
    """Detail page of a single housing listing."""

    @method
    class get_housing(ItemElement):
        """Fill a Housing from the whole page.

        The id is not scraped: it is read from the ``_id`` environment value.
        """

        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@class="desc clearfix"]/span[@class="title"]')
        # NOTE(review): unlike SearchResultsPage.obj_cost, no replace_dots is
        # given here -- confirm which separators the site uses in prices.
        obj_cost = CleanDecimal('//h1[@class="desc clearfix"]/span[@class="prix"]')
        # Currency symbol found in the price text; falls back to the euro.
        obj_currency = Regexp(CleanText('//h1[@class="desc clearfix"]/span[@class="prix"]'),
                              '.*([%s%s%s])' % (u'€', u'$', u'£'), default=u'€')
        # The area is the number placed right before " m²" in the title.
        obj_area = CleanDecimal(Regexp(CleanText('//h1[@class="desc clearfix"]/span[@class="title"]'),
                                       '(.*?)(\d*) m\xb2(.*?)', '\\2'), default=NotAvailable)
        obj_location = CleanText('//div[@class="text-annonce"]/h2')
        obj_text = CleanHTML('//div[@class="text-annonce"]/p')
        obj_station = CleanText('//div[@class="metro"]')
        obj_phone = CleanText('//span[@class="telephone hide-tel"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_details(self):
            """Collect the labelled characteristics and the energy class.

            Returns a dict mapping each label to its text; entries whose label
            or text is empty are left out.
            """
            details = {}

            for entry in XPath('//div[@class="footer-descriptif"]/ul/li')(self):
                label = CleanText('./span[@class="label"]')(entry)
                # The <li> text starts with its own label: strip it out to
                # keep only the value part.
                text = CleanText('.', replace=[(label, '')])(entry)
                if label and text:
                    details[label] = text

            # Energy class block: "<text>(<css class with dashes as spaces>)".
            energy_label = CleanText('//div[@class="classe-energie-content"]/div/div/span')(self)
            energy_text = CleanText('//div[@class="classe-energie-content"]/div/div/p')
            energy_class = CleanText('//div[@class="classe-energie-content"]/div/@class',
                                     replace=[('-', ' ')])
            energy_value = Format('%s(%s)', energy_text, energy_class)(self)
            if energy_label and energy_value:
                details[energy_label] = energy_value

            return details

        def obj_photos(self):
            """Return a HousingPhoto for every thumbnail of the showcase."""
            return [HousingPhoto(u'%s' % img)
                    for img in XPath('//div[@class="showcase-thumbnail"]/img/@src')(self)]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue