loop on all results pages

This commit is contained in:
Romain Bignon 2012-02-18 16:21:43 +01:00
commit 1d23f42ffa
2 changed files with 17 additions and 2 deletions

View file

@ -59,9 +59,17 @@ class SeLogerBrowser(BaseBrowser):
self.location(self.buildurl('http://ws.seloger.com/search.xml', **data))
assert self.is_on_page(SearchResultsPage)
while 1:
assert self.is_on_page(SearchResultsPage)
return self.page.iter_housings()
for housing in self.page.iter_housings():
yield housing
url = self.page.next_page_url()
if url is None:
return
self.location(url)
def get_housing(self, id, obj=None):
self.location(self.buildurl('http://ws.seloger.com/annonceDetail.xml', idAnnonce=id, noAudiotel=1))

View file

@ -29,6 +29,13 @@ __all__ = ['SearchResultsPage', 'HousingPage']
class SearchResultsPage(BasePage):
def next_page_url(self):
    """
    Return the URL of the next page of search results.

    Looks up every ``pagesuivante`` element in the parsed document and
    returns the text of the first one, or None when the document has no
    such element (i.e. there is no further page to fetch).
    """
    next_links = self.document.getroot().xpath('//pagesuivante')
    # Only the first match is used; an empty result means no next page.
    return next_links[0].text if next_links else None
def iter_housings(self):
for a in self.document.getroot().xpath('//annonce'):
housing = Housing(a.find('idannonce').text)