[seloger] adapt to browser2

2014-10-11 13:47:24 +02:00 · 2014-10-11 13:47:24 +02:00 · 8ac7bbb434
commit 8ac7bbb434
parent 04220ca44d
4 changed files with 142 additions and 149 deletions
--- a/modules/seloger/browser.py
+++ b/modules/seloger/browser.py
@ -17,45 +17,45 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

+import urllib

-from weboob.tools.json import json
-
-from weboob.deprecated.browser import Browser
 from weboob.capabilities.housing import Query

-from .pages import SearchResultsPage, HousingPage
-
+from weboob.browser import PagesBrowser, URL
+from .pages import SearchResultsPage, HousingPage, CitiesPage
+from weboob.browser.profiles import Android

 __all__ = ['SeLogerBrowser']


-class SeLogerBrowser(Browser):
-    PROTOCOL = 'http'
-    DOMAIN = 'www.seloger.com'
-    ENCODING = 'utf-8'
-    USER_AGENT = Browser.USER_AGENTS['android']
-    PAGES = {
-         'http://ws.seloger.com/search.xml.*': SearchResultsPage,
-         'http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(\d+)(&noAudiotel=\d)?': HousingPage,
-        }
+class SeLogerBrowser(PagesBrowser):
+    BASEURL = 'http://www.seloger.com'
+    PROFILE = Android()
+    cities = URL('js,ajax,villequery_v3.htm\?ville=(?P<pattern>.*)', CitiesPage)
+    search = URL('http://ws.seloger.com/search.xml\?(?P<request>.*)', SearchResultsPage)
+    housing = URL('http://ws.seloger.com/annonceDetail.xml\?idAnnonce=(?P<_id>\d+)&noAudiotel=(?P<noAudiotel>\d)', HousingPage)

    def search_geo(self, pattern):
-        fp = self.openurl(self.buildurl('http://www.seloger.com/js,ajax,villequery_v3.htm', ville=pattern.encode('utf-8'), mode=1))
-        return json.load(fp)
+        return self.cities.open(pattern=pattern.encode('utf-8')).iter_cities()

    TYPES = {Query.TYPE_RENT: 1,
             Query.TYPE_SALE: 2
-            }
+             }

-    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max):
+    RET = {Query.HOUSE_TYPES.HOUSE: '2',
+           Query.HOUSE_TYPES.APART: '1',
+           Query.HOUSE_TYPES.LAND: '4',
+           Query.HOUSE_TYPES.PARKING: '3',
+           Query.HOUSE_TYPES.OTHER: '10'}
+
+    def search_housings(self, type, cities, nb_rooms, area_min, area_max, cost_min, cost_max, house_types):
        data = {'ci':            ','.join(cities),
                'idtt':          self.TYPES.get(type, 1),
-                'idtypebien':    1, #appart
                'org':           'advanced_search',
                'surfacemax':    area_max or '',
                'surfacemin':    area_min or '',
                'tri':           'd_dt_crea',
-               }
+                }

        if type == Query.TYPE_SALE:
            data['pxmax'] = cost_max or ''
@ -67,24 +67,15 @@ class SeLogerBrowser(Browser):
        if nb_rooms:
            data['nb_pieces'] = nb_rooms

-        self.location(self.buildurl('http://ws.seloger.com/search.xml', **data))
+        ret = []
+        for house_type in house_types:
+            if house_type in self.RET:
+                ret.append(self.RET.get(house_type))

-        while True:
-            assert self.is_on_page(SearchResultsPage)
+        if ret:
+            data['idtypebien'] = ','.join(ret)

-            for housing in self.page.iter_housings():
-                yield housing
+        return self.search.go(request=urllib.urlencode(data)).iter_housings()

-            url = self.page.next_page_url()
-            if url is None:
-                return
-
-            self.location(url)
-
-    def get_housing(self, id, obj=None):
-        self.location(self.buildurl('http://ws.seloger.com/annonceDetail.xml', idAnnonce=id, noAudiotel=1))
-
-        assert self.is_on_page(HousingPage)
-        housing = self.page.get_housing(obj)
-
-        return housing
+    def get_housing(self, _id, obj=None):
+        return self.housing.go(_id=_id, noAudiotel=1).get_housing(obj)
--- a/modules/seloger/module.py
+++ b/modules/seloger/module.py
@ -18,7 +18,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-from weboob.capabilities.housing import CapHousing, City, Housing, HousingPhoto
+from weboob.capabilities.housing import CapHousing, Housing, HousingPhoto
 from weboob.tools.backend import Module

 from .browser import SeLogerBrowser
@ -42,10 +42,10 @@ class SeLogerModule(Module, CapHousing):
        if len(cities) == 0:
            return list([])

-        with self.browser:
-            return self.browser.search_housings(query.type, cities, query.nb_rooms,
-                                                query.area_min, query.area_max,
-                                                query.cost_min, query.cost_max)
+        return self.browser.search_housings(query.type, cities, query.nb_rooms,
+                                            query.area_min, query.area_max,
+                                            query.cost_min, query.cost_max,
+                                            query.house_types)

    def get_housing(self, housing):
        if isinstance(housing, Housing):
@ -54,37 +54,25 @@ class SeLogerModule(Module, CapHousing):
            id = housing
            housing = None

-        with self.browser:
-            return self.browser.get_housing(id, housing)
+        return self.browser.get_housing(id, housing)

    def search_city(self, pattern):
-        with self.browser:
-            for categories in self.browser.search_geo(pattern):
-                if categories['label'] != 'Villes':
-                    continue
-                for city in categories['values']:
-                    if 'value' not in city:
-                        continue
-                    c = City(city['value'])
-                    c.name = unicode(city['label'])
-                    yield c
+        return self.browser.search_geo(pattern)

    def fill_housing(self, housing, fields):
-        with self.browser:
-            if fields != ['photos'] or not housing.photos:
-                housing = self.browser.get_housing(housing.id)
-            if 'photos' in fields:
-                for photo in housing.photos:
-                    if not photo.data:
-                        photo.data = self.browser.readurl(photo.url)
+        if fields != ['photos'] or not housing.photos:
+            housing = self.browser.get_housing(housing.id)
+        if 'photos' in fields:
+            for photo in housing.photos:
+                if not photo.data:
+                    photo.data = self.browser.open(photo.url).content  # store the body, as fill_photo does
        return housing

    def fill_photo(self, photo, fields):
-        with self.browser:
-            if 'data' in fields and photo.url and not photo.data:
-                photo.data = self.browser.readurl(photo.url)
+        if 'data' in fields and photo.url and not photo.data:
+            photo.data = self.browser.open(photo.url).content
        return photo

    OBJECTS = {Housing: fill_housing,
               HousingPhoto: fill_photo,
-              }
+               }
--- a/modules/seloger/pages.py
+++ b/modules/seloger/pages.py
@ -18,85 +18,99 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-from decimal import Decimal
-from dateutil.parser import parse as parse_date
-
-from weboob.deprecated.browser import Page
+from weboob.browser.pages import XMLPage, JsonPage, pagination
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.browser.filters.json import Dict
+from weboob.browser.filters.html import XPath
+from weboob.browser.filters.standard import CleanText, CleanDecimal, DateTime
 from weboob.capabilities.base import NotAvailable
-from weboob.capabilities.housing import Housing, HousingPhoto
+from weboob.capabilities.housing import Housing, HousingPhoto, City


-class SearchResultsPage(Page):
-    def next_page_url(self):
-        urls = self.document.getroot().xpath('//pagesuivante')
-        if len(urls) == 0:
-            return None
-        else:
-            return urls[0].text
-
-    def iter_housings(self):
-        for a in self.document.getroot().xpath('//annonce'):
-            housing = Housing(a.find('idannonce').text)
-            housing.title = unicode(a.find('titre').text)
-            housing.date = parse_date(a.find('dtfraicheur').text)
-            housing.cost = Decimal(a.find('prix').text)
-            housing.currency = u'€'
-            housing.area = Decimal(a.find('surface').text)
-            housing.text = unicode(a.find('descriptif').text.strip())
-            housing.location = unicode(a.find('ville').text)
-            try:
-                housing.station = unicode(a.find('proximite').text)
-            except AttributeError:
-                housing.station = NotAvailable
-
-            housing.photos = []
-            for photo in a.xpath('./photos/photo'):
-                url = unicode(photo.find('stdurl').text)
-                housing.photos.append(HousingPhoto(url))
-
-            yield housing
+class DictElement(ListElement):
+    def find_elements(self):
+        for el in self.el:
+            if el.get('label') == 'Villes':
+                for item in el.get('values'):
+                    if 'value' in item:
+                        yield item


-class HousingPage(Page):
-    def get_housing(self, housing=None):
-        if housing is None:
-            housing = Housing(self.groups[0])
+class CitiesPage(JsonPage):
+    @method
+    class iter_cities(DictElement):
+        class item(ItemElement):
+            klass = City

-        details = self.document.getroot().xpath('//detailannonce')[0]
-        if details.find('titre') is None:
-            return None
+            obj_id = Dict('value')
+            obj_name = Dict('label')

-        housing.title = unicode(details.find('titre').text)
-        housing.text = details.find('descriptif').text.strip()
-        housing.cost = Decimal(details.find('prix').text)
-        housing.currency = u'€'
-        housing.date = parse_date(details.find('dtfraicheur').text)
-        housing.area = Decimal(details.find('surface').text)
-        housing.phone = unicode(details.find('contact').find('telephone').text)

-        try:
-            housing.station = unicode(details.find('proximite').text)
-        except AttributeError:
-            housing.station = NotAvailable
+class SeLogerItem(ItemElement):
+    klass = Housing

-        housing.location = details.find('adresse').text
-        if not housing.location and details.find('quartier') is not None:
-            housing.location = unicode(details.find('quartier').text)
-        if not housing.location:
-            housing.location = NotAvailable
+    obj_id = CleanText('idAnnonce')
+    obj_title = CleanText('titre')
+    obj_date = DateTime(CleanText('dtFraicheur'))
+    obj_cost = CleanDecimal('prix')
+    obj_currency = CleanText('prixUnite')
+    obj_area = CleanDecimal('surface')
+    obj_text = CleanText('descriptif')
+    obj_location = CleanText('ville')
+    obj_station = CleanText('proximite', default=NotAvailable)
+    obj_url = CleanText('permaLien')

-        housing.photos = []
-        for photo in details.xpath('./photos/photo'):
-            if photo.find('bigurl').text:
-                url = photo.find('bigurl').text
-            else:
-                url = photo.find('stdurl').text
-            housing.photos.append(HousingPhoto(unicode(url)))

-        housing.details = {}
-        for detail in details.xpath('./details/detail'):
-            housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A'
+class SearchResultsPage(XMLPage):
+    @pagination
+    @method
+    class iter_housings(ListElement):
+        item_xpath = "//annonce"

+        def next_page(self):
+            return CleanText('//pagesuivante', default=None)(self)

+        class item(SeLogerItem):
+            def obj_photos(self):
+                photos = []
+
+                for photo in XPath('./photos/photo/stdurl')(self):
+                    photos.append(HousingPhoto(CleanText('.')(photo)))  # pass the URL text, not the matched node
+
+                return photos
+
+
+class HousingPage(XMLPage):
+    @method
+    class get_housing(SeLogerItem):
+
+        def obj_photos(self):
+            photos = []
+
+            for photo in XPath('./photos/photo')(self):
+                url = CleanText('bigUrl', default=None)(photo)
+                if not url:
+                    url = CleanText('stdUrl', default=None)(photo)
+                photos.append(HousingPhoto(url))
+            return photos
+
+        def condition(self):
+            return CleanText('//detailAnnonce/titre', default=None)(self)
+
+        def obj_location(self):
+            location = CleanText('//detailAnnonce/adresse')(self)
+            quartier = CleanText('//detailAnnonce/quartier', default=None)(self)
+            if not location and quartier is not None:
+                location = quartier
+            ville = CleanText('ville')(self)
+            return u'%s %s' % (location, ville)
+
+        def obj_details(self):
+            details = {}
+            for detail in XPath('//detailAnnonce/details/detail')(self):
+                details[CleanText('libelle')(detail)] = CleanText('valeur', default='N/A')(detail)
+
+            details['Reference'] = CleanText('//detailAnnonce/reference')(self)
+            return details
+
+        obj_phone = CleanText('//contact/telephone')
--- a/modules/seloger/test.py
+++ b/modules/seloger/test.py
@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-
+import itertools
 from weboob.capabilities.housing import Query
 from weboob.tools.test import BackendTest

@ -34,7 +34,7 @@ class SeLogerTest(BackendTest):
            city.backend = self.backend.name
            query.cities.append(city)

-        results = list(self.backend.search_housings(query))
+        results = list(itertools.islice(self.backend.search_housings(query), 0, 20))
        self.assertTrue(len(results) > 0)

        self.backend.fillobj(results[0], 'phone')