[yahoo] search implemented

2010-12-23 20:40:40 +01:00 · 2010-12-23 20:40:40 +01:00 · 61016fc13f
commit 61016fc13f
parent 80b45b2cea
1 changed files with 44 additions and 2 deletions
--- a/weboob/backends/yahoo/backend.py
+++ b/weboob/backends/yahoo/backend.py
@ -22,8 +22,9 @@ from xml.dom import minidom
 # TODO store datetime objects instead of strings
 # from datetime import datetime
-from weboob.capabilities.weather import ICapWeather, CityNotFound, Current, Forecast
+from weboob.capabilities.weather import ICapWeather, CityNotFound, Current, Forecast, City
 from weboob.tools.backend import BaseBackend
 from weboob.tools.browser import BaseBrowser
 __all__ = ['YahooBackend']
@ -36,10 +37,51 @@ class YahooBackend(BaseBackend, ICapWeather):
    # Backend metadata (presumably consumed by the framework through
    # BaseBackend -- TODO confirm).
    VERSION = '0.5'
    DESCRIPTION = 'Yahoo'
    LICENSE = 'GPLv3'
    # Browser class associated with this backend (presumably what
    # create_browser() instantiates -- TODO confirm).
    BROWSER = BaseBrowser
    # Forecast RSS feed URL with two placeholders; _get_weather_dom fills them
    # with (city_id, 'c').
    WEATHER_URL = 'http://weather.yahooapis.com/forecastrss?w=%s&u=%s'
    # City search page; the placeholder receives the pattern given to
    # iter_city_search.
    SEARCH_URL = 'http://fr.meteo.yahoo.com/search/weather?p=%s'
    def create_default_browser(self):
        """Return the object produced by ``self.create_browser()``.

        No arguments are forwarded; the result is returned unchanged.
        """
        browser = self.create_browser()
        return browser
    def iter_city_search(self, pattern):
        """Yield City objects found by the Yahoo weather search for *pattern*.

        The search page at ``SEARCH_URL % pattern`` is fetched through
        ``self.browser.readurl`` and scanned line by line. The ``<title>``
        line decides how the following lines are parsed:

        * a title containing "Résultats de la recherche" marks a result
          list: one City is yielded per '/redirwoei/' link fragment that
          contains "strong";
        * a title containing "Prévisions et Temps" marks a single city
          page: one City named "city, region, country" is built from the
          'yw-breadcrumb' line.

        Nothing is yielded when neither title is seen.

        NOTE(review): the ``.decode('utf-8')`` calls assume the page content
        is a byte string (Python 2 ``str``) -- confirm. The list indexing
        below raises IndexError if the markup does not contain the expected
        number of pieces.
        """
-        raise NotImplementedError()
+        # minidom doesn't seem to work with that page
        #handler = urllib2.urlopen((self.SEARCH_URL % pattern).replace(' ','+'))
        #dom = minidom.parse(handler)
        #handler.close()
        #results = dom.getElementById('search-results')
        #for no in results.childNodes:
        #    print no.nodeValue
        # so the page is parsed with plain line-by-line string splitting instead
        # The whole fetch-and-parse runs inside the browser's 'with' context
        # (presumably a lock -- TODO confirm).
        with self.browser:
            # NOTE(review): the pattern is not URL-quoted; only spaces in the
            # final URL are replaced by '+'.
            content = self.browser.readurl((self.SEARCH_URL % pattern).replace(' ','+'))
            # Page type, set once the <title> line has been seen:
            # '' (not yet known), "direct" or "resultats".
            page=''
            for line in content.split('\n'):
                if "<title>" in line and "Prévisions et Temps" in line:
                    page="direct"
                elif "<title>" in line and "Résultats de la recherche" in line:
                    page="resultats"
                if page == "resultats":
                    # Result list: a line may hold several city links, each
                    # introduced by '/redirwoei/'.
                    if '/redirwoei/' in line:
                        cities = line.split('/redirwoei/')
                        for c in cities:
                            # Only fragments containing "strong" are treated as cities.
                            if "strong" in c:
                                # The text before the first single quote is the city id.
                                cid = c.split("'")[0]
                                # The second quote-delimited piece, minus the <strong>
                                # markup and cut at '</a>', is the city name.
                                cname = c.split("'")[1].replace("><strong>","").replace("</strong>","").split("</a>")[0]
                                yield City(cid, cname.decode('utf-8'))
                elif page == "direct":
                    # Single city page: everything comes from the breadcrumb line.
                    if 'div id="yw-breadcrumb"' in line:
                        l = line.split('</a>')
                        # Link texts: the text after the last '>' of each piece.
                        region = l[2].split('>')[-1]
                        country = l[1].split('>')[-1]
                        # City: the text following the first '</li>' of the fourth
                        # piece, without its '<li>' tag.
                        city = l[3].split('</li>')[1].replace('<li>','')
                        # City id: the text after the last '-' that precedes the
                        # first "/?unit" on the line.
                        cid = line.split("/?unit")[0].split('-')[-1]
                        yield City(cid, (city+", "+region+", "+country).decode('utf-8'))
    def _get_weather_dom(self, city_id):
        handler = urllib2.urlopen(self.WEATHER_URL % (city_id, 'c'))