From 61016fc13fa376183dd973c132a5d1affda26248 Mon Sep 17 00:00:00 2001
From: Julien Veyssier
Date: Thu, 23 Dec 2010 20:40:40 +0100
Subject: [PATCH] [yahoo] search implemented

---
 weboob/backends/yahoo/backend.py | 46 ++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/weboob/backends/yahoo/backend.py b/weboob/backends/yahoo/backend.py
index 0a0befaa..50e5d432 100644
--- a/weboob/backends/yahoo/backend.py
+++ b/weboob/backends/yahoo/backend.py
@@ -22,8 +22,9 @@ from xml.dom import minidom
 # TODO store datetime objects instead of strings
 # from datetime import datetime
 
-from weboob.capabilities.weather import ICapWeather, CityNotFound, Current, Forecast
+from weboob.capabilities.weather import ICapWeather, CityNotFound, Current, Forecast, City
 from weboob.tools.backend import BaseBackend
+from weboob.tools.browser import BaseBrowser
 
 
 __all__ = ['YahooBackend']
@@ -36,10 +37,51 @@ class YahooBackend(BaseBackend, ICapWeather):
     VERSION = '0.5'
     DESCRIPTION = 'Yahoo'
     LICENSE = 'GPLv3'
+    BROWSER = BaseBrowser
     WEATHER_URL = 'http://weather.yahooapis.com/forecastrss?w=%s&u=%s'
+    SEARCH_URL = 'http://fr.meteo.yahoo.com/search/weather?p=%s'
+
+    def create_default_browser(self):
+        return self.create_browser()
 
     def iter_city_search(self, pattern):
-        raise NotImplementedError()
+        # minidom doesn't seem to work with that page
+
+        #handler = urllib2.urlopen((self.SEARCH_URL % pattern).replace(' ','+'))
+        #dom = minidom.parse(handler)
+        #handler.close()
+        #results = dom.getElementById('search-results')
+        #for no in results.childNodes:
+        #    print no.nodeValue
+
+        # so i use a basic but efficient parsing
+        with self.browser:
+            content = self.browser.readurl((self.SEARCH_URL % pattern).replace(' ','+'))
+
+        page=''
+        for line in content.split('\n'):
+            if "<title>" in line and "Prévisions et Temps" in line:
+                page="direct"
+            elif "<title>" in line and "Résultats de la recherche" in line:
+                page="resultats"
+
+            if page == "resultats":
+                if '/redirwoei/' in line:
+                    cities = line.split('/redirwoei/')
+                    for c in cities:
+                        if "strong" in c:
+                            cid = c.split("'")[0]
+                            cname = c.split("'")[1].replace("><strong>","").replace("</strong>","").split("</a>")[0]
+                            yield City(cid, cname.decode('utf-8'))
+            elif page == "direct":
+                if 'div id="yw-breadcrumb"' in line:
+                    l = line.split('</a>')
+                    region = l[2].split('>')[-1]
+                    country = l[1].split('>')[-1]
+                    city = l[3].split('</li>')[1].replace('<li>','')
+                    cid = line.split("/?unit")[0].split('-')[-1]
+                    yield City(cid, (city+", "+region+", "+country).decode('utf-8'))
+
 
     def _get_weather_dom(self, city_id):
         handler = urllib2.urlopen(self.WEATHER_URL % (city_id, 'c'))