diff --git a/modules/meteofrance/browser.py b/modules/meteofrance/browser.py index f939908a..3dcc8654 100644 --- a/modules/meteofrance/browser.py +++ b/modules/meteofrance/browser.py @@ -21,26 +21,25 @@ import urllib from weboob.tools.browser import BaseBrowser +from weboob.tools.json import json as simplejson +from weboob.capabilities.weather import City -from .pages.meteo import WeatherPage, CityPage +from .pages.meteo import WeatherPage __all__ = ['MeteofranceBrowser'] class MeteofranceBrowser(BaseBrowser): - DOMAIN = 'france.meteofrance.com' + DOMAIN = 'www.meteofrance.com' PROTOCOL = 'http' ENCODING = 'utf-8' USER_AGENT = BaseBrowser.USER_AGENTS['wget'] - WEATHER_URL = '{0}://{1}/france/meteo?PREVISIONS_PORTLET.path=previsionsville/{{cityid}}'.format(PROTOCOL, DOMAIN) - CITY_SEARCH_URL = '{0}://{1}/france/accueil/resultat?RECHERCHE_RESULTAT_PORTLET.path=rechercheresultat&' \ - 'query={{city_pattern}}&type=PREV_FRANCE&satellite=france'.format(PROTOCOL, DOMAIN) + WEATHER_URL = '{0}://{1}/previsions-meteo-france/{{city_name}}/{{city_id}}'.format(PROTOCOL, DOMAIN) + CITY_SEARCH_URL = '{0}://{1}/mf3-rpc-portlet/rest/lieu/facet/previsions/search/{{city_pattern}}'\ + .format(PROTOCOL, DOMAIN) PAGES = { - WEATHER_URL.format(cityid=".*"): WeatherPage, - CITY_SEARCH_URL.format(city_pattern=".*"): CityPage, - 'http://france.meteofrance.com/france/accueil/resultat.*': CityPage, - 'http://france.meteofrance.com/france/meteo.*': WeatherPage, + WEATHER_URL.format(city_id=".*", city_name=".*"): WeatherPage, } def __init__(self, *args, **kwargs): @@ -48,24 +47,24 @@ class MeteofranceBrowser(BaseBrowser): def iter_city_search(self, pattern): searchurl = self.CITY_SEARCH_URL.format(city_pattern=urllib.quote_plus(pattern.encode('utf-8'))) - self.location(searchurl) + response = self.openurl(searchurl) + return self.parse_cities_result(response) - if self.is_on_page(CityPage): - # Case 1: there are multiple results for the pattern: - return self.page.iter_city_search() - else: - # Case 2: there is only one result, and the website send directly - # the browser on the forecast page: - return [self.page.get_city()] + def parse_cities_result(self, datas): + cities = simplejson.loads(datas.read(), self.ENCODING) + for city in cities: + mcity = City(int(city['codePostal']), u'%s' % city['slug']) + yield mcity def iter_forecast(self, city_id): - self.location(self.WEATHER_URL.format(cityid=city_id)) - + mcity = self.iter_city_search(city_id).next() + self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name)) assert self.is_on_page(WeatherPage) + return self.page.iter_forecast() def get_current(self, city_id): - self.location(self.WEATHER_URL.format(cityid=city_id)) - + mcity = self.iter_city_search(city_id).next() + self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name)) assert self.is_on_page(WeatherPage) return self.page.get_current() diff --git a/modules/meteofrance/pages/meteo.py b/modules/meteofrance/pages/meteo.py index 19c11d6b..ea9d09b9 100644 --- a/modules/meteofrance/pages/meteo.py +++ b/modules/meteofrance/pages/meteo.py @@ -1,3 +1,4 @@ + # -*- coding: utf-8 -*- # Copyright(C) 2010-2011 Cedric Defortis @@ -19,69 +20,50 @@ from weboob.tools.browser import BasePage -from weboob.capabilities.weather import Forecast, Current, City +from weboob.capabilities.weather import Forecast, Current import datetime -__all__ = ['WeatherPage', 'CityPage'] +__all__ = ['WeatherPage'] class WeatherPage(BasePage): def get_temp_without_unit(self, temp_str): # It seems that the mechanize module give us some old style # ISO character - return int(temp_str.replace(u"\xb0C", "").strip()) + return float(temp_str.replace(u"\xb0C", "").strip()) def iter_forecast(self): - for div in self.document.getiterator('li'): - if div.attrib.get('class', '').startswith('jour'): - mdate = div.xpath('./dl/dt')[0].text - t_low = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[0].text) - t_high = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[1].text) - mtxt = div.xpath('.//dd')[0].text - yield Forecast(mdate, t_low, t_high, mtxt, 'C') - elif div.attrib.get('class', '').startswith('lijourle'): - for em in div.getiterator('em'): - templist = em.text_content().split("/") - - t_low = self.get_temp_without_unit(templist[0]) - t_high = self.get_temp_without_unit(templist[1]) - break - for strong in div.getiterator("strong"): - mdate = strong.text_content() - break - for img in div.getiterator("img"): - mtxt = img.attrib["title"] - break - yield Forecast(mdate, t_low, t_high, mtxt, "C") + lis = self.document.getroot().xpath('//ul[@class="list-days-summary slides"]/li') + for li in lis: + divs = self.parser.select(li, 'div[@class="group-days-summary"]', 1, method='xpath') + for div in divs: + day_div = self.parser.select(div, 'div[@class="box"]', 1, method='xpath') + date = self.parser.select(day_div, 'div[@class="box-header"]/h3', 1, method='xpath').text + temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]', + 1, method='xpath').text_content() + low = self.get_temp_without_unit(temp.split('|')[0]) + high = self.get_temp_without_unit(temp.split('|')[1]) + broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', + 1, method='xpath').text_content() + uvs = self.parser.select(div, 'div/div/div[@class="day-summary-uv"]', + method='xpath') + uv = u'' + if uvs is not None and len(uvs) > 0: + uv = u'%s' % uvs[0].text_content() + wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', + 1, method='xpath').text_content() + text = u'%s %s %s' % (broad, uv, wind) + yield Forecast(date, low, high, text, u'C') def get_current(self): - div = self.document.getroot().xpath('//div[@class="bloc_details"]/ul/li/dl')[0] + div = self.document.getroot().xpath('//div[@class="bloc-day-summary"]')[0] mdate = datetime.datetime.now() - temp = self.get_temp_without_unit(div.xpath('./dd[@class="minmax"]')[0].text) - mtxt = div.find('dd').find('img').attrib['title'] - return Current(mdate, temp, mtxt, 'C') - - def get_city(self): - """ - Return the city from the forecastpage. - """ - for div in self.document.getiterator('div'): - if div.attrib.get("class", "") == "choix": - for strong in div.getiterator("strong"): - city_name = strong.text + " " + strong.tail.replace("(", "").replace(")", "") - city_id = self.url.split("/")[-1] - return City(city_id, city_name) - - -class CityPage(BasePage): - def iter_city_search(self): - for div in self.document.getiterator('div'): - if div.attrib.get('id') == "column1": - for li in div.getiterator('li'): - city_name = li.text_content() - for children in li.getchildren(): - city_id = children.attrib.get("href").split("/")[-1] - mcity = City(city_id, city_name) - yield mcity + temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]', + 1, method='xpath').text_content() + temperature = self.get_temp_without_unit(temp.split('|')[0]) + broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', 1, method='xpath').text_content() + wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', 1, method='xpath').text_content() + mtxt = u'%s %s' % (broad, wind) + return Current(mdate, temperature, mtxt, u'C')