[meteofrance] site changes

This commit is contained in:
Bezleputh 2013-11-29 21:50:05 +01:00 committed by Florent Fourcot
commit 37a0bd3aa5
2 changed files with 53 additions and 72 deletions

View file

@ -21,26 +21,25 @@
import urllib import urllib
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.json import json as simplejson
from weboob.capabilities.weather import City
from .pages.meteo import WeatherPage, CityPage from .pages.meteo import WeatherPage
__all__ = ['MeteofranceBrowser'] __all__ = ['MeteofranceBrowser']
class MeteofranceBrowser(BaseBrowser): class MeteofranceBrowser(BaseBrowser):
DOMAIN = 'france.meteofrance.com' DOMAIN = 'www.meteofrance.com'
PROTOCOL = 'http' PROTOCOL = 'http'
ENCODING = 'utf-8' ENCODING = 'utf-8'
USER_AGENT = BaseBrowser.USER_AGENTS['wget'] USER_AGENT = BaseBrowser.USER_AGENTS['wget']
WEATHER_URL = '{0}://{1}/france/meteo?PREVISIONS_PORTLET.path=previsionsville/{{cityid}}'.format(PROTOCOL, DOMAIN) WEATHER_URL = '{0}://{1}/previsions-meteo-france/{{city_name}}/{{city_id}}'.format(PROTOCOL, DOMAIN)
CITY_SEARCH_URL = '{0}://{1}/france/accueil/resultat?RECHERCHE_RESULTAT_PORTLET.path=rechercheresultat&' \ CITY_SEARCH_URL = '{0}://{1}/mf3-rpc-portlet/rest/lieu/facet/previsions/search/{{city_pattern}}'\
'query={{city_pattern}}&type=PREV_FRANCE&satellite=france'.format(PROTOCOL, DOMAIN) .format(PROTOCOL, DOMAIN)
PAGES = { PAGES = {
WEATHER_URL.format(cityid=".*"): WeatherPage, WEATHER_URL.format(city_id=".*", city_name=".*"): WeatherPage,
CITY_SEARCH_URL.format(city_pattern=".*"): CityPage,
'http://france.meteofrance.com/france/accueil/resultat.*': CityPage,
'http://france.meteofrance.com/france/meteo.*': WeatherPage,
} }
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -48,24 +47,24 @@ class MeteofranceBrowser(BaseBrowser):
def iter_city_search(self, pattern): def iter_city_search(self, pattern):
searchurl = self.CITY_SEARCH_URL.format(city_pattern=urllib.quote_plus(pattern.encode('utf-8'))) searchurl = self.CITY_SEARCH_URL.format(city_pattern=urllib.quote_plus(pattern.encode('utf-8')))
self.location(searchurl) response = self.openurl(searchurl)
return self.parse_cities_result(response)
if self.is_on_page(CityPage): def parse_cities_result(self, datas):
# Case 1: there are multiple results for the pattern: cities = simplejson.loads(datas.read(), self.ENCODING)
return self.page.iter_city_search() for city in cities:
else: mcity = City(int(city['codePostal']), u'%s' % city['slug'])
# Case 2: there is only one result, and the website send directly yield mcity
# the browser on the forecast page:
return [self.page.get_city()]
def iter_forecast(self, city_id): def iter_forecast(self, city_id):
self.location(self.WEATHER_URL.format(cityid=city_id)) mcity = self.iter_city_search(city_id).next()
self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
assert self.is_on_page(WeatherPage) assert self.is_on_page(WeatherPage)
return self.page.iter_forecast() return self.page.iter_forecast()
def get_current(self, city_id): def get_current(self, city_id):
self.location(self.WEATHER_URL.format(cityid=city_id)) mcity = self.iter_city_search(city_id).next()
self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
assert self.is_on_page(WeatherPage) assert self.is_on_page(WeatherPage)
return self.page.get_current() return self.page.get_current()

View file

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Cedric Defortis # Copyright(C) 2010-2011 Cedric Defortis
@ -19,69 +20,50 @@
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.capabilities.weather import Forecast, Current, City from weboob.capabilities.weather import Forecast, Current
import datetime import datetime
__all__ = ['WeatherPage', 'CityPage'] __all__ = ['WeatherPage']
class WeatherPage(BasePage): class WeatherPage(BasePage):
def get_temp_without_unit(self, temp_str): def get_temp_without_unit(self, temp_str):
# It seems that the mechanize module gives us some old-style # It seems that the mechanize module gives us some old-style
# ISO character # ISO character
return int(temp_str.replace(u"\xb0C", "").strip()) return float(temp_str.replace(u"\xb0C", "").strip())
def iter_forecast(self): def iter_forecast(self):
for div in self.document.getiterator('li'): lis = self.document.getroot().xpath('//ul[@class="list-days-summary slides"]/li')
if div.attrib.get('class', '').startswith('jour'): for li in lis:
mdate = div.xpath('./dl/dt')[0].text divs = self.parser.select(li, 'div[@class="group-days-summary"]', 1, method='xpath')
t_low = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[0].text) for div in divs:
t_high = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[1].text) day_div = self.parser.select(div, 'div[@class="box"]', 1, method='xpath')
mtxt = div.xpath('.//dd')[0].text date = self.parser.select(day_div, 'div[@class="box-header"]/h3', 1, method='xpath').text
yield Forecast(mdate, t_low, t_high, mtxt, 'C') temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
elif div.attrib.get('class', '').startswith('lijourle'): 1, method='xpath').text_content()
for em in div.getiterator('em'): low = self.get_temp_without_unit(temp.split('|')[0])
templist = em.text_content().split("/") high = self.get_temp_without_unit(temp.split('|')[1])
broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]',
t_low = self.get_temp_without_unit(templist[0]) 1, method='xpath').text_content()
t_high = self.get_temp_without_unit(templist[1]) uvs = self.parser.select(div, 'div/div/div[@class="day-summary-uv"]',
break method='xpath')
for strong in div.getiterator("strong"): uv = u''
mdate = strong.text_content() if uvs is not None and len(uvs) > 0:
break uv = u'%s' % uvs[0].text_content()
for img in div.getiterator("img"): wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]',
mtxt = img.attrib["title"] 1, method='xpath').text_content()
break text = u'%s %s %s' % (broad, uv, wind)
yield Forecast(mdate, t_low, t_high, mtxt, "C") yield Forecast(date, low, high, text, u'C')
def get_current(self): def get_current(self):
div = self.document.getroot().xpath('//div[@class="bloc_details"]/ul/li/dl')[0] div = self.document.getroot().xpath('//div[@class="bloc-day-summary"]')[0]
mdate = datetime.datetime.now() mdate = datetime.datetime.now()
temp = self.get_temp_without_unit(div.xpath('./dd[@class="minmax"]')[0].text) temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
mtxt = div.find('dd').find('img').attrib['title'] 1, method='xpath').text_content()
return Current(mdate, temp, mtxt, 'C') temperature = self.get_temp_without_unit(temp.split('|')[0])
broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', 1, method='xpath').text_content()
def get_city(self): wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', 1, method='xpath').text_content()
""" mtxt = u'%s %s' % (broad, wind)
Return the city from the forecastpage. return Current(mdate, temperature, mtxt, u'C')
"""
for div in self.document.getiterator('div'):
if div.attrib.get("class", "") == "choix":
for strong in div.getiterator("strong"):
city_name = strong.text + " " + strong.tail.replace("(", "").replace(")", "")
city_id = self.url.split("/")[-1]
return City(city_id, city_name)
class CityPage(BasePage):
def iter_city_search(self):
for div in self.document.getiterator('div'):
if div.attrib.get('id') == "column1":
for li in div.getiterator('li'):
city_name = li.text_content()
for children in li.getchildren():
city_id = children.attrib.get("href").split("/")[-1]
mcity = City(city_id, city_name)
yield mcity