[meteofrance] site changes

This commit is contained in:
Bezleputh 2013-11-29 21:50:05 +01:00 committed by Florent Fourcot
commit 37a0bd3aa5
2 changed files with 53 additions and 72 deletions

View file

@ -21,26 +21,25 @@
import urllib
from weboob.tools.browser import BaseBrowser
from weboob.tools.json import json as simplejson
from weboob.capabilities.weather import City
from .pages.meteo import WeatherPage, CityPage
from .pages.meteo import WeatherPage
__all__ = ['MeteofranceBrowser']
class MeteofranceBrowser(BaseBrowser):
# Connection settings used for every request to the forecast site.
DOMAIN = 'www.meteofrance.com'
PROTOCOL = 'http'
ENCODING = 'utf-8'
USER_AGENT = BaseBrowser.USER_AGENTS['wget']

# URL templates: PROTOCOL and DOMAIN are substituted right away, while the
# doubled braces leave ``city_name``, ``city_id`` and ``city_pattern`` behind
# as placeholders that are filled in per request.
WEATHER_URL = '{0}://{1}/previsions-meteo-france/{{city_name}}/{{city_id}}'.format(PROTOCOL, DOMAIN)
CITY_SEARCH_URL = '{0}://{1}/mf3-rpc-portlet/rest/lieu/facet/previsions/search/{{city_pattern}}'\
    .format(PROTOCOL, DOMAIN)

# URL pattern -> page class. Only the forecast page is mapped: the pattern
# is built from WEATHER_URL with both placeholders widened to ".*".
PAGES = {
    WEATHER_URL.format(city_id=".*", city_name=".*"): WeatherPage,
}
def __init__(self, *args, **kwargs):
@ -48,24 +47,24 @@ class MeteofranceBrowser(BaseBrowser):
def iter_city_search(self, pattern):
    """
    Search the cities matching *pattern*.

    The pattern is UTF-8 encoded and URL-quoted, inserted into
    CITY_SEARCH_URL, and the response is handed to parse_cities_result().

    :param pattern: text to look for (must support ``.encode('utf-8')``)
    :returns: whatever parse_cities_result() produces for the response
    """
    quoted_pattern = urllib.quote_plus(pattern.encode('utf-8'))
    searchurl = self.CITY_SEARCH_URL.format(city_pattern=quoted_pattern)
    # The search URL is fetched once with openurl(); the raw response is
    # parsed directly, so no page object has to be loaded or inspected.
    response = self.openurl(searchurl)
    return self.parse_cities_result(response)
def parse_cities_result(self, datas):
    """
    Yield a City for every entry of a city search response.

    :param datas: object whose ``read()`` returns the JSON text to decode
                  (presumably a JSON array of objects -- confirm against
                  the search endpoint)
    """
    raw = datas.read()
    for entry in simplejson.loads(raw, self.ENCODING):
        # City id: the entry's 'codePostal' converted to int;
        # City name: the entry's 'slug' as a unicode string.
        yield City(int(entry['codePostal']), u'%s' % entry['slug'])
def iter_forecast(self, city_id):
    """
    Iterate over the forecasts of a city.

    The first result of iter_city_search(city_id) supplies the id and
    name that are inserted into WEATHER_URL.

    :param city_id: value used as search pattern for iter_city_search()
    :returns: result of iter_forecast() on the loaded WeatherPage
    :raises StopIteration: if the city search yields no result
    """
    mcity = next(self.iter_city_search(city_id))
    self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
    assert self.is_on_page(WeatherPage)
    return self.page.iter_forecast()
def get_current(self, city_id):
    """
    Return the current weather of a city.

    The first result of iter_city_search(city_id) supplies the id and
    name that are inserted into WEATHER_URL.

    :param city_id: value used as search pattern for iter_city_search()
    :returns: result of get_current() on the loaded WeatherPage
    :raises StopIteration: if the city search yields no result
    """
    mcity = next(self.iter_city_search(city_id))
    self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
    assert self.is_on_page(WeatherPage)
    return self.page.get_current()

View file

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Cedric Defortis
@ -19,69 +20,50 @@
from weboob.tools.browser import BasePage
from weboob.capabilities.weather import Forecast, Current, City
from weboob.capabilities.weather import Forecast, Current
import datetime
__all__ = ['WeatherPage', 'CityPage']
__all__ = ['WeatherPage']
class WeatherPage(BasePage):
def get_temp_without_unit(self, temp_str):
    """
    Convert a temperature string to a float.

    The degree sign followed by "C" is removed and surrounding whitespace
    is stripped before the conversion, so decimal values are accepted.

    :param temp_str: unicode text holding the temperature and its unit
    :returns: the temperature as a float
    :raises ValueError: if the remaining text is not a number
    """
    # It seems that the mechanize module gives us an old-style ISO
    # character for the degree sign, hence the explicit u"\xb0".
    return float(temp_str.replace(u"\xb0C", "").strip())
def iter_forecast(self):
    """
    Yield one Forecast per day block of the forecast page.

    Each Forecast carries the text of the day header, the low and high
    temperatures (first and second "|"-separated fields of the
    temperature text) and a description made of the broad, uv and wind
    texts. The uv part is empty when the block has no uv element.
    """
    lis = self.document.getroot().xpath('//ul[@class="list-days-summary slides"]/li')
    for li in lis:
        # NOTE(review): select(..., 1, ...) presumably returns a single
        # element, so the loop below walks its children -- confirm.
        divs = self.parser.select(li, 'div[@class="group-days-summary"]', 1, method='xpath')
        for div in divs:
            day_div = self.parser.select(div, 'div[@class="box"]', 1, method='xpath')
            day_label = self.parser.select(day_div, 'div[@class="box-header"]/h3', 1, method='xpath').text

            temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
                                      1, method='xpath').text_content()
            bounds = temp.split('|')
            low = self.get_temp_without_unit(bounds[0])
            high = self.get_temp_without_unit(bounds[1])

            broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]',
                                       1, method='xpath').text_content()

            # The uv element is looked up without a required count, so
            # its absence is tolerated and leaves the uv text empty.
            uvs = self.parser.select(div, 'div/div/div[@class="day-summary-uv"]',
                                     method='xpath')
            uv = u''
            if uvs is not None and len(uvs) > 0:
                uv = u'%s' % uvs[0].text_content()

            wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]',
                                      1, method='xpath').text_content()

            text = u'%s %s %s' % (broad, uv, wind)
            yield Forecast(day_label, low, high, text, u'C')
def get_current(self):
    """
    Return the current conditions read from the "bloc-day-summary" block.

    The temperature is the first "|"-separated field of the temperature
    text; the description joins the broad and wind texts. The result is
    dated with datetime.datetime.now() at the time of the call.

    :raises IndexError: if the page has no "bloc-day-summary" div
    """
    div = self.document.getroot().xpath('//div[@class="bloc-day-summary"]')[0]
    mdate = datetime.datetime.now()
    temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
                              1, method='xpath').text_content()
    temperature = self.get_temp_without_unit(temp.split('|')[0])
    broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', 1, method='xpath').text_content()
    wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', 1, method='xpath').text_content()
    mtxt = u'%s %s' % (broad, wind)
    return Current(mdate, temperature, mtxt, u'C')
def get_city(self):
"""
Return the city shown on the forecast page.

Looks for a <div> whose class is exactly "choix" and a <strong> element
inside it. The city name is that element's text, a space, and its tail
with the parentheses removed; the city id is the last "/"-separated
part of self.url.
"""
for div in self.document.getiterator('div'):
if div.attrib.get("class", "") == "choix":
for strong in div.getiterator("strong"):
city_name = strong.text + " " + strong.tail.replace("(", "").replace(")", "")
city_id = self.url.split("/")[-1]
return City(city_id, city_name)
# Presumably the page listing the cities that match a search -- confirm.
class CityPage(BasePage):
def iter_city_search(self):
# Yield City objects built from the <li> elements found under the <div>
# whose id is "column1": the name is the <li> text content and the id is
# the last "/"-separated part of a child element's "href" attribute.
for div in self.document.getiterator('div'):
if div.attrib.get('id') == "column1":
for li in div.getiterator('li'):
city_name = li.text_content()
for children in li.getchildren():
city_id = children.attrib.get("href").split("/")[-1]
mcity = City(city_id, city_name)
yield mcity
# NOTE(review): the statements below read `mdate`, which is not assigned
# anywhere in the visible part of this class, and call
# get_temp_without_unit(), which is defined on WeatherPage. They look like
# the body of WeatherPage.get_current() -- confirm and relocate.
temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
1, method='xpath').text_content()
temperature = self.get_temp_without_unit(temp.split('|')[0])
broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', 1, method='xpath').text_content()
wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', 1, method='xpath').text_content()
mtxt = u'%s %s' % (broad, wind)
return Current(mdate, temperature, mtxt, u'C')