[meteofrance] site changes

2013-11-29 21:50:05 +01:00 · 2013-11-29 21:50:05 +01:00 · 37a0bd3aa5
commit 37a0bd3aa5
parent d68de0d0d7
2 changed files with 53 additions and 72 deletions
--- a/modules/meteofrance/browser.py
+++ b/modules/meteofrance/browser.py
@ -21,26 +21,25 @@
 import urllib
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.json import json as simplejson
 from weboob.capabilities.weather import City
-from .pages.meteo import WeatherPage, CityPage
+from .pages.meteo import WeatherPage
 __all__ = ['MeteofranceBrowser']
 class MeteofranceBrowser(BaseBrowser):
-    DOMAIN = 'france.meteofrance.com'
+    DOMAIN = 'www.meteofrance.com'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
-    WEATHER_URL = '{0}://{1}/france/meteo?PREVISIONS_PORTLET.path=previsionsville/{{cityid}}'.format(PROTOCOL, DOMAIN)
+    WEATHER_URL = '{0}://{1}/previsions-meteo-france/{{city_name}}/{{city_id}}'.format(PROTOCOL, DOMAIN)
-    CITY_SEARCH_URL = '{0}://{1}/france/accueil/resultat?RECHERCHE_RESULTAT_PORTLET.path=rechercheresultat&' \
+    CITY_SEARCH_URL = '{0}://{1}/mf3-rpc-portlet/rest/lieu/facet/previsions/search/{{city_pattern}}'\
-        'query={{city_pattern}}&type=PREV_FRANCE&satellite=france'.format(PROTOCOL, DOMAIN)
+                      .format(PROTOCOL, DOMAIN)
    PAGES = {
-        WEATHER_URL.format(cityid=".*"): WeatherPage,
+        WEATHER_URL.format(city_id=".*", city_name=".*"): WeatherPage,
        CITY_SEARCH_URL.format(city_pattern=".*"): CityPage,
        'http://france.meteofrance.com/france/accueil/resultat.*': CityPage,
        'http://france.meteofrance.com/france/meteo.*': WeatherPage,
        }
    def __init__(self, *args, **kwargs):
@ -48,24 +47,24 @@ class MeteofranceBrowser(BaseBrowser):
    def iter_city_search(self, pattern):
        searchurl = self.CITY_SEARCH_URL.format(city_pattern=urllib.quote_plus(pattern.encode('utf-8')))
-        self.location(searchurl)
+        response = self.openurl(searchurl)
        return self.parse_cities_result(response)
-        if self.is_on_page(CityPage):
+    def parse_cities_result(self, datas):
-            # Case 1: there are multiple results for the pattern:
+        cities = simplejson.loads(datas.read(), self.ENCODING)
-            return self.page.iter_city_search()
+        for city in cities:
-        else:
+            mcity = City(int(city['codePostal']), u'%s' % city['slug'])
-            # Case 2: there is only one result, and the website sends directly
+            yield mcity
            # the browser on the forecast page:
            return [self.page.get_city()]
    def iter_forecast(self, city_id):
-        self.location(self.WEATHER_URL.format(cityid=city_id))
+        mcity = self.iter_city_search(city_id).next()
-
+        self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
        assert self.is_on_page(WeatherPage)
        return self.page.iter_forecast()
    def get_current(self, city_id):
-        self.location(self.WEATHER_URL.format(cityid=city_id))
+        mcity = self.iter_city_search(city_id).next()
-
+        self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
        assert self.is_on_page(WeatherPage)
        return self.page.get_current()
--- a/modules/meteofrance/pages/meteo.py
+++ b/modules/meteofrance/pages/meteo.py
@ -1,3 +1,4 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2010-2011 Cedric Defortis
@ -19,69 +20,50 @@
 from weboob.tools.browser import BasePage
-from weboob.capabilities.weather import Forecast, Current, City
+from weboob.capabilities.weather import Forecast, Current
 import datetime
-__all__ = ['WeatherPage', 'CityPage']
+__all__ = ['WeatherPage']
 class WeatherPage(BasePage):
    def get_temp_without_unit(self, temp_str):
        # It seems that the mechanize module gives us some old-style
        # ISO characters
-        return int(temp_str.replace(u"\xb0C", "").strip())
+        return float(temp_str.replace(u"\xb0C", "").strip())
    def iter_forecast(self):
-        for div in self.document.getiterator('li'):
+        lis = self.document.getroot().xpath('//ul[@class="list-days-summary slides"]/li')
-            if div.attrib.get('class', '').startswith('jour'):
+        for li in lis:
-                mdate = div.xpath('./dl/dt')[0].text
+            divs = self.parser.select(li, 'div[@class="group-days-summary"]', 1, method='xpath')
-                t_low = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[0].text)
+            for div in divs:
-                t_high = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[1].text)
+                day_div = self.parser.select(div, 'div[@class="box"]', 1, method='xpath')
-                mtxt = div.xpath('.//dd')[0].text
+                date = self.parser.select(day_div, 'div[@class="box-header"]/h3', 1, method='xpath').text
-                yield Forecast(mdate, t_low, t_high, mtxt, 'C')
+                temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
-            elif div.attrib.get('class', '').startswith('lijourle'):
+                                          1, method='xpath').text_content()
-                for em in div.getiterator('em'):
+                low = self.get_temp_without_unit(temp.split('|')[0])
-                    templist = em.text_content().split("/")
+                high = self.get_temp_without_unit(temp.split('|')[1])
-
+                broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]',
-                    t_low = self.get_temp_without_unit(templist[0])
+                                           1, method='xpath').text_content()
-                    t_high = self.get_temp_without_unit(templist[1])
+                uvs = self.parser.select(div, 'div/div/div[@class="day-summary-uv"]',
-                    break
+                                         method='xpath')
-                for strong in div.getiterator("strong"):
+                uv = u''
-                    mdate = strong.text_content()
+                if uvs is not None and len(uvs) > 0:
-                    break
+                    uv = u'%s' % uvs[0].text_content()
-                for img in div.getiterator("img"):
+                wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]',
-                    mtxt = img.attrib["title"]
+                                          1, method='xpath').text_content()
-                    break
+                text = u'%s %s %s' % (broad, uv, wind)
-                yield Forecast(mdate, t_low, t_high, mtxt, "C")
+                yield Forecast(date, low, high, text, u'C')
    def get_current(self):
-        div = self.document.getroot().xpath('//div[@class="bloc_details"]/ul/li/dl')[0]
+        div = self.document.getroot().xpath('//div[@class="bloc-day-summary"]')[0]
        mdate = datetime.datetime.now()
-        temp = self.get_temp_without_unit(div.xpath('./dd[@class="minmax"]')[0].text)
+        temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
-        mtxt = div.find('dd').find('img').attrib['title']
+                                  1, method='xpath').text_content()
-        return Current(mdate, temp, mtxt, 'C')
+        temperature = self.get_temp_without_unit(temp.split('|')[0])
-
+        broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', 1, method='xpath').text_content()
-    def get_city(self):
+        wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', 1, method='xpath').text_content()
-        """
+        mtxt = u'%s %s' % (broad, wind)
-        Return the city from the forecastpage.
+        return Current(mdate, temperature, mtxt, u'C')
        """
        for div in self.document.getiterator('div'):
            if div.attrib.get("class", "") == "choix":
                for strong in div.getiterator("strong"):
                    city_name = strong.text + " " + strong.tail.replace("(", "").replace(")", "")
                    city_id = self.url.split("/")[-1]
                    return City(city_id, city_name)
 class CityPage(BasePage):
    def iter_city_search(self):
        for div in self.document.getiterator('div'):
            if div.attrib.get('id') == "column1":
                for li in div.getiterator('li'):
                    city_name = li.text_content()
                    for children in li.getchildren():
                        city_id = children.attrib.get("href").split("/")[-1]
                    mcity = City(city_id, city_name)
                    yield mcity