[meteofrance] site changes

2013-11-29 21:50:05 +01:00 · 2013-11-29 21:50:05 +01:00 · 37a0bd3aa5
commit 37a0bd3aa5
parent d68de0d0d7
2 changed files with 53 additions and 72 deletions
--- a/modules/meteofrance/browser.py
+++ b/modules/meteofrance/browser.py
@@ -21,26 +21,25 @@
 import urllib

 from weboob.tools.browser import BaseBrowser
+from weboob.tools.json import json as simplejson
+from weboob.capabilities.weather import City

-from .pages.meteo import WeatherPage, CityPage
+from .pages.meteo import WeatherPage


 __all__ = ['MeteofranceBrowser']


 class MeteofranceBrowser(BaseBrowser):
-    DOMAIN = 'france.meteofrance.com'
+    DOMAIN = 'www.meteofrance.com'
    PROTOCOL = 'http'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
-    WEATHER_URL = '{0}://{1}/france/meteo?PREVISIONS_PORTLET.path=previsionsville/{{cityid}}'.format(PROTOCOL, DOMAIN)
-    CITY_SEARCH_URL = '{0}://{1}/france/accueil/resultat?RECHERCHE_RESULTAT_PORTLET.path=rechercheresultat&' \
-        'query={{city_pattern}}&type=PREV_FRANCE&satellite=france'.format(PROTOCOL, DOMAIN)
+    WEATHER_URL = '{0}://{1}/previsions-meteo-france/{{city_name}}/{{city_id}}'.format(PROTOCOL, DOMAIN)
+    CITY_SEARCH_URL = '{0}://{1}/mf3-rpc-portlet/rest/lieu/facet/previsions/search/{{city_pattern}}'\
+                      .format(PROTOCOL, DOMAIN)
    PAGES = {
-        WEATHER_URL.format(cityid=".*"): WeatherPage,
-        CITY_SEARCH_URL.format(city_pattern=".*"): CityPage,
-        'http://france.meteofrance.com/france/accueil/resultat.*': CityPage,
-        'http://france.meteofrance.com/france/meteo.*': WeatherPage,
+        WEATHER_URL.format(city_id=".*", city_name=".*"): WeatherPage,
        }

    def __init__(self, *args, **kwargs):
@@ -48,24 +47,24 @@ class MeteofranceBrowser(BaseBrowser):

    def iter_city_search(self, pattern):
        searchurl = self.CITY_SEARCH_URL.format(city_pattern=urllib.quote_plus(pattern.encode('utf-8')))
-        self.location(searchurl)
+        response = self.openurl(searchurl)
+        return self.parse_cities_result(response)

-        if self.is_on_page(CityPage):
-            # Case 1: there are multiple results for the pattern:
-            return self.page.iter_city_search()
-        else:
-            # Case 2: there is only one result, and the website send directly
-            # the browser on the forecast page:
-            return [self.page.get_city()]
+    def parse_cities_result(self, datas):
+        cities = simplejson.loads(datas.read(), self.ENCODING)
+        for city in cities:
+            mcity = City(int(city['codePostal']), u'%s' % city['slug'])
+            yield mcity

    def iter_forecast(self, city_id):
-        self.location(self.WEATHER_URL.format(cityid=city_id))
-
+        mcity = self.iter_city_search(city_id).next()
+        self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
        assert self.is_on_page(WeatherPage)
+
        return self.page.iter_forecast()

    def get_current(self, city_id):
-        self.location(self.WEATHER_URL.format(cityid=city_id))
-
+        mcity = self.iter_city_search(city_id).next()
+        self.location(self.WEATHER_URL.format(city_id=mcity.id, city_name=mcity.name))
        assert self.is_on_page(WeatherPage)
        return self.page.get_current()
--- a/modules/meteofrance/pages/meteo.py
+++ b/modules/meteofrance/pages/meteo.py
@@ -1,3 +1,4 @@
+
 # -*- coding: utf-8 -*-

 # Copyright(C) 2010-2011 Cedric Defortis
@@ -19,69 +20,50 @@


 from weboob.tools.browser import BasePage
-from weboob.capabilities.weather import Forecast, Current, City
+from weboob.capabilities.weather import Forecast, Current

 import datetime


-__all__ = ['WeatherPage', 'CityPage']
+__all__ = ['WeatherPage']


 class WeatherPage(BasePage):
    def get_temp_without_unit(self, temp_str):
        # It seems that the mechanize module give us some old style
        # ISO character
-        return int(temp_str.replace(u"\xb0C", "").strip())
+        return float(temp_str.replace(u"\xb0C", "").strip())

    def iter_forecast(self):
-        for div in self.document.getiterator('li'):
-            if div.attrib.get('class', '').startswith('jour'):
-                mdate = div.xpath('./dl/dt')[0].text
-                t_low = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[0].text)
-                t_high = self.get_temp_without_unit(div.xpath('.//dd[@class="minmax"]/strong')[1].text)
-                mtxt = div.xpath('.//dd')[0].text
-                yield Forecast(mdate, t_low, t_high, mtxt, 'C')
-            elif div.attrib.get('class', '').startswith('lijourle'):
-                for em in div.getiterator('em'):
-                    templist = em.text_content().split("/")
-
-                    t_low = self.get_temp_without_unit(templist[0])
-                    t_high = self.get_temp_without_unit(templist[1])
-                    break
-                for strong in div.getiterator("strong"):
-                    mdate = strong.text_content()
-                    break
-                for img in div.getiterator("img"):
-                    mtxt = img.attrib["title"]
-                    break
-                yield Forecast(mdate, t_low, t_high, mtxt, "C")
+        lis = self.document.getroot().xpath('//ul[@class="list-days-summary slides"]/li')
+        for li in lis:
+            divs = self.parser.select(li, 'div[@class="group-days-summary"]', 1, method='xpath')
+            for div in divs:
+                day_div = self.parser.select(div, 'div[@class="box"]', 1, method='xpath')
+                date = self.parser.select(day_div, 'div[@class="box-header"]/h3', 1, method='xpath').text
+                temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
+                                          1, method='xpath').text_content()
+                low = self.get_temp_without_unit(temp.split('|')[0])
+                high = self.get_temp_without_unit(temp.split('|')[1])
+                broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]',
+                                           1, method='xpath').text_content()
+                uvs = self.parser.select(div, 'div/div/div[@class="day-summary-uv"]',
+                                         method='xpath')
+                uv = u''
+                if uvs is not None and len(uvs) > 0:
+                    uv = u'%s' % uvs[0].text_content()
+                wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]',
+                                          1, method='xpath').text_content()
+                text = u'%s %s %s' % (broad, uv, wind)
+                yield Forecast(date, low, high, text, u'C')

    def get_current(self):
-        div = self.document.getroot().xpath('//div[@class="bloc_details"]/ul/li/dl')[0]
+        div = self.document.getroot().xpath('//div[@class="bloc-day-summary"]')[0]
        mdate = datetime.datetime.now()
-        temp = self.get_temp_without_unit(div.xpath('./dd[@class="minmax"]')[0].text)
-        mtxt = div.find('dd').find('img').attrib['title']
-        return Current(mdate, temp, mtxt, 'C')
-
-    def get_city(self):
-        """
-        Return the city from the forecastpage.
-        """
-        for div in self.document.getiterator('div'):
-            if div.attrib.get("class", "") == "choix":
-                for strong in div.getiterator("strong"):
-                    city_name = strong.text + " " + strong.tail.replace("(", "").replace(")", "")
-                    city_id = self.url.split("/")[-1]
-                    return City(city_id, city_name)
-
-
-class CityPage(BasePage):
-    def iter_city_search(self):
-        for div in self.document.getiterator('div'):
-            if div.attrib.get('id') == "column1":
-                for li in div.getiterator('li'):
-                    city_name = li.text_content()
-                    for children in li.getchildren():
-                        city_id = children.attrib.get("href").split("/")[-1]
-                    mcity = City(city_id, city_name)
-                    yield mcity
+        temp = self.parser.select(div, 'div/div/div[@class="day-summary-temperature"]',
+                                  1, method='xpath').text_content()
+        temperature = self.get_temp_without_unit(temp.split('|')[0])
+        broad = self.parser.select(div, 'div/div/div[@class="day-summary-broad"]', 1, method='xpath').text_content()
+        wind = self.parser.select(div, 'div/div/div[@class="day-summary-wind"]', 1, method='xpath').text_content()
+        mtxt = u'%s %s' % (broad, wind)
+        return Current(mdate, temperature, mtxt, u'C')