improve date parsing method

2013-07-05 19:27:21 +02:00 · 2013-07-05 19:27:21 +02:00 · 9b1bc4269a
commit 9b1bc4269a
parent 61718199d6
1 changed file with 15 additions and 3 deletions
--- a/modules/adecco/pages.py
+++ b/modules/adecco/pages.py
@@ -20,11 +20,12 @@

 from weboob.tools.browser import BasePage
 from .job import AdeccoJobAdvert
-from datetime import datetime
+import datetime
 import locale
 import re

 __all__ = ['SearchPage', 'AdvertPage']
+MONTHS = [u'janvier', u'février', u'mars', u'avril', u'mai', u'juin', u'juillet', u'août', u'septembre', u'octobre', u'novembre', u'décembre']


 class SearchPage(BasePage):
@@ -44,7 +45,13 @@ class SearchPage(BasePage):
                advert = AdeccoJobAdvert(_id)

                date = u'%s' % self.parser.select(div, "div/span[@class='offreDatePublication']", 1, method='xpath').text
-                advert.publication_date = datetime.strptime(date, "%d %B %Y").date()
+                m = re.match('(\d{2})\s(.*?)\s(\d{4})', date)
+                if m:
+                    dd = int(m.group(1))
+                    mm = MONTHS.index(m.group(2)) + 1
+                    yyyy = int(m.group(3))
+                    advert.publication_date = datetime.date(yyyy, mm, dd)
+
                advert.title = u'%s' % self.parser.select(div, "div/h3/a", 1, method='xpath').text_content()
                advert.place = u'%s' % self.parser.select(div, "div/h3/span[@class='offreLocalisation']", 1, method='xpath').text
                yield advert
@@ -61,7 +68,12 @@ class AdvertPage(BasePage):
        div = self.document.getroot().xpath("//div[@class='contain_MoreResults']")[0]

        date = u'%s' % self.parser.select(div, "div[@class='dateResult']", 1, method='xpath').text.strip()
-        advert.publication_date = datetime.strptime(date, "%d %B %Y").date()
+        m = re.match('(\d{2})\s(.*?)\s(\d{4})', date)
+        if m:
+            dd = int(m.group(1))
+            mm = MONTHS.index(m.group(2)) + 1
+            yyyy = int(m.group(3))
+            advert.publication_date = datetime.date(yyyy, mm, dd)

        title = self.parser.select(div, "h1", 1, method='xpath').text_content().strip()
        town = self.parser.select(div, "h1/span[@class='town']", 1, method='xpath').text_content()