improve date parsing method
This commit is contained in:
parent
61718199d6
commit
9b1bc4269a
1 changed file with 15 additions and 3 deletions
|
|
@ -20,11 +20,12 @@
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
from .job import AdeccoJobAdvert
|
from .job import AdeccoJobAdvert
|
||||||
from datetime import datetime
|
import datetime
|
||||||
import locale
|
import locale
|
||||||
import re
|
import re
|
||||||
|
|
||||||
__all__ = ['SearchPage', 'AdvertPage']
|
__all__ = ['SearchPage', 'AdvertPage']
|
||||||
|
MONTHS = [u'janvier', u'février', u'mars', u'avril', u'mai', u'juin', u'juillet', u'août', u'septembre', u'octobre', u'novembre', u'décembre']
|
||||||
|
|
||||||
|
|
||||||
class SearchPage(BasePage):
|
class SearchPage(BasePage):
|
||||||
|
|
@ -44,7 +45,13 @@ class SearchPage(BasePage):
|
||||||
advert = AdeccoJobAdvert(_id)
|
advert = AdeccoJobAdvert(_id)
|
||||||
|
|
||||||
date = u'%s' % self.parser.select(div, "div/span[@class='offreDatePublication']", 1, method='xpath').text
|
date = u'%s' % self.parser.select(div, "div/span[@class='offreDatePublication']", 1, method='xpath').text
|
||||||
advert.publication_date = datetime.strptime(date, "%d %B %Y").date()
|
m = re.match('(\d{2})\s(.*?)\s(\d{4})', date)
|
||||||
|
if m:
|
||||||
|
dd = int(m.group(1))
|
||||||
|
mm = MONTHS.index(m.group(2)) + 1
|
||||||
|
yyyy = int(m.group(3))
|
||||||
|
advert.publication_date = datetime.date(yyyy, mm, dd)
|
||||||
|
|
||||||
advert.title = u'%s' % self.parser.select(div, "div/h3/a", 1, method='xpath').text_content()
|
advert.title = u'%s' % self.parser.select(div, "div/h3/a", 1, method='xpath').text_content()
|
||||||
advert.place = u'%s' % self.parser.select(div, "div/h3/span[@class='offreLocalisation']", 1, method='xpath').text
|
advert.place = u'%s' % self.parser.select(div, "div/h3/span[@class='offreLocalisation']", 1, method='xpath').text
|
||||||
yield advert
|
yield advert
|
||||||
|
|
@ -61,7 +68,12 @@ class AdvertPage(BasePage):
|
||||||
div = self.document.getroot().xpath("//div[@class='contain_MoreResults']")[0]
|
div = self.document.getroot().xpath("//div[@class='contain_MoreResults']")[0]
|
||||||
|
|
||||||
date = u'%s' % self.parser.select(div, "div[@class='dateResult']", 1, method='xpath').text.strip()
|
date = u'%s' % self.parser.select(div, "div[@class='dateResult']", 1, method='xpath').text.strip()
|
||||||
advert.publication_date = datetime.strptime(date, "%d %B %Y").date()
|
m = re.match('(\d{2})\s(.*?)\s(\d{4})', date)
|
||||||
|
if m:
|
||||||
|
dd = int(m.group(1))
|
||||||
|
mm = MONTHS.index(m.group(2)) + 1
|
||||||
|
yyyy = int(m.group(3))
|
||||||
|
advert.publication_date = datetime.date(yyyy, mm, dd)
|
||||||
|
|
||||||
title = self.parser.select(div, "h1", 1, method='xpath').text_content().strip()
|
title = self.parser.select(div, "h1", 1, method='xpath').text_content().strip()
|
||||||
town = self.parser.select(div, "h1/span[@class='town']", 1, method='xpath').text_content()
|
town = self.parser.select(div, "h1/span[@class='town']", 1, method='xpath').text_content()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue