diff --git a/modules/monster/browser.py b/modules/monster/browser.py
index 6fde3c3c..dfdd2084 100644
--- a/modules/monster/browser.py
+++ b/modules/monster/browser.py
@@ -18,48 +18,28 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
-from weboob.deprecated.browser.decorators import id2url
-from weboob.deprecated.browser import Browser
+from weboob.browser import PagesBrowser, URL
from .pages import SearchPage, AdvertPage
-from .job import MonsterJobAdvert
__all__ = ['MonsterBrowser']
-class MonsterBrowser(Browser):
-    PROTOCOL = 'http'
-    DOMAIN = 'offres.monster.fr'
-    ENCODING = 'utf-8'
+class MonsterBrowser(PagesBrowser):
+    """Browser for the Monster (France) job board.
+
+    ``search`` groups the result-listing URLs and ``advert`` a single job
+    offer; each is bound to the page class that parses it.
+    """
-    PAGES = {
-        '%s://%s/offres-d-emploi/\?q=(.*?)' % (PROTOCOL, DOMAIN): SearchPage,
-        '%s://%s/rechercher/(.*?)' % (PROTOCOL, DOMAIN): SearchPage,
-        'http://offre-emploi.monster.fr/(.*?).aspx': AdvertPage,
-    }
+    BASEURL = 'http://offres.monster.fr'
+    advert = URL('http://offre-emploi.monster.fr/(?P<_id>.*).aspx', AdvertPage)
+    # The named groups must carry the same names as the keyword arguments
+    # given to ``search.go()`` in search_job() and advanced_search_job().
+    search = URL(r'rechercher\?q=(?P<pattern>.*)',
+                 r'PowerSearch.aspx\?q=(?P<job_name>.*)&where=(?P<place>.*)&jt=(?P<contract>.*)'
+                 r'&occ=(?P<job_category>.*)&tm=(?P<limit_date>.*)&indid=(?P<activity_domain>.*)',
+                 r'rechercher/.*',
+                 SearchPage)
     def search_job(self, pattern=None):
-        self.location('%s://%s/offres-d-emploi/?q=%s'
-                      % (self.PROTOCOL, self.DOMAIN, urllib.quote_plus(pattern.encode(self.ENCODING))))
-        assert self.is_on_page(SearchPage)
-        return self.page.iter_job_adverts()
+        """Run a keyword search and return the adverts of the result page.
+
+        :param pattern: free-text query; ``None`` or an empty value sends
+                        an empty ``q=`` parameter.
+        :return: whatever ``SearchPage.iter_job_adverts()`` returns.
+        """
+        query = pattern or u''
+        if not isinstance(query, bytes):
+            # Quote UTF-8 bytes, as the previous implementation did:
+            # urllib.quote_plus() cannot handle non-ASCII unicode on Python 2.
+            query = query.encode('utf-8')
+        return self.search.go(pattern=urllib.quote_plus(query)).iter_job_adverts()
     def advanced_search_job(self, job_name, place, contract, job_category, activity_domain, limit_date):
-        self.location(
-            '%s://%s/PowerSearch.aspx?q=%s&where=%s&jt=%s&occ=%s&tm=%s&indid=%s' % (self.PROTOCOL,
-                                                                                    self.DOMAIN,
-                                                                                    urllib.quote(
-                                                                                        job_name.encode(self.ENCODING)),
-                                                                                    place,
-                                                                                    contract,
-                                                                                    job_category,
-                                                                                    limit_date,
-                                                                                    activity_domain))
-        assert self.is_on_page(SearchPage)
-        return self.page.iter_job_adverts()
+        """Run a multi-criteria search through the ``PowerSearch.aspx`` URL.
+
+        Each argument fills the query-string field of the same name in the
+        ``search`` URL (q, where, jt, occ, tm and indid respectively for
+        job_name, place, contract, job_category, limit_date and
+        activity_domain). Values are passed to ``go()`` as given.
+
+        :return: whatever ``SearchPage.iter_job_adverts()`` returns.
+        """
+        return self.search.go(job_name=job_name, place=place, contract=contract, job_category=job_category,
+                              limit_date=limit_date, activity_domain=activity_domain).iter_job_adverts()
-    @id2url(MonsterJobAdvert.id2url)
-    def get_job_advert(self, url, advert):
-        self.location(url)
-        assert self.is_on_page(AdvertPage)
-        return self.page.get_job_advert(url, advert)
+    def get_job_advert(self, _id, advert):
+        """Open the advert page for ``_id`` and parse it.
+
+        :param _id: advert identifier, substituted into the ``advert`` URL.
+        :param advert: object handed to the page as ``obj`` (may be ``None``).
+        :return: the result of ``AdvertPage.get_job_advert(obj=advert)``.
+        """
+        return self.advert.go(_id=_id).get_job_advert(obj=advert)
diff --git a/modules/monster/job.py b/modules/monster/job.py
deleted file mode 100644
index 4fe2fad7..00000000
--- a/modules/monster/job.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2013 Bezleputh
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-from weboob.capabilities.job import BaseJobAdvert
-
-
-class MonsterJobAdvert(BaseJobAdvert):
- @classmethod
- def id2url(cls, _id):
- return 'http://offre-emploi.monster.fr/%s.aspx' % _id
diff --git a/modules/monster/module.py b/modules/monster/module.py
index 12acfc6b..73a84f95 100644
--- a/modules/monster/module.py
+++ b/modules/monster/module.py
@@ -19,12 +19,11 @@
from weboob.tools.backend import Module, BackendConfig
-from weboob.capabilities.job import CapJob
+from weboob.capabilities.job import CapJob, BaseJobAdvert
from weboob.tools.value import Value
from weboob.tools.ordereddict import OrderedDict
from .browser import MonsterBrowser
-from .job import MonsterJobAdvert
__all__ = ['MonsterModule']
@@ -157,25 +156,20 @@ class MonsterModule(Module, CapJob):
)
def search_job(self, pattern=None):
- with self.browser:
- for advert in self.browser.search_job(pattern):
- yield advert
+ """Return the browser's search results for *pattern*.
+
+ The result of ``self.browser.search_job(pattern)`` is returned as is;
+ unlike the removed version, this method is no longer a generator and
+ does not enter ``with self.browser``.
+ """
+ return self.browser.search_job(pattern)
def advanced_search_job(self):
- with self.browser:
- for advert in self.browser.advanced_search_job(job_name=self.config['job_name'].get(),
- place=self.config['place'].get(),
- contract=self.config['contract'].get(),
- job_category=self.config['job_category'].get(),
- activity_domain=self.config['activity_domain'].get(),
- limit_date=self.config['limit_date'].get()):
- yield advert
+ """Run the browser's advanced search with the six configured criteria.
+
+ Every criterion (job_name, place, contract, job_category,
+ activity_domain, limit_date) is read from ``self.config`` and passed
+ by keyword; the browser's return value is handed back unchanged.
+ """
+ return self.browser.advanced_search_job(job_name=self.config['job_name'].get(),
+ place=self.config['place'].get(),
+ contract=self.config['contract'].get(),
+ job_category=self.config['job_category'].get(),
+ activity_domain=self.config['activity_domain'].get(),
+ limit_date=self.config['limit_date'].get())
def get_job_advert(self, _id, advert=None):
- with self.browser:
- return self.browser.get_job_advert(_id, advert)
+ """Fetch the advert identified by *_id* through the browser.
+
+ *advert* is forwarded to the browser unchanged and defaults to None.
+ """
+ return self.browser.get_job_advert(_id, advert)
def fill_obj(self, advert, fields):
- self.get_job_advert(advert.id, advert)
+ """Complete *advert* by reloading it from its own id.
+
+ *fields* is accepted but not used here. The result of
+ ``get_job_advert`` is now returned (the removed version discarded it).
+ """
+ return self.get_job_advert(advert.id, advert)
- OBJECTS = {MonsterJobAdvert: fill_obj}
+ # Maps the advert class to the method used to fill it; keyed on the generic
+ # BaseJobAdvert since this module no longer imports MonsterJobAdvert.
+ OBJECTS = {BaseJobAdvert: fill_obj}
diff --git a/modules/monster/pages.py b/modules/monster/pages.py
index 8f904e08..309c9dc0 100644
--- a/modules/monster/pages.py
+++ b/modules/monster/pages.py
@@ -18,109 +18,64 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.deprecated.browser import Page
-from weboob.tools.html import html2text
import re
from datetime import datetime, time, timedelta
-from .job import MonsterJobAdvert
+
+from weboob.browser.pages import HTMLPage, pagination
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.browser.filters.standard import CleanText, Regexp, Filter, Env, BrowserURL, Join
+from weboob.browser.filters.html import Link, CleanHTML
+from weboob.capabilities.job import BaseJobAdvert
+from weboob.capabilities.base import NotAvailable
-class SearchPage(Page):
- def iter_job_adverts(self):
- re_id = re.compile('http://offre-emploi.monster.fr/(.*?).aspx', re.DOTALL)
- trs = self.document.getroot().xpath("//table[@class='listingsTable']/tbody/tr")
- for tr in trs:
- if 'class' in tr.attrib and tr.attrib['class'] != 'aceHidden':
- a = self.parser.select(tr, 'td/div/div[@class="jobTitleContainer"]/a', 1, method='xpath')
- _id = u'%s' % re_id.search(a.attrib['href']).group(1)
- advert = MonsterJobAdvert(_id)
- advert.society_name = u'%s' % self.parser.select(tr, 'td/div/div[@class="companyContainer"]/div/a',
- 1, method='xpath').attrib['title']
- advert.title = u'%s' % a.text
-
- date = self.parser.select(tr, 'td/div/div[@class="fnt20"]', 1, method='xpath').text_content().strip()
- now = datetime.now()
- number = re.search("\d+", date)
- if number:
- if 'heures' in date:
- date = now - timedelta(hours=int(number.group(0)))
- advert.publication_date = datetime.combine(date, time())
- elif 'jour' in date:
- date = now - timedelta(days=int(number.group(0)))
- advert.publication_date = datetime.combine(date, time())
- else:
- advert.publication_date = datetime.combine(now, time.min)
-
- place = self.parser.select(tr, 'td/div/div[@class="jobLocationSingleLine"]/a', method='xpath')
- if len(place) != 0:
- advert.place = u'%s' % place[0].attrib['title']
-
- yield advert
-
-
-class AdvertPage(Page):
- def get_job_advert(self, url, advert):
- re_id = re.compile('http://offre-emploi.monster.fr/(.*?).aspx', re.DOTALL)
- if advert is None:
- _id = u'%s' % re_id.search(url).group(1)
- advert = MonsterJobAdvert(_id)
-
- advert.url = url
-
- div_normal = self.document.getroot().xpath('//div[@id="jobcopy"]')
- div_special = self.document.getroot().xpath('//div[@id="divtxt"]')
- if len(div_normal) > 0:
- return self.fill_normal_advert(advert, div_normal[0])
-
- elif len(div_special) > 0:
- return self.fill_special_advert(advert, div_special[0])
-
+class MonsterDate(Filter):
+    """Convert the listing's relative publication text into a datetime.
+
+    The text is searched for a number and a unit word ("heure" or "jour",
+    which also match their plurals). The result is midnight of the day
+    obtained by subtracting that many hours or days from now.
+    """
+
+    def filter(self, date):
+        """Return the publication datetime described by the text *date*.
+
+        :param date: text of the date cell, e.g. containing "2 heures".
+        :return: a datetime at 00:00, today's date at 00:00 when the text
+                 holds no number, or ``NotAvailable`` when a number is
+                 present but its unit is not recognised.
+        """
+        now = datetime.now()
+        number = re.search(r"\d+", date)
+        if number:
+            count = int(number.group(0))
+            # Substring tests: 'heure' covers "heure"/"heures",
+            # 'jour' covers "jour"/"jours".
+            if 'heure' in date:
+                delta = timedelta(hours=count)
+            elif 'jour' in date:
+                delta = timedelta(days=count)
+            else:
+                # A figure with an unknown unit: say so explicitly instead
+                # of silently falling through and returning None.
+                return NotAvailable
+            return datetime.combine(now - delta, time())
         else:
-            return advert
+            return datetime.combine(now, time.min)
- def fill_special_advert(self, advert, div):
- advert.title = u'%s' % self.parser.select(div, 'div[@class="poste"]', 1, method='xpath').text
- description = self.parser.select(div, 'div[@id="jobBodyContent"]', 1, method='xpath')
- advert.description = html2text(self.parser.tostring(description))
- titresmenuG = self.document.getroot().xpath('//div[@id="divmenuGauche"]')[0]
- contract_type = self.parser.select(titresmenuG, '//span[@itemprop="employmentType"]', method='xpath')
- if len(contract_type) != 0:
- advert.contract_type = u'%s' % contract_type[0].text_content()
+class SearchPage(HTMLPage):
+    """Search result listing; yields one BaseJobAdvert per result row."""
+
+    @pagination
+    @method
+    class iter_job_adverts(ListElement):
+        # Only rows whose class is exactly "odd" or "even" are treated as adverts.
+        item_xpath = '//table[@class="listingsTable"]/tbody/tr[@class="odd"] | //table[@class="listingsTable"]/tbody/tr[@class="even"]'
-        return self.fill_advert(advert, titresmenuG)
+        def next_page(self):
+            """Return the href of the "Suivant" (next) link, or None if absent."""
+            return Link('//a[@title="Suivant"]', default=None)(self)
-    def fill_normal_advert(self, advert, div):
-        advert.title = u'%s' % self.parser.select(div, 'h1', 1, method='xpath').text
-        description = self.parser.select(div, 'div[@id="jobBodyContent"]', 1, method='xpath')
-        advert.description = html2text(self.parser.tostring(description))
+        class item(ItemElement):
+            klass = BaseJobAdvert
-        jobsummary = self.document.getroot().xpath('//div[@id="jobsummary_content"]')[0]
-        contract_type = self.parser.select(jobsummary, 'dl/dd[@class="multipleddlast"]/span', method='xpath')
-        if len(contract_type) != 0:
-            advert.contract_type = u'%s' % contract_type[0].text_content()
+            # The id is the part of the advert link between the host and
+            # ".aspx". Dots are escaped so they only match literal dots, and
+            # the ":80" port is optional so links without it still match.
+            obj_id = Regexp(Link('./td/div/div[@class="jobTitleContainer"]/a'),
+                            r'http://offre-emploi\.monster\.fr(?::80)?/(.*?)\.aspx')
+            obj_society_name = CleanText('./td/div/div[@class="companyContainer"]/div/a')
+            obj_title = CleanText('./td/div/div[@class="jobTitleContainer"]/a')
+            obj_publication_date = MonsterDate(CleanText('td/div/div[@class="fnt20"]'))
+            # The place is optional on a row; fall back to NotAvailable.
+            obj_place = CleanText('./td/div/div[@class="jobLocationSingleLine"]/a/@title', default=NotAvailable)
- society_name = self.parser.select(jobsummary, '//span[@itemprop="name"]', method='xpath')
- if len(society_name) != 0:
- advert.society_name = u'%s' % society_name[0].text_content()
- return self.fill_advert(advert, jobsummary)
+class AdvertPage(HTMLPage):
+ # Page of a single job offer; get_job_advert maps it onto a BaseJobAdvert.
+ @method
+ class get_job_advert(ItemElement):
+ klass = BaseJobAdvert
- def fill_advert(self, advert, jobsummary):
- place = self.parser.select(jobsummary, '//span[@itemprop="jobLocation"]', method='xpath')
- if len(place) != 0:
- advert.place = u'%s' % place[0].text_content()
-
- pay = self.parser.select(jobsummary, '//span[@itemprop="baseSalary"]', method='xpath')
- if len(pay) != 0:
- advert.pay = u'%s' % pay[0].text_content()
-
- formation = self.parser.select(jobsummary, '//span[@itemprop="educationRequirements"]', method='xpath')
- if len(formation) != 0:
- advert.formation = u'%s' % formation[0].text_content()
-
- experience = self.parser.select(jobsummary, '//span[@itemprop="qualifications"]', method='xpath')
- if len(experience) != 0:
- advert.experience = u'%s' % experience[0].text_content()
-
- return advert
+ # NOTE(review): '_id' is presumably the value the browser passes to
+ # advert.go(_id=...) when opening this page; confirm how Env resolves it.
+ obj_id = Env('_id')
+ # Rebuilds the advert URL from the browser's 'advert' URL and the same id.
+ obj_url = BrowserURL('advert', _id=Env('_id'))
+ obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]')
+ obj_description = CleanHTML('//div[@id="jobBodyContent"]')
+ # Selects every <dd> whose class starts with "multipledd" and joins them
+ # using the '%s ' pattern (exact Join semantics: see the weboob filter docs).
+ obj_contract_type = Join('%s ', '//dd[starts-with(@class, "multipledd")]')
+ obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
+ # The remaining fields are read from schema-style itemprop spans.
+ obj_place = CleanText('//span[@itemprop="jobLocation"]')
+ obj_pay = CleanText('//span[@itemprop="baseSalary"]')
+ obj_formation = CleanText('//span[@itemprop="educationRequirements"]')
+ obj_experience = CleanText('//span[@itemprop="qualifications"]')