diff --git a/modules/regionsjob/backend.py b/modules/regionsjob/backend.py
index 8f24ebc1..c4ff23f7 100644
--- a/modules/regionsjob/backend.py
+++ b/modules/regionsjob/backend.py
@@ -19,13 +19,11 @@
from weboob.tools.backend import BaseBackend, BackendConfig
-from weboob.capabilities.job import ICapJob
+from weboob.capabilities.job import ICapJob, BaseJobAdvert
from .browser import RegionsjobBrowser
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.value import Value
-from .job import RegionsJobAdvert
-
__all__ = ['RegionsjobBackend']
@@ -153,21 +151,19 @@ class RegionsjobBackend(BaseBackend, ICapJob):
return self.create_browser(self.config['website'].get())
def search_job(self, pattern=''):
- with self.browser:
- return self.browser.search_job(pattern=pattern)
+ return self.browser.search_job(pattern=pattern)
def advanced_search_job(self):
- return self.browser.advanced_search_job(metier=self.config['metier'].get(),
- fonction=int(self.config['fonction'].get()),
- secteur=int(self.config['secteur'].get()),
- contract=int(self.config['contract'].get()),
- experience=int(self.config['experience'].get()))
+ return self.browser.search_job(pattern=self.config['metier'].get(),
+ fonction=int(self.config['fonction'].get()),
+ secteur=int(self.config['secteur'].get()),
+ contract=int(self.config['contract'].get()),
+ experience=int(self.config['experience'].get()))
def get_job_advert(self, _id, advert=None):
- with self.browser:
- return self.browser.get_job_advert(_id, advert)
+ return self.browser.get_job_advert(_id, advert)
def fill_obj(self, advert, fields):
- self.get_job_advert(advert.id, advert)
+ return self.get_job_advert(advert.id, advert)
- OBJECTS = {RegionsJobAdvert: fill_obj}
+ OBJECTS = {BaseJobAdvert: fill_obj}
diff --git a/modules/regionsjob/browser.py b/modules/regionsjob/browser.py
index f4fdc4d8..68c52162 100644
--- a/modules/regionsjob/browser.py
+++ b/modules/regionsjob/browser.py
@@ -18,54 +18,31 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
-from weboob.tools.browser import BaseBrowser
-from weboob.tools.browser.decorators import id2url
+from weboob.tools.browser2 import PagesBrowser, URL
from .pages import SearchPage, AdvertPage
-from .job import RegionsJobAdvert
-
__all__ = ['RegionsjobBrowser']
-class RegionsjobBrowser(BaseBrowser):
- PROTOCOL = 'http'
- ENCODING = 'utf-8'
+class RegionsjobBrowser(PagesBrowser):
- PAGES = {
- '%s://(.*?)/offre_emploi/index.aspx\?v=___(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_' % (PROTOCOL): SearchPage,
- '%s://(.*?)/offre_emploi/detailoffre.aspx\?numoffre=(.*?)&de=consultation' % (PROTOCOL): AdvertPage,
- }
+ advert_page = URL('/offre_emploi/detailoffre.aspx\?numoffre=(?P<_id>.*)&de=consultation', AdvertPage)
+ search_page = URL('/offre_emploi/index.aspx\?v=___0_(?P<fonction>.*)_(?P<experience>.*)_0_(?P<contract>.*)_0_0_(?P<secteur>.*)_0_(?P<metier>.*)_', SearchPage)
def __init__(self, website, *args, **kwargs):
- self.DOMAIN = website
- BaseBrowser.__init__(self, *args, **kwargs)
+ self.BASEURL = 'http://%s' % website
+ PagesBrowser.__init__(self, *args, **kwargs)
- def search_job(self, pattern=''):
- self.location('%s://%s/offre_emploi/index.aspx?v=___0_0_0_0_0_0_0_0_0_%s_'
- % (self.PROTOCOL, self.DOMAIN, urllib.quote_plus(pattern.encode(self.ENCODING))))
- assert self.is_on_page(SearchPage)
- return self.page.iter_job_adverts(self.DOMAIN)
+ def search_job(self, pattern='', fonction=0, secteur=0, contract=0, experience=0):
+ return self.search_page.go(fonction=fonction,
+ experience=experience,
+ contract=contract,
+ secteur=secteur,
+ metier=urllib.quote_plus(pattern.encode('utf-8'))
+ ).iter_job_adverts(domain=self.BASEURL)
- def advanced_search_job(self, metier, fonction, secteur, contract, experience):
- self.location('%s://%s/offre_emploi/index.aspx?v=___%s_%s_%s_%s_%s_%s_%s_%s_%s_%s_'
- % (self.PROTOCOL,
- self.DOMAIN,
- '0',
- fonction,
- experience,
- '0',
- contract,
- '0',
- '0',
- secteur,
- '0',
- urllib.quote_plus(metier.encode(self.ENCODING))))
- assert self.is_on_page(SearchPage)
- return self.page.iter_job_adverts(self.DOMAIN)
-
- @id2url(RegionsJobAdvert.id2url)
- def get_job_advert(self, url, advert):
- self.location(url)
- assert self.is_on_page(AdvertPage)
- return self.page.get_job_advert(url, advert)
+ def get_job_advert(self, _id, advert):
+ splitted_id = _id.split('#')
+ self.BASEURL = splitted_id[0]
+ return self.advert_page.go(_id=splitted_id[1]).get_job_advert(obj=advert)
diff --git a/modules/regionsjob/job.py b/modules/regionsjob/job.py
deleted file mode 100644
index 743592af..00000000
--- a/modules/regionsjob/job.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2013 Bezleputh
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-from weboob.capabilities.job import BaseJobAdvert
-
-
-class RegionsJobAdvert(BaseJobAdvert):
- @classmethod
- def id2url(cls, _id):
- splitted_id = _id.split('|')
- return 'http://%s/offre_emploi/detailoffre.aspx?numoffre=%s&de=consultation' \
- % (splitted_id[0], splitted_id[1])
-
diff --git a/modules/regionsjob/pages.py b/modules/regionsjob/pages.py
index 6add5623..b0ee3199 100644
--- a/modules/regionsjob/pages.py
+++ b/modules/regionsjob/pages.py
@@ -17,100 +17,59 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.misc import html2text
-from weboob.tools.browser import BasePage
-from .job import RegionsJobAdvert
-from datetime import datetime, date
-import re
+from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement
+from weboob.tools.browser2.filters import Link, CleanText, Regexp, Format, Env, DateGuesser, CleanHTML, DateTime
+from weboob.tools.date import LinearDateGuesser
+from weboob.capabilities.job import BaseJobAdvert
__all__ = ['SearchPage']
-class SearchPage(BasePage):
- def iter_job_adverts(self, website):
- re_id = re.compile('(.*?)numoffre=(.*?)&de=consultation', re.DOTALL)
- lis = self.document.getroot().xpath('//div[@id="liste_offres"]/ul/li')
- for li in lis:
- a = self.parser.select(li, 'div/span[@class="offres_poste"]/a', 1, method='xpath')
- _id = u'%s|%s' % (website, re_id.search(a.attrib['href']).group(2))
- advert = RegionsJobAdvert(_id)
- advert.title = u'%s' % a.text
+class SearchPage(HTMLPage):
+ @method
+ class iter_job_adverts(ListElement):
+ item_xpath = '//div[@id="liste_offres"]/ul/li'
- society_name = self.parser.select(li, 'div/span[@class="offres_entreprise"]/span/a',
- method='xpath')
- if len(society_name) > 0:
- advert.society_name = u'%s' % society_name[0].text
+ class item(ItemElement):
+ klass = BaseJobAdvert
- advert.place = u'%s' % self.parser.select(li, 'div/span[@class="offres_ville"]/span/span/span',
- 1, method='xpath').text.strip()
- _date = u'%s' % self.parser.select(li, 'div/span[@class="offres_date"]',
- 1, method='xpath').text_content()
- year = date.today().year
- splitted_date = _date.split('/')
- advert.publication_date = datetime(year, int(splitted_date[1]), int(splitted_date[0]))
- advert.contract_type = u'%s' % self.parser.select(li, 'div/span[@class="offres_poste"]/span',
- 1, method='xpath').text
- yield advert
+ obj_id = Format(u'%s#%s',
+ Env('domain'),
+ Regexp(Link('div/span[@class="offres_poste"]/a'), '.*?numoffre=(.*?)&de=consultation'))
+ obj_title = CleanText('div/span[@class="offres_poste"]/a')
+ obj_society_name = CleanText('div/span[@class="offres_entreprise"]/span/a')
+ obj_place = CleanText('div/span[@class="offres_ville"]/span/span/span')
+ obj_contract_type = CleanText('div/span[@class="offres_poste"]/span')
+ obj_publication_date = DateGuesser(CleanText('div/span[@class="offres_date"]'), LinearDateGuesser())
-class AdvertPage(BasePage):
- def get_job_advert(self, url, advert):
- re_id = re.compile('http://(.*?)/offre_emploi/detailoffre.aspx\?numoffre=(.*?)&de=consultation', re.DOTALL)
- if advert is None:
- _id = u'%s|%s' % (re_id.search(url).group(1), re_id.search(url).group(2))
- advert = RegionsJobAdvert(_id)
+class AdvertPage(HTMLPage):
+ @method
+ class get_job_advert(ItemElement):
+ klass = BaseJobAdvert
- advert.url = u'%s' % url
+ def parse(self, el):
+ if self.obj.id:
+ advert = self.obj
+ advert.url = self.page.url
+ advert.description = Format(u'%s\r\n%s',
+ CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]'),
+ CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]/following-sibling::p[1]'))(el)
+ advert.pay = CleanText('//div[@id="annonce"]/p[@class="rubrique_annonce"]/following-sibling::p[1]')(el)
+ raise SkipItem()
- div = self.document.getroot().xpath('//div[@id="annonce"]')[0]
+ self.env['url'] = self.page.url
- advert.title = u'%s' % self.parser.select(div, 'h1', 1, method='xpath').text
+ obj_description = Format(u'%s%s',
+ CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]'),
+ CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]/following-sibling::p[1]'))
- content = self.parser.select(div, 'p', method='xpath')
-
- next_is_date = False
- next_is_pay = False
- description = ''
-
- for p in content:
- if next_is_date:
- m = re.match('(\d{2})\s(\d{2})\s(\d{4})', date)
- if m:
- dd = int(m.group(1))
- mm = int(m.group(2))
- yyyy = int(m.group(3))
- advert.publication_date = datetime.date(yyyy, mm, dd)
- next_is_date = False
-
- elif next_is_pay:
- advert.pay = html2text(self.parser.tostring(p))
- next_is_pay = False
-
- elif 'class' in p.attrib:
- if p.attrib['class'] == 'contrat_loc':
- _p = self.parser.select(div, 'p[@class="contrat_loc"]', 1, method='xpath')
- content_p = _p.text_content().strip().split('\r\n')
- for el in content_p:
- splitted_el = el.split(':')
- if len(splitted_el) == 2:
- if splitted_el[0] == 'Entreprise':
- advert.society_name = splitted_el[1]
- elif splitted_el[0] == 'Contrat':
- advert.contract_type = splitted_el[1]
- elif splitted_el[0] == 'Localisation':
- advert.place = splitted_el[1]
-
- elif p.attrib['class'] == 'date_ref':
- next_is_date = True
-
- elif p.attrib['class'] == 'rubrique_annonce' and p.text == 'Salaire':
- next_is_pay = True
-
- else:
- description = description + html2text(self.parser.tostring(p))
- else:
- description = description + html2text(self.parser.tostring(p))
-
- advert.description = u'%s' % description
-
- return advert
+ obj_id = Env('_id')
+ obj_url = Env('url')
+ obj_publication_date = DateTime(Regexp(CleanText('//div[@id="annonce"]/p[@class="date_ref"]'),
+ '(\d{2}/\d{2}/\d{4})'))
+ obj_title = CleanText('//div[@id="annonce"]/h1')
+ obj_society_name = CleanText('//div[@id="annonce"]/p[@class="contrat_loc"]/strong[1]')
+ obj_contract_type = CleanText('//div[@id="annonce"]/p[@class="contrat_loc"]/strong[2]')
+ obj_place = CleanText('//div[@id="annonce"]/p[@class="contrat_loc"]/strong[3]')
+ obj_pay = CleanText('//div[@id="annonce"]/p[@class="rubrique_annonce"]/following-sibling::p[1]')