[cci] adapt to browser2
This commit is contained in:
parent
7555938470
commit
bd38a16d76
3 changed files with 59 additions and 69 deletions
|
|
@ -18,8 +18,9 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend, BackendConfig
|
||||||
from weboob.capabilities.job import ICapJob, BaseJobAdvert
|
from weboob.capabilities.job import ICapJob, BaseJobAdvert
|
||||||
|
from weboob.tools.value import Value
|
||||||
|
|
||||||
from .browser import CciBrowser
|
from .browser import CciBrowser
|
||||||
|
|
||||||
|
|
@ -37,19 +38,18 @@ class CciBackend(BaseBackend, ICapJob):
|
||||||
|
|
||||||
BROWSER = CciBrowser
|
BROWSER = CciBrowser
|
||||||
|
|
||||||
|
CONFIG = BackendConfig(Value('metier', label='Job name', masked=False, default=''))
|
||||||
|
|
||||||
def search_job(self, pattern=None):
|
def search_job(self, pattern=None):
|
||||||
with self.browser:
|
return self.browser.search_job(pattern)
|
||||||
for job_advert in self.browser.search_job(pattern):
|
|
||||||
yield job_advert
|
|
||||||
|
|
||||||
def advanced_search_job(self):
|
def advanced_search_job(self):
|
||||||
return []
|
return self.browser.search_job(pattern=self.config['metier'].get())
|
||||||
|
|
||||||
def get_job_advert(self, _id, advert=None):
|
def get_job_advert(self, _id, advert=None):
|
||||||
with self.browser:
|
return self.browser.get_job_advert(_id, advert)
|
||||||
return self.browser.get_job_advert(_id, advert)
|
|
||||||
|
|
||||||
def fill_obj(self, advert, fields):
|
def fill_obj(self, advert, fields):
|
||||||
self.get_job_advert(advert.id, advert)
|
return self.get_job_advert(advert.id, advert)
|
||||||
|
|
||||||
OBJECTS = {BaseJobAdvert: fill_obj}
|
OBJECTS = {BaseJobAdvert: fill_obj}
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,8 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from weboob.tools.browser2 import PagesBrowser, URL
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.capabilities.job import BaseJobAdvert
|
||||||
|
|
||||||
from .pages import SearchPage
|
from .pages import SearchPage
|
||||||
|
|
||||||
|
|
@ -26,21 +26,15 @@ from .pages import SearchPage
|
||||||
__all__ = ['CciBrowser']
|
__all__ = ['CciBrowser']
|
||||||
|
|
||||||
|
|
||||||
class CciBrowser(BaseBrowser):
|
class CciBrowser(PagesBrowser):
|
||||||
PROTOCOL = 'http'
|
BASEURL = 'http://www.cci.fr'
|
||||||
DOMAIN = 'www.cci.fr/web/recrutement/les-offres-d-emploi'
|
|
||||||
ENCODING = "UTF-8"
|
|
||||||
|
|
||||||
PAGES = {
|
search_page = URL('/web/recrutement/les-offres-d-emploi', SearchPage)
|
||||||
'%s://%s' % (PROTOCOL, DOMAIN): SearchPage,
|
|
||||||
}
|
|
||||||
|
|
||||||
def search_job(self, pattern):
|
def search_job(self, pattern):
|
||||||
self.location('%s://%s' % (self.PROTOCOL, self.DOMAIN))
|
return self.search_page.go().iter_job_adverts(pattern=pattern)
|
||||||
assert self.is_on_page(SearchPage)
|
|
||||||
return self.page.iter_job_adverts(pattern)
|
|
||||||
|
|
||||||
def get_job_advert(self, _id, advert):
|
def get_job_advert(self, _id, advert):
|
||||||
self.location('%s://%s' % (self.PROTOCOL, self.DOMAIN))
|
if advert is None:
|
||||||
assert self.is_on_page(SearchPage)
|
advert = BaseJobAdvert(_id)
|
||||||
return self.page.get_job_advert(_id, advert)
|
return self.search_page.stay_or_go().get_job_advert(obj=advert)
|
||||||
|
|
|
||||||
|
|
@ -17,63 +17,59 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import dateutil.parser
|
from weboob.tools.browser2.page import HTMLPage, method, ItemElement, TableElement
|
||||||
|
from weboob.tools.browser2.filters import Filter, Link, CleanText, Format, Env, DateTime, CleanHTML, TableCell, Join
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
|
||||||
from weboob.capabilities.job import BaseJobAdvert
|
from weboob.capabilities.job import BaseJobAdvert
|
||||||
|
|
||||||
__all__ = ['SearchPage']
|
__all__ = ['SearchPage']
|
||||||
|
|
||||||
|
|
||||||
class SearchPage(BasePage):
|
class Child(Filter):
|
||||||
def iter_job_adverts(self, pattern):
|
def filter(self, el):
|
||||||
trs = self.document.getroot().xpath("//tr[@class='texteCol2TableauClair']") \
|
return list(el[0].iterchildren())
|
||||||
+ self.document.getroot().xpath("//tr[@class='texteCol2TableauFonce']")
|
|
||||||
|
|
||||||
for tr in trs:
|
|
||||||
tds = self.parser.select(tr, 'td', method='xpath')
|
|
||||||
a = self.parser.select(tds[2], 'a', 1, method='xpath')
|
|
||||||
advert = BaseJobAdvert(a.attrib['href'].replace('#', ''))
|
|
||||||
advert.title = u'%s' % a.text_content()
|
|
||||||
advert.society_name = u'CCI %s' % tds[3].text
|
|
||||||
advert.place = u'%s' % tds[0].text
|
|
||||||
advert.job_name = u'%s' % tds[1].text
|
|
||||||
if pattern is not None:
|
|
||||||
if pattern in advert.title or pattern in advert.job_name:
|
|
||||||
yield advert
|
|
||||||
else:
|
|
||||||
yield advert
|
|
||||||
|
|
||||||
def get_job_advert(self, _id, advert):
|
class SearchPage(HTMLPage):
|
||||||
if advert is None:
|
@method
|
||||||
advert = BaseJobAdvert(_id)
|
class iter_job_adverts(TableElement):
|
||||||
|
item_xpath = "//tr[(@class='texteCol2TableauClair' or @class='texteCol2TableauFonce')]"
|
||||||
|
head_xpath = "//tr[1]/td[@class='titreCol2Tableau']/text()"
|
||||||
|
|
||||||
items = self.document.getroot().xpath("//div[@id='divrecueil']")[0]
|
col_place = u'Région'
|
||||||
keep_next = False
|
col_job_name = u'Filière'
|
||||||
for item in items:
|
col_id = u'Intitulé du poste'
|
||||||
|
col_society_name = u'CCI(R)'
|
||||||
|
|
||||||
if keep_next:
|
class item(ItemElement):
|
||||||
if item.tag == 'div' and item.attrib['id'] == u'offre':
|
klass = BaseJobAdvert
|
||||||
first_div = self.parser.select(item, 'div/span', 2, method='xpath')
|
|
||||||
advert.society_name = u'CCI %s' % first_div[0].text_content()
|
|
||||||
advert.job_name = u'%s' % first_div[1].text_content()
|
|
||||||
|
|
||||||
second_div = self.parser.select(item, 'div/fieldset', 2, method='xpath')
|
def validate(self, advert):
|
||||||
|
if advert and 'pattern' in self.env and self.env['pattern']:
|
||||||
|
return self.env['pattern'].upper() in advert.title.upper() or \
|
||||||
|
self.env['pattern'].upper() in advert.job_name.upper()
|
||||||
|
return True
|
||||||
|
|
||||||
ps_1 = self.parser.select(second_div[0], 'p[@class="normal"]', method='xpath')
|
obj_id = CleanText(Link(Child(TableCell('id'))), replace=[('#', '')])
|
||||||
h2s_1 = self.parser.select(second_div[0], 'h2[@class="titreParagraphe"]', method='xpath')
|
obj_title = Format('%s - %s', CleanText(TableCell('id')), CleanText(TableCell('job_name')))
|
||||||
description = ""
|
obj_society_name = Format(u'CCI %s', CleanText(TableCell('society_name')))
|
||||||
if len(ps_1) == 5 and len(h2s_1) == 5:
|
obj_place = CleanText(TableCell('place'))
|
||||||
for i in range(0, 5):
|
obj_job_name = CleanText(TableCell('id'))
|
||||||
description += "\r\n-- %s --\r\n" % h2s_1[i].text
|
|
||||||
description += "%s\r\n" % ps_1[i].text_content()
|
|
||||||
advert.description = description
|
|
||||||
advert.url = self.url + '#' + advert.id
|
|
||||||
date = self.parser.select(item, 'div/fieldset/p[@class="dateOffre"]', 1, method='xpath')
|
|
||||||
advert.publication_date = dateutil.parser.parse(date.text_content()).date()
|
|
||||||
break
|
|
||||||
|
|
||||||
if item.tag == 'a' and u'%s' % item.attrib['name'] == u'%s' % _id:
|
@method
|
||||||
keep_next = True
|
class get_job_advert(ItemElement):
|
||||||
|
klass = BaseJobAdvert
|
||||||
|
|
||||||
return advert
|
obj_url = Format('%s#%s', Env('url'), Env('id'))
|
||||||
|
obj_description = Join('%s\r\n',
|
||||||
|
'div/fieldset/*[(@class="titreParagraphe" or @class="normal")]',
|
||||||
|
textCleaner=CleanHTML)
|
||||||
|
obj_title = CleanText('div/span[@class="intituleposte"]')
|
||||||
|
obj_job_name = CleanText('div/span[@class="intituleposte"]')
|
||||||
|
obj_society_name = Format('CCI %s', CleanText('div/span[@class="crci crcititle"]'))
|
||||||
|
obj_publication_date = DateTime(CleanText('div/fieldset/p[@class="dateOffre"]'), dayfirst=True)
|
||||||
|
|
||||||
|
def parse(self, el):
|
||||||
|
self.el = el.xpath("//a[@name='%s']/following-sibling::div[1]" % self.obj.id)[0]
|
||||||
|
self.env['url'] = self.page.url
|
||||||
|
self.env['id'] = self.obj.id
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue