From 60aa6814755826d25f0af6711fd987d16918142d Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Fri, 4 Oct 2013 00:01:35 +0200 Subject: [PATCH] [cci][ICapJob] create CCI module --- modules/cci/__init__.py | 24 ++++++++++++ modules/cci/backend.py | 56 +++++++++++++++++++++++++++ modules/cci/browser.py | 46 ++++++++++++++++++++++ modules/cci/job.py | 27 +++++++++++++ modules/cci/pages.py | 86 +++++++++++++++++++++++++++++++++++++++++ modules/cci/test.py | 32 +++++++++++++++ 6 files changed, 271 insertions(+) create mode 100644 modules/cci/__init__.py create mode 100644 modules/cci/backend.py create mode 100644 modules/cci/browser.py create mode 100644 modules/cci/job.py create mode 100644 modules/cci/pages.py create mode 100644 modules/cci/test.py diff --git a/modules/cci/__init__.py b/modules/cci/__init__.py new file mode 100644 index 00000000..a8957098 --- /dev/null +++ b/modules/cci/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from .backend import CciBackend + + +__all__ = ['CciBackend'] diff --git a/modules/cci/backend.py b/modules/cci/backend.py new file mode 100644 index 00000000..eed5ed64 --- /dev/null +++ b/modules/cci/backend.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Bezleputh +# +# This file is part of weboob. 
from weboob.tools.backend import BaseBackend
from weboob.capabilities.job import ICapJob, BaseJobAdvert

from .browser import CciBrowser


__all__ = ['CciBackend']


class CciBackend(BaseBackend, ICapJob):
    """Job-advert backend for the CCI recruitment listing.

    Every page access is delegated to CciBrowser; this class only adapts the
    browser to the ICapJob methods.
    """

    NAME = 'cci'
    DESCRIPTION = u'cci website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '0.h'

    BROWSER = CciBrowser

    # Advert attributes that are compared against a search pattern.
    _SEARCHED_FIELDS = ('title', 'job_name', 'place', 'society_name')

    @classmethod
    def _matches(cls, advert, pattern):
        """Tell whether pattern occurs, case-insensitively, in the advert."""
        needle = pattern.lower()
        for name in cls._SEARCHED_FIELDS:
            value = getattr(advert, name, None)
            if value and needle in (u'%s' % value).lower():
                return True
        return False

    def search_job(self, pattern=None):
        """Iterate over the listed adverts, optionally filtered by pattern.

        :param pattern: text to look for.  When it is empty or None every
                        advert of the listing is yielded.  The browser's
                        search_job() takes no criteria, so the filtering is
                        done here, as a case-insensitive substring match on
                        the fields named in _SEARCHED_FIELDS.
        """
        # A byte-string pattern is decoded first so that it can be compared
        # with the text fields of the adverts.
        if isinstance(pattern, bytes):
            pattern = pattern.decode('utf-8', 'replace')

        with self.browser:
            for job_advert in self.browser.search_job():
                if not pattern or self._matches(job_advert, pattern):
                    yield job_advert

    def advanced_search_job(self):
        """Advanced search is not implemented: always returns an empty list."""
        return []

    def get_job_advert(self, _id, advert=None):
        """Return the advert identified by _id.

        :param _id: identifier of the advert to load
        :param advert: existing advert to complete; passed through to the
                       browser, which creates a new one when it is None
        """
        with self.browser:
            return self.browser.get_job_advert(_id, advert)

    def fill_obj(self, advert, fields):
        """Complete advert through get_job_advert() and return the result.

        ``fields`` is accepted for the OBJECTS filler signature but is not
        used: the advert is always reloaded as a whole.
        """
        return self.get_job_advert(advert.id, advert)

    OBJECTS = {BaseJobAdvert: fill_obj}
from weboob.tools.browser import BaseBrowser

from .pages import SearchPage


__all__ = ['CciBrowser']


class CciBrowser(BaseBrowser):
    """Browser for the CCI job listing; both operations load the same URL."""

    PROTOCOL = 'http'
    DOMAIN = 'www.cci.fr/web/recrutement/les-offres-d-emploi'
    ENCODING = None

    # The only registered page: the listing, keyed by its full URL.
    PAGES = {'%s://%s' % (PROTOCOL, DOMAIN): SearchPage}

    def _open_listing(self):
        """Load the listing URL and return the page the browser landed on."""
        listing_url = '%s://%s' % (self.PROTOCOL, self.DOMAIN)
        self.location(listing_url)
        assert self.is_on_page(SearchPage)
        return self.page

    def search_job(self):
        """Return the adverts iterator of the freshly loaded listing page."""
        return self._open_listing().iter_job_adverts()

    def get_job_advert(self, _id, advert):
        """Reload the listing and let the page build or complete the advert.

        :param _id: identifier of the wanted advert
        :param advert: advert to complete, handed unchanged to the page
        """
        listing = self._open_listing()
        return listing.get_job_advert(_id, advert)
from weboob.capabilities.job import BaseJobAdvert


class ApecJobAdvert(BaseJobAdvert):
    """Job advert whose identifier maps to a cadres.apec.fr offer URL."""

    # NOTE(review): this class sits in modules/cci/job.py but is named after,
    # and links to, apec.fr -- it looks like a leftover from another module.
    # Confirm whether it is still needed here.

    @classmethod
    def id2url(cls, _id):
        """Build the offer URL from an id made of '/'-separated pieces.

        Only the first two pieces are used.  An id that contains no '/'
        raises IndexError.
        """
        pieces = _id.split('/')
        head = pieces[0]
        tail = pieces[1]
        template = 'http://cadres.apec.fr/offres-emploi-cadres/offres-emploi-cadres/0_0_0_%s________%s.html'
        return template % (head, tail)
# ---- modules/cci/pages.py ----

import dateutil.parser

from weboob.tools.browser import BasePage
from weboob.capabilities.job import BaseJobAdvert

__all__ = ['SearchPage']


class SearchPage(BasePage):
    """Page that holds both the offer table and the per-offer detail blocks."""

    def iter_job_adverts(self):
        """Yield one BaseJobAdvert per offer row of the table.

        Rows of both row classes are selected with a single XPath so that
        they come out in document order rather than grouped by class.
        The advert id is the href of the row's link with '#' removed.
        """
        rows = self.document.getroot().xpath(
            "//tr[@class='texteCol2TableauClair'"
            " or @class='texteCol2TableauFonce']")

        for row in rows:
            cells = self.parser.select(row, 'td', method='xpath')
            link = self.parser.select(cells[2], 'a', 1, method='xpath')
            advert = BaseJobAdvert(link.attrib['href'].replace('#', ''))
            advert.title = u'%s' % link.text_content()
            advert.society_name = u'CCI %s' % cells[3].text
            advert.place = u'%s' % cells[0].text
            advert.job_name = u'%s' % cells[1].text
            yield advert

    def get_job_advert(self, _id, advert):
        """Return advert completed with the details of offer _id.

        :param _id: identifier of the offer, compared with the ``name``
                    attribute of the anchors inside ``div#divrecueil``
        :param advert: advert to complete; a new BaseJobAdvert(_id) is
                       created when it is None
        :returns: the advert, left as it was when no matching offer block
                  is found
        """
        if advert is None:
            advert = BaseJobAdvert(_id)

        offer = self._find_offer(_id)
        if offer is not None:
            self._fill_from_offer(advert, offer)
        return advert

    def _find_offer(self, _id):
        """Return the first ``div#offre`` following the anchor named _id."""
        container = self.document.getroot().xpath("//div[@id='divrecueil']")[0]
        wanted = u'%s' % _id
        anchor_seen = False
        for item in container:
            # .get() is used because not every <div>/<a> child is guaranteed
            # to carry an id/name attribute.
            if anchor_seen and item.tag == 'div' and item.attrib.get('id') == u'offre':
                return item
            if item.tag == 'a':
                name = item.attrib.get('name')
                if name is not None and u'%s' % name == wanted:
                    anchor_seen = True
        return None

    def _fill_from_offer(self, advert, offer):
        """Copy society, job name, description, url and date into advert."""
        spans = self.parser.select(offer, 'div/span', 2, method='xpath')
        advert.society_name = u'CCI %s' % spans[0].text_content()
        advert.job_name = u'%s' % spans[1].text_content()

        fieldsets = self.parser.select(offer, 'div/fieldset', 2, method='xpath')
        advert.description = self._build_description(fieldsets)
        advert.url = self.url + '#' + advert.id

        date = self.parser.select(offer, 'div/fieldset/p[@class="dateOffre"]', 1, method='xpath')
        # NOTE(review): dateutil reads ambiguous numeric dates month-first by
        # default; confirm the format of the dateOffre text.
        advert.publication_date = dateutil.parser.parse(date.text_content()).date()

    def _build_description(self, fieldsets):
        """Assemble the text description from the two offer fieldsets.

        A fieldset only contributes when it has exactly the expected number
        of headings and paragraphs (5/5 for the first, 2/3 for the second);
        otherwise it is skipped.
        """
        chunks = []

        paragraphs = self.parser.select(fieldsets[0], 'p[@class="normal"]', method='xpath')
        headings = self.parser.select(fieldsets[0], 'h2[@class="titreParagraphe"]', method='xpath')
        if len(paragraphs) == 5 and len(headings) == 5:
            for heading, paragraph in zip(headings, paragraphs):
                chunks.append("\r\n-- %s --\r\n" % heading.text)
                chunks.append("%s\r\n" % paragraph.text_content())

        paragraphs = self.parser.select(fieldsets[1], 'p[@class="normal"]', method='xpath')
        headings = self.parser.select(fieldsets[1], 'h2[@class="titreParagraphe"]', method='xpath')
        if len(paragraphs) == 3 and len(headings) == 2:
            # The first paragraph contributes only the text of its link.
            link = self.parser.select(paragraphs[0], 'a', 1, method='xpath')
            chunks.append("\r\n-- %s --\r\n" % headings[0].text)
            chunks.append("%s\r\n" % link.text_content())

            chunks.append("\r\n-- %s --\r\n" % headings[1].text)
            chunks.append("%s\r\n" % paragraphs[1].text_content())
            chunks.append("%s\r\n" % paragraphs[2].text_content())

        return "".join(chunks)


# ---- modules/cci/test.py ----

from weboob.tools.test import BackendTest


class CciTest(BackendTest):
    """Live test: list the adverts, then load the first one in full."""

    BACKEND = 'cci'

    def test_cci_search(self):
        adverts = list(self.backend.search_job())
        self.assertTrue(adverts, 'search_job() returned no advert')
        advert = self.backend.get_job_advert(adverts[0].id, None)
        self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))