From f9192b1e6bc083917616944452674eee35526f1a Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Wed, 24 Jul 2013 13:00:47 +0200 Subject: [PATCH] add pole emploi module, advanced filters will come later --- modules/popolemploi/__init__.py | 24 +++++++++ modules/popolemploi/backend.py | 50 +++++++++++++++++++ modules/popolemploi/browser.py | 50 +++++++++++++++++++ modules/popolemploi/job.py | 26 ++++++++++ modules/popolemploi/pages.py | 86 +++++++++++++++++++++++++++++++++ modules/popolemploi/test.py | 31 ++++++++++++ 6 files changed, 267 insertions(+) create mode 100644 modules/popolemploi/__init__.py create mode 100644 modules/popolemploi/backend.py create mode 100644 modules/popolemploi/browser.py create mode 100644 modules/popolemploi/job.py create mode 100644 modules/popolemploi/pages.py create mode 100644 modules/popolemploi/test.py diff --git a/modules/popolemploi/__init__.py b/modules/popolemploi/__init__.py new file mode 100644 index 00000000..6188c944 --- /dev/null +++ b/modules/popolemploi/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import BaseBackend
from weboob.capabilities.job import ICapJob

from .browser import PopolemploiBrowser
from .job import PopolemploiJobAdvert

__all__ = ['PopolemploiBackend']


class PopolemploiBackend(BaseBackend, ICapJob):
    """Job-advert backend for the Pole Emploi website.

    Every operation is forwarded to the ``PopolemploiBrowser`` instance
    available as ``self.browser``, which is entered as a context manager
    for the duration of each call.
    """

    # Module metadata.
    NAME = 'popolemploi'
    DESCRIPTION = u'Pole Emploi website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    VERSION = '0.g'

    # Browser class backing ``self.browser``.
    BROWSER = PopolemploiBrowser

    def search_job(self, pattern=None):
        """Search job adverts matching ``pattern``.

        :param pattern: keywords handed unchanged to the browser's search.
        :returns: whatever ``self.browser.search_job(pattern)`` returns.
        """
        browser = self.browser
        with browser:
            return browser.search_job(pattern)

    def get_job_advert(self, _id, advert=None):
        """Fetch a single advert through the browser.

        :param _id: identifier of the advert, forwarded to the browser.
        :param advert: optional existing advert object, forwarded to the
            browser together with ``_id``.
        :returns: whatever ``self.browser.get_job_advert(_id, advert)``
            returns.
        """
        browser = self.browser
        with browser:
            return browser.get_job_advert(_id, advert)

    def fill_obj(self, advert, fields):
        """Complete ``advert`` by re-fetching it by its id.

        ``fields`` is accepted but not consulted: the advert is always
        fetched in full.  Nothing is returned.
        """
        self.get_job_advert(advert.id, advert)

    # Maps an object class to the method used to fill objects of that class.
    OBJECTS = {PopolemploiJobAdvert: fill_obj}
from weboob.tools.browser.decorators import id2url
from weboob.tools.browser import BaseBrowser

from .pages import SearchPage, AdvertPage
from .job import PopolemploiJobAdvert


__all__ = ['PopolemploiBrowser']


class PopolemploiBrowser(BaseBrowser):
    """Browser for the Pole Emploi job-offer search and detail pages."""

    PROTOCOL = 'http'
    # NOTE(review): this holds a full URL (scheme and path included) rather
    # than a bare host name -- confirm against how BaseBrowser combines
    # PROTOCOL and DOMAIN.
    DOMAIN = 'http://www.pole-emploi.fr/accueil/'
    ENCODING = None

    # URL pattern -> page class used to handle the matching response.
    PAGES = {
        'http://candidat.pole-emploi.fr/candidat/rechercheoffres/resultats(.*?)': SearchPage,
        # A named group must carry an explicit name: "(?P<name>...)".
        'http://candidat.pole-emploi.fr/candidat/rechercheoffres/detail/(?P<id>.+)': AdvertPage,
    }

    def search_job(self, pattern=None):
        """Open the search results for ``pattern`` and iterate its adverts.

        :param pattern: search keywords; spaces are replaced by ``+`` before
            being placed in the query string.  ``None`` (the default) is
            treated as an empty keyword.
        :returns: the result of ``SearchPage.iter_job_adverts()``.
        :raises AssertionError: if the browser does not end up on a
            ``SearchPage``.
        """
        # The signature allows None, so never call .replace() on it directly.
        keywords = (pattern or '').replace(' ', '+')
        self.location('http://offre.pole-emploi.fr/resultat?offresPartenaires=true&libMetier=%s'
                      % keywords)
        assert self.is_on_page(SearchPage)
        return self.page.iter_job_adverts()

    # NOTE(review): presumably the decorator turns a bare advert id into a
    # URL using PopolemploiJobAdvert.id2url -- confirm in weboob's id2url.
    @id2url(PopolemploiJobAdvert.id2url)
    def get_job_advert(self, url, advert):
        """Open the advert page at ``url`` and parse it.

        :param url: location of the advert page.
        :param advert: existing advert object to fill, or a false value to
            let the page create one.
        :returns: the result of ``AdvertPage.get_job_advert(url, advert)``.
        :raises AssertionError: if the browser does not end up on an
            ``AdvertPage``.
        """
        self.location(url)
        assert self.is_on_page(AdvertPage)
        return self.page.get_job_advert(url, advert)
from weboob.capabilities.job import BaseJobAdvert


class PopolemploiJobAdvert(BaseJobAdvert):
    """Job advert published on the Pole Emploi website."""

    @classmethod
    def id2url(cls, _id):
        """Return the detail-page URL of the advert identified by ``_id``."""
        url_template = 'http://candidat.pole-emploi.fr/candidat/rechercheoffres/detail/%s'
        return url_template % _id
from weboob.tools.browser import BasePage
import dateutil.parser

from .job import PopolemploiJobAdvert


__all__ = ['SearchPage', 'AdvertPage']


class SearchPage(BasePage):
    """Search results page: one table row per job advert."""

    def iter_job_adverts(self):
        """Yield an advert for each result row that produces a truthy one."""
        root = self.document.getroot()
        for tr in root.xpath('//table[@class="definition-table ordered"]/tbody/tr'):
            advert = self.create_job_advert(tr)
            if not advert:
                continue
            yield advert

    def create_job_advert(self, row):
        """Build a ``PopolemploiJobAdvert`` from one result row.

        The advert id is the last seven characters of the offer link's
        ``href``.  Returns ``None`` when that id is empty.
        """
        link = self.parser.select(row, 'td[@headers="offre"]/a', 1, method='xpath')
        _id = u'%s' % (link.attrib['href'][-7:])
        if not _id:
            return None

        advert = PopolemploiJobAdvert(_id)

        contract_cell = self.parser.select(row, 'td[@headers="contrat"]', 1, method='xpath')
        advert.contract_type = u'%s' % contract_cell.text
        advert.title = u'%s' % link.text_content().strip()

        # The company name is optional: only set it when the span is present.
        companies = self.parser.select(row, 'td/div/p/span[@class="company"]', method='xpath')
        if companies:
            advert.society_name = companies[0].text

        place_cell = self.parser.select(row, 'td[@headers="lieu"]', 1, method='xpath')
        advert.place = u'%s' % place_cell.text_content()

        # NOTE(review): dateutil reads ambiguous numeric dates month-first by
        # default -- confirm this matches the format the site emits.
        date_cell = self.parser.select(row, 'td[@headers="dateEmission"]', 1, method='xpath')
        advert.publication_date = dateutil.parser.parse(date_cell.text).date()
        return advert


class AdvertPage(BasePage):
    """Detail page of a single job advert."""

    def get_job_advert(self, url, advert):
        """Fill ``advert`` (or a newly created one) from this page.

        :param url: stored as-is in ``advert.url``.
        :param advert: advert to complete; when false, a new
            ``PopolemploiJobAdvert`` is created with the id read from the
            page.
        :returns: the filled advert.
        """
        content = self.document.getroot().xpath('//div[@class="block-content"]/div')[0]

        def first(xpath):
            # Single-element lookup relative to the advert's content node.
            return self.parser.select(content, xpath, 1, method='xpath')

        if not advert:
            _id = first('ul/li/ul/li/div[@class="value"]/span').text
            advert = PopolemploiJobAdvert(_id)

        # Title and job name both come from the same <h4> heading.
        heading = u'%s' % first('h4').text.strip()
        advert.title = heading
        advert.job_name = heading

        advert.description = u'%s' % first('p[@itemprop="description"]').text
        advert.society_name = u'%s' % first('div[@class="vcard"]/p[@class="title"]/span').text
        advert.url = url

        # Remaining fields: text of one element each, stripped of whitespace.
        stripped_fields = (
            ('place', 'ul/li/div[@class="value"]/ul/li[@itemprop="addressRegion"]'),
            ('contract_type', 'ul/li/div[@class="value"]/span[@itemprop="employmentType"]'),
            ('experience', 'ul/li/div[@class="value"]/span[@itemprop="experienceRequirements"]'),
            ('formation', 'ul/li/div[@class="value"]/span[@itemprop="qualifications"]'),
            ('pay', 'ul/li/div[@class="value"]/span[@itemprop="baseSalary"]'),
        )
        for field_name, xpath in stripped_fields:
            setattr(advert, field_name, u'%s' % first(xpath).text.strip())

        return advert
from weboob.tools.test import BackendTest


class PopolemploiTest(BackendTest):
    """Test of the popolemploi backend: search, then fetch one advert."""

    BACKEND = 'popolemploi'

    def test_popolemploi(self):
        """Search for a keyword, then fetch the first advert and check its URL."""
        results = list(self.backend.search_job('infographiste'))
        assert results

        first = results[0]
        advert = self.backend.get_job_advert(first.id, first)
        failure_message = 'URL for announce "%s" not found: %s' % (advert.id, advert.url)
        self.assertTrue(advert.url, failure_message)