From 5b9f40dfea5a3f5ea93d3a3f5431b135a37de740 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Tue, 18 Jun 2013 12:52:57 +0200 Subject: [PATCH] creation of adecco : module that uses ICapJob in order to find adverts on adecco website --- modules/adecco/__init__.py | 24 +++++++++++ modules/adecco/backend.py | 45 +++++++++++++++++++++ modules/adecco/browser.py | 51 ++++++++++++++++++++++++ modules/adecco/job.py | 27 +++++++++++++ modules/adecco/pages.py | 81 ++++++++++++++++++++++++++++++++++++++ modules/adecco/test.py | 32 +++++++++++++++ 6 files changed, 260 insertions(+) create mode 100644 modules/adecco/__init__.py create mode 100644 modules/adecco/backend.py create mode 100644 modules/adecco/browser.py create mode 100644 modules/adecco/job.py create mode 100644 modules/adecco/pages.py create mode 100644 modules/adecco/test.py diff --git a/modules/adecco/__init__.py b/modules/adecco/__init__.py new file mode 100644 index 00000000..70326c84 --- /dev/null +++ b/modules/adecco/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class AdeccoBackend(BaseBackend, ICapJob):
    """Weboob backend exposing job adverts from the adecco website.

    The class only declares the module metadata and forwards both job
    entry points to ``AdeccoBrowser``, which does the actual scraping.
    """

    NAME = 'adecco'
    DESCRIPTION = u'adecco website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    VERSION = '0.g'

    BROWSER = AdeccoBrowser

    def search_job(self, pattern=None):
        """Yield the adverts the browser finds for ``pattern``.

        :param pattern: search keywords, forwarded unchanged to the browser;
                        the browser returns no advert when it is ``None``.
        """
        # The browser is held as a context manager for the whole iteration
        # (presumably a lock -- confirm against BaseBrowser).
        with self.browser:
            for advert in self.browser.search_job(pattern):
                yield advert

    def get_job_advert(self, _id, advert=None):
        """Return the advert identified by ``_id``.

        :param _id: advert id, forwarded to the browser.
        :param advert: optional existing advert object to fill in.  It now
                       defaults to ``None`` so callers may pass the id alone;
                       the advert page creates a new advert in that case.
        :returns: whatever ``AdeccoBrowser.get_job_advert`` returns.
        """
        with self.browser:
            return self.browser.get_job_advert(_id, advert)
class AdeccoBrowser(BaseBrowser):
    """Browser for the job search and advert pages of www.adecco.fr."""

    PROTOCOL = 'http'
    DOMAIN = 'www.adecco.fr'
    ENCODING = None

    # URL regex -> page class.  Raw strings keep ``\?`` a regex escape rather
    # than a (deprecated) string escape, and ``\.`` makes the dot in front of
    # "aspx" match a literal dot only.
    PAGES = {
        r'%s://%s/trouver-un-emploi/Pages/Offres-d-emploi\.aspx\?keywords=(.*?)' % (PROTOCOL, DOMAIN): SearchPage,
        r'%s://%s/trouver-un-emploi/Pages/Details-de-l-Offre/(.*?)/(.*?)\.aspx\?IOF=(.*?)?$' % (PROTOCOL, DOMAIN): AdvertPage,
    }

    def search_job(self, pattern):
        """Open the search page for ``pattern`` and return its adverts.

        :param pattern: search keywords, or ``None``.
        :returns: ``[]`` when ``pattern`` is ``None``, otherwise the result
                  of ``SearchPage.iter_job_adverts()``.
        """
        if pattern is None:
            return []

        # Imported here so the block does not depend on a file-level import;
        # the function lives in a different module on Python 2 and 3.
        try:
            from urllib import quote_plus
        except ImportError:
            from urllib.parse import quote_plus

        # quote_plus() turns spaces into '+' exactly like the previous
        # replace(' ', '+'), and additionally escapes characters such as
        # '&', '#' or '%' that would otherwise corrupt the query string.
        # Text is encoded to UTF-8 first because Python 2's quote_plus()
        # cannot handle non-ASCII unicode.
        if not isinstance(pattern, bytes):
            pattern = pattern.encode('utf-8')
        keywords = quote_plus(pattern)

        self.location('%s://%s/trouver-un-emploi/Pages/Offres-d-emploi.aspx?keywords=%s' % (self.PROTOCOL, self.DOMAIN, keywords))
        assert self.is_on_page(SearchPage)
        return self.page.iter_job_adverts()

    @id2url(AdeccoJobAdvert.id2url)
    def get_job_advert(self, url, advert=None):
        """Open an advert page and return the advert parsed from it.

        :param url: advert URL; the ``id2url`` decorator is given
                    ``AdeccoJobAdvert.id2url`` to convert ids into URLs.
        :param advert: optional advert object to fill in; defaults to
                       ``None``, in which case the page creates one.
        :returns: the result of ``AdvertPage.get_job_advert(url, advert)``.
        """
        self.location(url)
        assert self.is_on_page(AdvertPage)
        return self.page.get_job_advert(url, advert)
class AdeccoJobAdvert(BaseJobAdvert):
    """Job advert published on www.adecco.fr.

    An advert id is made of the three variable parts of the advert URL,
    joined with slashes: the two path components that follow
    ``Details-de-l-Offre/`` and the value of the ``IOF`` query parameter.
    """

    @classmethod
    def id2url(cls, _id):
        """Return the advert page URL for ``_id``.

        :param _id: advert id of the form ``part1/part2/iof``, or an
                    ``http://`` / ``https://`` URL.
        :returns: the advert URL.  A value that already is a URL is returned
                  unchanged; splitting it on ``/`` would build a bogus URL
                  out of ``http:``, an empty string and the host name.
        :raises IndexError: if the id has fewer than three components.
        """
        if _id.startswith('http://') or _id.startswith('https://'):
            return _id

        parts = _id.split('/')
        template = 'http://www.adecco.fr/trouver-un-emploi/Pages/Details-de-l-Offre/%s/%s.aspx?IOF=%s'
        return template % (parts[0], parts[1], parts[2])
# Splits an advert URL into the three parts an advert id is built from.
# Written as a raw string with literal dots, and compiled once at import time
# instead of on every page parse.
_ADVERT_URL_RE = re.compile(
    r'http://www\.adecco\.fr/trouver-un-emploi/Pages/Details-de-l-Offre/'
    r'(.*?)/(.*?)\.aspx\?IOF=(.*?)?$',
    re.DOTALL)

# French month name -> month number.  Accented letters are written as escapes
# so the table does not depend on the source encoding; unaccented spellings
# are accepted as well.
_FRENCH_MONTHS = {
    u'janvier': 1,
    u'f\xe9vrier': 2, u'fevrier': 2,
    u'mars': 3,
    u'avril': 4,
    u'mai': 5,
    u'juin': 6,
    u'juillet': 7,
    u'ao\xfbt': 8, u'aout': 8,
    u'septembre': 9,
    u'octobre': 10,
    u'novembre': 11,
    u'd\xe9cembre': 12, u'decembre': 12,
}


def _parse_french_date(text):
    """Parse a ``day month-name year`` French date into a ``date``.

    This replaces ``strptime(text, "%d %B %Y")``, which only understands
    French month names after ``locale.setlocale(LC_TIME, 'fr_FR.UTF-8')``.
    That call changes process-wide state, fails when the locale is not
    installed, and was only made by the search page.

    :raises ValueError: if ``text`` is not a recognisable date.
    """
    parts = text.split()
    if len(parts) != 3:
        raise ValueError('unrecognised publication date: %r' % text)
    day, month_name, year = parts

    month = _FRENCH_MONTHS.get(month_name.lower())
    if month is None:
        raise ValueError('unknown month in publication date: %r' % text)

    # The first day of a month is conventionally written "1er" in French.
    if day.lower() == u'1er':
        day = u'1'
    return datetime(int(year), month, int(day)).date()


def _advert_id(match):
    """Build an advert id from a match of ``_ADVERT_URL_RE``."""
    return u'%s/%s/%s' % match.groups()


class SearchPage(BasePage):
    """Search results page."""

    def iter_job_adverts(self):
        """Return the list of adverts found on the page.

        Only id, publication date, title and place are filled in.  Results
        whose link does not look like an advert URL are skipped.
        """
        root = self.document.getroot()
        # Order kept from the original code: every 'resultContain right' div
        # first, then every 'resultContain left' div.
        blocks = (root.xpath("//div[@class='resultContain right']") +
                  root.xpath("//div[@class='resultContain left']"))

        adverts = []
        for block in blocks:
            href = self.parser.select(block, 'div/a', 1, method='xpath').attrib['href']
            match = _ADVERT_URL_RE.match(href)
            if match is None:
                continue

            advert = AdeccoJobAdvert(_advert_id(match))

            published = u'%s' % self.parser.select(block, "div/span[@class='offreDatePublication']", 1, method='xpath').text
            advert.publication_date = _parse_french_date(published)
            advert.title = u'%s' % self.parser.select(block, "div/h3/a", 1, method='xpath').text_content()
            advert.place = u'%s' % self.parser.select(block, "div/h3/span[@class='offreLocalisation']", 1, method='xpath').text
            adverts.append(advert)
        return adverts


class AdvertPage(BasePage):
    """Page showing a single advert."""

    def get_job_advert(self, url, advert=None):
        """Fill ``advert`` from the page and return it.

        :param url: URL of the advert; stored in ``advert.url`` and used to
                    derive the id when a new advert has to be created.
        :param advert: advert object to complete, or ``None`` to create one.
        :returns: the completed advert.
        """
        if advert is None:
            advert = AdeccoJobAdvert(_advert_id(_ADVERT_URL_RE.search(url)))

        root = self.document.getroot()
        header = root.xpath("//div[@class='contain_MoreResults']")[0]

        published = u'%s' % self.parser.select(header, "div[@class='dateResult']", 1, method='xpath').text.strip()
        advert.publication_date = _parse_french_date(published)

        # The <h1> text also contains the text of its "town" and "pageTitle"
        # spans; both are removed to keep only the advert title.
        heading = self.parser.select(header, "h1", 1, method='xpath').text_content().strip()
        town = self.parser.select(header, "h1/span[@class='town']", 1, method='xpath').text_content()
        page_title = self.parser.select(header, "h1/span[@class='pageTitle']", 1, method='xpath').text_content()
        advert.title = u'%s' % heading.replace(town, '').replace(page_title, '').strip()

        # Value cells of the summary table, read by position.  The contract
        # type used to be pre-set from the URL as well, but that value was
        # always overwritten by the fourth cell, so the dead store is gone.
        values = root.xpath("//div[@class='jobGreyContain']/table/tr/td/span[@class='value']")
        advert.job_name = u'%s' % values[0].text
        advert.place = u'%s' % values[1].text
        advert.pay = u'%s' % values[2].text
        advert.contract_type = u'%s' % values[3].text
        advert.url = url
        advert.description = root.xpath("//div[@class='descriptionContainer']/p")[0].text_content()
        return advert


class AdeccoTest(BackendTest):
    """Test that searches for adverts, then fetches the first one."""

    BACKEND = 'adecco'

    def test_adecco(self):
        adverts = list(self.backend.search_job(u'valet de chambre'))
        # assertTrue instead of a bare assert: it is not stripped by -O and
        # reports a message on failure.
        self.assertTrue(adverts, 'no advert found for the search pattern')
        advert = self.backend.get_job_advert(adverts[0].id, None)
        # Function-call form prints the same text on Python 2 and Python 3.
        print(repr(advert))
        self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))