diff --git a/modules/monster/__init__.py b/modules/monster/__init__.py new file mode 100644 index 00000000..ca0a467b --- /dev/null +++ b/modules/monster/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from .backend import MonsterBackend + + +__all__ = ['MonsterBackend'] diff --git a/modules/monster/backend.py b/modules/monster/backend.py new file mode 100644 index 00000000..2eb96bbd --- /dev/null +++ b/modules/monster/backend.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
class MonsterBackend(BaseBackend, ICapJob):
    """Weboob job backend for the Monster website.

    The class declares the backend metadata, the choice tables offered in the
    backend configuration, and thin wrappers that delegate every request to
    ``MonsterBrowser`` while holding the browser as a context manager.
    """

    NAME = 'monster'
    DESCRIPTION = u'monster website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '0.h'

    BROWSER = MonsterBrowser

    # Each choice table maps the raw value handed to the browser's advanced
    # search onto its human readable label.  The tables are ordered by the
    # string ordering of their keys.  They are built straight from the sorted
    # items: a list comprehension here would leak its loop variables into the
    # class namespace under Python 2, and the labels are already unicode.
    # NOTE(review): zero-padded keys such as '000100' and '000000' sort ahead
    # of the purely numeric keys - presumably on purpose; confirm the site
    # accepts them as-is.
    type_contrat_choices = OrderedDict(sorted({
        '97': u'Interim ou CDD ou mission',
        '98': u'CDI',
        '99': u'Stage',
        '000100': u'Autres',
        '101': u'Indépendant/Freelance/Franchise',
        '102': u'Journalier',
        '103': u'Titulaire de la fonction publique',
        '104': u'Temps Partiel',
        '105': u'Temps Plein',
    }.iteritems()))

    JobCategory_choices = OrderedDict(sorted({
        ' ': u'Choisir…',
        '78': u'Architecture, Création et Spectacle',
        '92': u'Autres',
        '76': u'BTP et second oeuvre',
        '95': u'Commercial / Vente',
        '72': u'Comptabilité et Finance',
        '80': u'Edition et Ecriture',
        '81': u'Formation / Education',
        '93': u'Gestion de projet / programme',
        '83': u'Hôtellerie, Restauration et Tourisme',
        '86': u'Informatique et Technologies',
        '82': u'Ingénierie',
        '85': u'Installation, Maintenance et Réparation',
        '87': u'Juridique',
        '88': u'Logistique, Approvisionnement et Transport',
        '90': u'Marketing',
        '89': u'Production et Opérations',
        '94': u'Qualité / Inspection',
        '75': u'Recherche et Analyses',
        '84': u'Ressources Humaines',
        '91': u'Santé',
        '96': u'Sécurité',
        '73': u'Services administratifs',
        '79': u'Services clientèle et aux particuliers',
        '77': u'Stratégie et Management',
    }.iteritems()))

    activityDomain_choices = OrderedDict(sorted({
        ' ': u'Choisir…',
        '16': u'Aéronautique / Aérospatiale (civil et militaire)',
        '17': u'Agriculture / Sylviculture / Pêche / Chasse',
        '39': u'Agroalimentaire',
        '18': u'Architecture / Design et services associés',
        '53': u'Art / Culture / Loisirs',
        '51': u'Associations / Bénévolat',
        '43': u'Assurance et Mutualité',
        '23': u'Audiovisuel / Media / Diffusion Audio et Vidéo',
        '14': u'Audit / Comptabilité / Fiscalité',
        '20': u'Automobile - Vente, Maintenance et Réparations',
        '52': u'Autres',
        '24': u'Autres Services aux entreprises',
        '21': u'Banques / Organismes financiers',
        '32': u'Biens de consommation courante / Cosmétiques',
        '31': u'BTP / Construction - bâtiments commerciaux, habitations',
        '30': u'BTP / Construction - usines, infrastructures, TP',
        '45': u'Cabinets et Services Juridiques',
        '46': u'Cabinets conseils en Management et Stratégie',
        '25': u'Chimie',
        '67': u'Commerce de gros et Import/Export',
        '55': u'Edition / Imprimerie',
        '35': u'Energie et Eau',
        '33': u'Enseignement et Formation',
        '66': u'Gestion des déchêts et Recyclage',
        '59': u'Grande Distribution et Commerce de détail',
        '42': u'Hôtellerie',
        '56': u'Immobilier',
        '47': u'Industrie / Production, autres',
        '19': u'Industrie Automobile - Constructeurs / Équipementiers',
        '34': u'Industrie électronique',
        '22': u'Industrie pharmaceutique / Biotechnologies',
        '26': u'Industrie Textile, Cuir et Confection',
        '27': u'Informatique - Hardware',
        '29': u'Informatique - Services',
        '28': u'Informatique - Software',
        '36': u'Ingénierie et services associés',
        '44': u'Internet / e-commerce',
        '57': u'Location',
        '48': u'Marine / Aéronautique',
        '15': u'Marketing / Communication / Publicité / RP',
        '50': u'Métaux et Minéraux',
        '37': u'Parcs d attraction et salles de spectacles',
        '62': u'Recrutement / Intérim et bureaux de placement',
        '58': u'Restauration',
        '41': u'Santé',
        '49': u'Santé - Equipement et appareils',
        '40': u'Secteur Public',
        '60': u'Sécurité et Surveillance',
        '54': u'Services aux particuliers',
        '38': u'Services financiers',
        '61': u'Sport - Equipements et infrastructures',
        '63': u'Télécommunication',
        '65': u'Tourisme, voyages et transport de personnes',
        '64': u'Transport de marchandises, entreprosage, stockage',
    }.iteritems()))

    date_choices = OrderedDict(sorted({
        '-1': u'N importe quelle date',
        '000000': u'Aujourd hui',
        '1': u'2 derniers jours',
        '3': u'3 derniers jours',
        '7': u'Les 7 derniers jours',
        '14': u'Les 14 derniers jours',
        '30': u'30 derniers jours',
    }.iteritems()))

    # Criteria of the advanced search; they are read back one by one in
    # advanced_search_job().
    CONFIG = BackendConfig(
        Value('job_name', label='Job name', masked=False, default=''),
        Value('place', label='Place', masked=False, default=''),
        Value('contract', label=u'Contract', choices=type_contrat_choices, default='000100'),
        Value('job_category', label=u'Job Category', choices=JobCategory_choices, default=''),
        Value('activity_domain', label=u'Activity Domain', choices=activityDomain_choices, default=''),
        Value('limit_date', label=u'Date', choices=date_choices, default='-1'),
    )

    def search_job(self, pattern=None):
        """Yield the adverts the browser finds for the free-text *pattern*."""
        with self.browser:
            for advert in self.browser.search_job(pattern):
                yield advert

    def advanced_search_job(self):
        """Yield the adverts matching the criteria stored in the backend configuration."""
        with self.browser:
            for advert in self.browser.advanced_search_job(job_name=self.config['job_name'].get(),
                                                           place=self.config['place'].get(),
                                                           contract=self.config['contract'].get(),
                                                           job_category=self.config['job_category'].get(),
                                                           activity_domain=self.config['activity_domain'].get(),
                                                           limit_date=self.config['limit_date'].get()):
                yield advert

    def get_job_advert(self, _id, advert=None):
        """Return the advert identified by *_id*.

        When *advert* is given it is handed to the browser together with the
        identifier; otherwise the browser is called with ``None``.
        """
        with self.browser:
            return self.browser.get_job_advert(_id, advert)

    def fill_obj(self, advert, fields):
        """Complete *advert* by fetching its detail page; *fields* is not used."""
        self.get_job_advert(advert.id, advert)

    OBJECTS = {MonsterJobAdvert: fill_obj}
# modules/monster/job.py
class MonsterJobAdvert(BaseJobAdvert):
    """Job advert published on the Monster website."""

    @classmethod
    def id2url(cls, _id):
        """Return the URL of the advert page for the identifier *_id*."""
        return 'http://offre-emploi.monster.fr/%s.aspx' % _id


# modules/monster/browser.py
class MonsterBrowser(BaseBrowser):
    """Browser driving the Monster job search pages."""

    PROTOCOL = 'http'
    DOMAIN = 'offres.monster.fr'
    ENCODING = 'utf-8'

    # URL patterns mapped onto the page classes that parse them.
    PAGES = {
        r'%s://%s/offres-d-emploi/\?q=(.*?)' % (PROTOCOL, DOMAIN): SearchPage,
        '%s://%s/rechercher/(.*?)' % (PROTOCOL, DOMAIN): SearchPage,
        'http://offre-emploi.monster.fr/(.*?).aspx': AdvertPage,
    }

    def search_job(self, pattern=None):
        """Run the simple search and return an iterator over the adverts found.

        :param pattern: free-text query; ``None`` (the default) is treated as
                        an empty query instead of failing on ``None.encode``.
        """
        query = urllib.quote_plus((pattern or u'').encode(self.ENCODING))
        self.location('%s://%s/offres-d-emploi/?q=%s' % (self.PROTOCOL, self.DOMAIN, query))
        assert self.is_on_page(SearchPage)
        return self.page.iter_job_adverts()

    def advanced_search_job(self, job_name, place, contract, job_category, activity_domain, limit_date):
        """Run the advanced search and return an iterator over the adverts found.

        The criteria are sent as the ``q``, ``where``, ``jt``, ``occ``, ``tm``
        and ``indid`` query parameters.  Every value is encoded and URL-quoted
        before being interpolated: a place name may contain spaces or accented
        characters, and some choice keys are a single space.
        """
        def quote(value):
            return urllib.quote(value.encode(self.ENCODING))

        # Same parameter order as the query string: note that ``tm`` carries
        # the date limit and ``indid`` the activity domain.
        criteria = (job_name, place, contract, job_category, limit_date, activity_domain)
        query = 'q=%s&where=%s&jt=%s&occ=%s&tm=%s&indid=%s' % tuple(quote(value) for value in criteria)
        self.location('%s://%s/PowerSearch.aspx?%s' % (self.PROTOCOL, self.DOMAIN, query))
        assert self.is_on_page(SearchPage)
        return self.page.iter_job_adverts()

    @id2url(MonsterJobAdvert.id2url)
    def get_job_advert(self, url, advert):
        """Load the advert page at *url* and return the advert parsed from it.

        *advert* is passed through to the page parser, which fills it or
        creates a new one when it is ``None``.
        """
        self.location(url)
        assert self.is_on_page(AdvertPage)
        return self.page.get_job_advert(url, advert)
class SearchPage(BasePage):
    """Result listing returned by the simple and the advanced search."""

    def iter_job_adverts(self):
        """Yield one partially filled ``MonsterJobAdvert`` per visible result row.

        Each advert carries its id, company name and title, plus a place when
        the row shows one.  The publication date is derived from the relative
        date text of the row.
        """
        re_id = re.compile(r'http://offre-emploi\.monster\.fr/(.*?)\.aspx', re.DOTALL)
        rows = self.document.getroot().xpath("//table[@class='listingsTable']/tbody/tr")
        for tr in rows:
            # Only rows carrying a class other than 'aceHidden' hold an advert.
            if tr.attrib.get('class', 'aceHidden') == 'aceHidden':
                continue

            a = self.parser.select(tr, 'td/div/div[@class="jobTitleContainer"]/a', 1, method='xpath')
            _id = u'%s' % re_id.search(a.attrib['href']).group(1)
            advert = MonsterJobAdvert(_id)
            advert.society_name = u'%s' % self.parser.select(tr, 'td/div/div[@class="companyContainer"]/div/a',
                                                             1, method='xpath').attrib['title']
            advert.title = u'%s' % a.text

            date_text = self.parser.select(tr, 'td/div/div[@class="fnt20"]', 1, method='xpath').text_content().strip()
            now = datetime.now()
            number = re.search(r"\d+", date_text)
            if number is None:
                # No figure in the text: the advert is dated today.
                advert.publication_date = datetime.combine(now, time.min)
            else:
                amount = int(number.group(0))
                # 'heure' and 'jour' match the singular and the plural forms.
                # Other units leave the publication date unset.
                if 'heure' in date_text:
                    advert.publication_date = datetime.combine(now - timedelta(hours=amount), time())
                elif 'jour' in date_text:
                    advert.publication_date = datetime.combine(now - timedelta(days=amount), time())

            place = self.parser.select(tr, 'td/div/div[@class="jobLocationSingleLine"]/a', method='xpath')
            if len(place) != 0:
                advert.place = u'%s' % place[0].attrib['title']

            yield advert


class AdvertPage(BasePage):
    """Detail page of a single job advert."""

    # Advert attribute -> XPath, relative to the job summary block, of the
    # node whose text fills it.  All of these fields are optional.
    SUMMARY_FIELDS = (
        ('society_name', 'dl/dd/span[@itemprop="name"]'),
        ('place', 'dl/dd/span[@itemprop="jobLocation"]'),
        ('contract_type', 'dl/dd[@class="multipleddlast"]/span'),
        ('pay', 'dl/dd/span[@itemprop="baseSalary"]'),
        ('formation', 'dl/dd/span[@itemprop="educationRequirements"]'),
        ('experience', 'dl/dd/span[@itemprop="qualifications"]'),
    )

    def _optional_text(self, root, xpath):
        """Return the text of the first node matching *xpath* under *root*, or None."""
        nodes = self.parser.select(root, xpath, method='xpath')
        if len(nodes) != 0:
            return u'%s' % nodes[0].text
        return None

    def get_job_advert(self, url, advert):
        """Fill *advert* from the page and return it.

        :param url: URL of the page; stored on the advert and, when *advert*
                    is ``None``, also used to extract the advert id.
        :param advert: advert to complete, or ``None`` to create a new one.
        """
        if advert is None:
            re_id = re.compile(r'http://offre-emploi\.monster\.fr/(.*?)\.aspx', re.DOTALL)
            _id = u'%s' % re_id.search(url).group(1)
            advert = MonsterJobAdvert(_id)

        div = self.document.getroot().xpath('//div[@id="jobcopy"]')[0]
        advert.title = u'%s' % self.parser.select(div, 'h1', 1, method='xpath').text
        description = self.parser.select(div, 'div[@id="jobBodyContent"]', 1, method='xpath')
        advert.description = html2text(self.parser.tostring(description))

        jobsummary = self.document.getroot().xpath('//div[@id="jobsummary_content"]')[0]

        # Experience is treated like the other summary fields: a missing node
        # leaves the attribute untouched instead of aborting the whole advert.
        for attribute, xpath in self.SUMMARY_FIELDS:
            text = self._optional_text(jobsummary, xpath)
            if text is not None:
                setattr(advert, attribute, text)

        advert.url = url
        return advert
class MonsterTest(BackendTest):
    """Checks of the monster backend: each search must return at least one
    advert, and the first advert must come back with a URL."""

    BACKEND = 'monster'

    def _check_first_advert(self, adverts):
        """Assert *adverts* is not empty and that its first advert has a URL once fetched."""
        assert len(adverts)
        first = self.backend.get_job_advert(adverts[0].id, None)
        self.assertTrue(first.url, 'URL for announce "%s" not found: %s' % (first.id, first.url))

    def test_monster_search(self):
        """Simple search on the keyword 'marketing'."""
        self._check_first_advert(list(self.backend.search_job(u'marketing')))

    def test_monster_advanced_search(self):
        """Advanced search driven by the backend configuration."""
        self._check_first_advert(list(self.backend.advanced_search_job()))