From 6b5f55f9ad59453c0c65f044ac41caf44020b778 Mon Sep 17 00:00:00 2001
From: Bezleputh
Date: Tue, 18 Jun 2013 12:51:43 +0200
Subject: [PATCH] creation of Lolix : module that uses ICapJob in order to find adverts on lolix website

---
 modules/lolix/__init__.py | 24 ++++++++++
 modules/lolix/backend.py  | 47 ++++++++++++++++++++
 modules/lolix/browser.py  | 47 ++++++++++++++++++++
 modules/lolix/job.py      | 26 +++++++++++
 modules/lolix/pages.py    | 94 +++++++++++++++++++++++++++++++++++++++
 modules/lolix/test.py     | 32 +++++++++++++
 6 files changed, 270 insertions(+)
 create mode 100644 modules/lolix/__init__.py
 create mode 100644 modules/lolix/backend.py
 create mode 100644 modules/lolix/browser.py
 create mode 100644 modules/lolix/job.py
 create mode 100644 modules/lolix/pages.py
 create mode 100644 modules/lolix/test.py

diff --git a/modules/lolix/__init__.py b/modules/lolix/__init__.py
new file mode 100644
index 00000000..d027c5d8
--- /dev/null
+++ b/modules/lolix/__init__.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from .backend import LolixBackend
+
+
+__all__ = ['LolixBackend']
diff --git a/modules/lolix/backend.py b/modules/lolix/backend.py
new file mode 100644
index 00000000..9b624dc3
--- /dev/null
+++ b/modules/lolix/backend.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.backend import BaseBackend  #, BackendConfig
+# from weboob.tools.value import Value
+
+from .browser import LolixBrowser
+
+from weboob.capabilities.job import ICapJob
+
+
+__all__ = ['LolixBackend']
+
+
+class LolixBackend(BaseBackend, ICapJob):
+    NAME = 'lolix'
+    DESCRIPTION = u'Lolix est un centre de compétences spécialisé dans les technologies à base de Logiciel Libre.'
+    MAINTAINER = u'Bezleputh'
+    EMAIL = 'carton_ben@yahoo.fr'
+    VERSION = '0.g'
+
+    BROWSER = LolixBrowser
+
+    def search_job(self, pattern=None):
+        with self.browser:
+            for advert in self.browser.search_job():
+                yield advert
+
+    def get_job_advert(self, _id, advert):
+        with self.browser:
+            return self.browser.get_job_advert(_id, advert)
diff --git a/modules/lolix/browser.py b/modules/lolix/browser.py
new file mode 100644
index 00000000..c46893d9
--- /dev/null
+++ b/modules/lolix/browser.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser.decorators import id2url
+from weboob.tools.browser import BaseBrowser
+from .job import LolixJobAdvert
+from .pages import SearchPage, AdvertPage
+
+__all__ = ['LolixBrowser']
+
+
+class LolixBrowser(BaseBrowser):
+    PROTOCOL = 'http'
+    DOMAIN = 'fr.lolix.org/search/offre'
+    ENCODING = 'iso-8859-1'
+
+    PAGES = {
+        '%s://%s/date.php' % (PROTOCOL, DOMAIN): SearchPage,
+        '%s://%s/offre.php\?id=(?P<id>.+)' % (PROTOCOL, DOMAIN): AdvertPage,
+    }
+
+    def search_job(self):
+        self.location('%s://%s/date.php' % (self.PROTOCOL, self.DOMAIN))
+        assert self.is_on_page(SearchPage)
+        return self.page.iter_job_adverts()
+
+    @id2url(LolixJobAdvert.id2url)
+    def get_job_advert(self, url, advert):
+        self.location(url)
+        assert self.is_on_page(AdvertPage)
+        return self.page.get_job_advert(url, advert)
diff --git a/modules/lolix/job.py b/modules/lolix/job.py
new file mode 100644
index 00000000..7812a012
--- /dev/null
+++ b/modules/lolix/job.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.job import BaseJobAdvert
+
+
+class LolixJobAdvert(BaseJobAdvert):
+    @classmethod
+    def id2url(cls, _id):
+        return 'http://fr.lolix.org/search/offre/offre.php?id=%s' % _id
diff --git a/modules/lolix/pages.py b/modules/lolix/pages.py
new file mode 100644
index 00000000..96bcb41a
--- /dev/null
+++ b/modules/lolix/pages.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BasePage
+import dateutil.parser
+import re
+
+from .job import LolixJobAdvert
+
+__all__ = ['SearchPage', 'AdvertPage']
+
+
+class AdvertPage(BasePage):
+    def get_job_advert(self, url, advert):
+        tables = self.document.getroot().xpath('//td[@class="Contenu"]/table')
+        rows = self.parser.select(tables[2], 'tr')
+
+        if not advert:
+            advert = LolixJobAdvert(self.group_dict['id'])
+
+        advert.url = url
+        advert.society_name = u'%s' % self.parser.select(tables[3], 'tr/td/a', 1, method='xpath').text
+        return self.fill_job_advert(rows, advert)
+
+    def fill_job_advert(self, rows, advert):
+        advert.title = u'%s' % self.parser.select(rows[0], 'td', 1).text_content()
+        isDescription = False
+        for row in rows:
+            cols = self.parser.select(row, 'td')
+            if isDescription:
+                advert.description = u'%s' % cols[0].text_content()
+                isDescription = False
+
+            elif cols[0].text == u'Poste :':
+                advert.job_name = u'%s' % cols[1].text_content()
+
+            elif cols[0].text == u'Contrat :':
+                advert.contract_type = u'%s' % cols[1].text_content()
+
+            elif cols[0].text and cols[0].text.find(u'Rémunération :') != -1:
+                advert.pay = u'%s' % cols[1].text_content()
+
+            elif cols[0].text and cols[0].text.find(u'Région :') != -1:
+                advert.place = u'%s' % cols[1].text_content()
+
+            elif cols[0].text == u'Détails :':
+                isDescription = True
+
+            #else:
+            #    print cols[0].text
+        return advert
+
+
+class SearchPage(BasePage):
+    def iter_job_adverts(self):
+        adverts = []
+        rows = self.document.getroot().xpath('//td[@class="Contenu"]/table/tr')
+        for row in rows:
+            cols = self.is_row_advert(row)
+            if cols is not None:
+                adverts.append(self.create_job_advert(cols))
+        return adverts
+
+    def is_row_advert(self, row):
+        cols = self.parser.select(row, 'td')
+        d = dict(cols[1].attrib)
+        if 'class' in d.keys():
+            if 'ListeDark' == d['class'] or 'ListeLight' == d['class']:
+                return cols
+
+    def create_job_advert(self, cols):
+        a = self.parser.select(cols[3], 'a')[0]
+        advert = LolixJobAdvert(re.match(r'offre.php\?id=(.*)', a.attrib['href']).group(1))
+        advert.publication_date = dateutil.parser.parse(cols[1].text).date()
+        advert.society_name = u'%s' % self.parser.select(cols[2], 'a')[0].text
+        advert.title = u'%s' % a.text
+        return advert
diff --git a/modules/lolix/test.py b/modules/lolix/test.py
new file mode 100644
index 00000000..8189b7a9
--- /dev/null
+++ b/modules/lolix/test.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+
+class LolixTest(BackendTest):
+    BACKEND = 'lolix'
+
+    def test_lolix(self):
+        l = list(self.backend.search_job())
+        assert len(l)
+        advert = self.backend.get_job_advert(l[0].id, l[0])
+        print advert.__repr__()
+        self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))