# -*- coding: utf-8 -*-

# Copyright(C) 2013      Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.browser import BasePage
from .job import AdeccoJobAdvert
from datetime import datetime
import locale  # no longer used below; kept so the module's import surface is unchanged
import re

__all__ = ['SearchPage', 'AdvertPage']


# URL of an advert detail page. The three groups are joined with '/' to form
# the advert id. The pattern text is deliberately identical to the one that
# was previously compiled in each method (dots left unescaped), so exactly the
# same URLs are accepted.
_ADVERT_URL_RE = re.compile(
    r'http://www.adecco.fr/trouver-un-emploi/Pages/Details-de-l-Offre/(.*?)/(.*?).aspx\?IOF=(.*?)$',
    re.DOTALL)

# French month names as printed on the site, plus accent-less spellings.
# Parsing with this table avoids depending on (and mutating) the process-wide
# LC_TIME locale, which may not even be installed on the host.
_FRENCH_MONTHS = {
    u'janvier': 1,
    u'février': 2, u'fevrier': 2,
    u'mars': 3,
    u'avril': 4,
    u'mai': 5,
    u'juin': 6,
    u'juillet': 7,
    u'août': 8, u'aout': 8,
    u'septembre': 9,
    u'octobre': 10,
    u'novembre': 11,
    u'décembre': 12, u'decembre': 12,
}


def _parse_french_date(text):
    """Parse a '<day> <French month name> <year>' string into a date.

    Surrounding whitespace is ignored and the month name is matched
    case-insensitively.

    :raises ValueError: if the text does not have that shape, the month name
                        is unknown, or the day/month/year is not a valid date.
    """
    parts = text.split()
    if len(parts) != 3:
        raise ValueError('Unrecognised date: %r' % (text,))
    day, month_name, year = parts
    month = _FRENCH_MONTHS.get(month_name.lower())
    if month is None:
        raise ValueError('Unrecognised month name in date: %r' % (text,))
    # int() and datetime() both raise ValueError on bad input.
    return datetime(int(year), month, int(day)).date()


def _advert_id(match):
    """Build the advert id from a successful _ADVERT_URL_RE match."""
    return u'%s/%s/%s' % match.groups()


class SearchPage(BasePage):
    """Search results page listing job adverts."""

    def iter_job_adverts(self):
        """Return the list of adverts found in the result blocks of the page.

        Blocks whose link does not look like an advert detail URL are skipped.
        Each advert gets its id, publication date, title and place filled in.

        :raises ValueError: if a publication date cannot be parsed.
        """
        root = self.document.getroot()
        # Keep the original ordering: every "right" block, then every "left" one.
        divs = (root.xpath("//div[@class='resultContain right']") +
                root.xpath("//div[@class='resultContain left']"))

        adverts = []
        for div in divs:
            href = self.parser.select(div, 'div/a', 1, method='xpath').attrib['href']
            match = _ADVERT_URL_RE.match(href)
            if match is None:
                continue

            advert = AdeccoJobAdvert(_advert_id(match))

            date = u'%s' % self.parser.select(
                div, "div/span[@class='offreDatePublication']", 1, method='xpath').text
            advert.publication_date = _parse_french_date(date)

            advert.title = u'%s' % self.parser.select(
                div, "div/h3/a", 1, method='xpath').text_content()
            advert.place = u'%s' % self.parser.select(
                div, "div/h3/span[@class='offreLocalisation']", 1, method='xpath').text

            adverts.append(advert)
        return adverts


class AdvertPage(BasePage):
    """Detail page of a single job advert."""

    def get_job_advert(self, url, advert):
        """Fill *advert* from this page and return it.

        :param url: URL of the advert page; stored on the advert and, when
                    *advert* is None, used to derive the advert id.
        :param advert: advert object to complete, or None to create a new one.
        :raises ValueError: if *advert* is None and *url* is not an advert
                            detail URL, or if the publication date cannot be
                            parsed.
        :raises IndexError: if an expected element is missing from the page.
        """
        if advert is None:
            match = _ADVERT_URL_RE.search(url)
            if match is None:
                raise ValueError('Unrecognised Adecco advert URL: %r' % (url,))
            advert = AdeccoJobAdvert(_advert_id(match))

        root = self.document.getroot()
        div = root.xpath("//div[@class='contain_MoreResults']")[0]

        date = u'%s' % self.parser.select(
            div, "div[@class='dateResult']", 1, method='xpath').text
        advert.publication_date = _parse_french_date(date)

        # The <h1> text also contains the text of its "town" and "pageTitle"
        # spans; remove both and trim what is left to get the bare title.
        title = self.parser.select(div, "h1", 1, method='xpath').text_content().strip()
        town = self.parser.select(
            div, "h1/span[@class='town']", 1, method='xpath').text_content()
        page_title = self.parser.select(
            div, "h1/span[@class='pageTitle']", 1, method='xpath').text_content()
        advert.title = u'%s' % title.replace(town, '').replace(page_title, '').strip()

        # The value cells are read by position: job name, place, pay, contract type.
        spans = root.xpath("//div[@class='jobGreyContain']/table/tr/td/span[@class='value']")
        advert.job_name = u'%s' % spans[0].text
        advert.place = u'%s' % spans[1].text
        advert.pay = u'%s' % spans[2].text
        advert.contract_type = u'%s' % spans[3].text

        advert.url = url
        advert.description = root.xpath(
            "//div[@class='descriptionContainer']/p")[0].text_content()
        return advert