[apec] adapt to the new version of the website and use browser2

This commit is contained in:
Bezleputh 2015-07-17 17:10:33 +02:00
commit 184bd6869a
3 changed files with 329 additions and 171 deletions

View file

@ -17,50 +17,97 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.deprecated.browser.decorators import id2url from weboob.browser.profiles import Profile
from weboob.deprecated.browser import Browser from weboob.browser import PagesBrowser, URL
import urllib from .pages import IdsPage, OffrePage
from .pages import SearchPage, AdvertPage
from .job import ApecJobAdvert
__all__ = ['ApecBrowser'] __all__ = ['ApecBrowser']
class ApecBrowser(Browser): class JsonProfile(Profile):
PROTOCOL = 'https' def setup_session(self, session):
DOMAIN = 'www.apec.fr' session.headers["Content-Type"] = "application/json"
ENCODING = 'ISO-8859-1'
PAGES = {
'https://cadres.apec.fr/liste-offres-emploi-cadres/71____(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)___offre-d-emploi.html': SearchPage, class ApecBrowser(PagesBrowser):
'https://cadres.apec.fr/MesOffres/RechercheOffres/ApecRechercheOffre.jsp\?keywords=(.*?)': SearchPage, BASEURL = 'https://cadres.apec.fr'
'https://cadres.apec.fr/offres-emploi-cadres/offres-emploi-cadres/\d*_\d*_\d*_(.*?)________(.*?).html(.*?)': AdvertPage, PROFILE = JsonProfile()
}
start = 0
json_count = URL('/cms/webservices/rechercheOffre/count', IdsPage)
json_ids = URL('/cms/webservices/rechercheOffre/ids', IdsPage)
json_offre = URL('/cms/webservices/offre/public\?numeroOffre=(?P<_id>.*)', OffrePage)
def create_parameters(self, pattern='', fonctions='[]', lieux='[]',
                      secteursActivite='[]', typesContrat='[]',
                      typesConvention='[]', niveauxExperience='[]',
                      salaire_min='', salaire_max='', date_publication='',
                      start=0, range=20):
    """Build the JSON search query, returned as a string.

    :param pattern: free-text keywords; ``None`` or empty means no keyword
    :param fonctions: JSON array text, inserted verbatim (default ``'[]'``)
    :param lieux: JSON array text, inserted verbatim
    :param secteursActivite: JSON array text, inserted verbatim
    :param typesContrat: JSON array text, inserted verbatim
    :param typesConvention: JSON array text, inserted verbatim
    :param niveauxExperience: JSON array text, inserted verbatim
    :param salaire_min: value for "salaireMinimum"; key omitted when falsy
    :param salaire_max: value for "salaireMaximum"; key omitted when falsy
    :param date_publication: value for "anciennetePublication"; key omitted
                             when falsy
    :param start: value of "pagination.startIndex"
    :param range: value of "pagination.range" (the name shadows the builtin
                  but is kept because callers pass it by keyword)
    :return: the query serialised as a JSON string
    """
    import json

    # Serialise the keywords through json.dumps so that quotes, backslashes
    # and non-ASCII characters are escaped instead of corrupting the
    # document. A missing pattern becomes an empty string rather than the
    # literal word "None".
    mots_cles = json.dumps(pattern or '')

    # Optional filters are only emitted when a value was supplied; the order
    # (min salary, max salary, publication age) is kept stable.
    optional = ''
    for key, value in (('salaireMinimum', salaire_min),
                       ('salaireMaximum', salaire_max),
                       ('anciennetePublication', date_publication)):
        if value:
            optional += ',"%s":%s' % (key, value)

    template = ('{"activeFiltre":true,"motsCles":%s,"fonctions":%s,"lieux":%s,'
                '"secteursActivite":%s,"typesContrat":%s,"typesConvention":%s,'
                '"niveauxExperience":%s%s,'
                '"sorts":[{"type":"SCORE","direction":"DESCENDING"}],'
                '"pagination":{"startIndex":%s,"range":%s},'
                '"typeClient":"CADRE"}')
    return template % (mots_cles, fonctions, lieux, secteursActivite,
                       typesContrat, typesConvention, niveauxExperience,
                       optional, start, range)
def search_job(self, pattern=None): def search_job(self, pattern=None):
self.location('https://cadres.apec.fr/MesOffres/RechercheOffres/ApecRechercheOffre.jsp?keywords=%s' data = self.create_parameters(pattern=pattern)
% urllib.quote_plus(pattern.encode(self.ENCODING))) count = self.json_count.go(data=data).get_adverts_number()
assert self.is_on_page(SearchPage) self.start = 0
return self.page.iter_job_adverts() if count:
ids = self.json_ids.go(data=data).iter_job_adverts(pattern=pattern,
fonctions='[]',
lieux='[]',
secteursActivite='[]',
typesContrat='[]',
typesConvention='[]',
niveauxExperience='[]',
salaire_min='',
salaire_max='',
date_publication='',
start=self.start,
count=count,
range=20)
for _id in ids:
yield self.json_offre.go(_id=_id.id).get_job_advert()
def advanced_search_job(self, region=None, fonction=None, secteur=None, salaire=None, contrat=None, limit_date=None, level=None): def get_job_advert(self, _id, advert=None):
self.location( return self.json_offre.go(_id=_id).get_job_advert(obj=advert)
'https://cadres.apec.fr/liste-offres-emploi-cadres/71____%s_%s_%s_%s_%s_%s_%s___offre-d-emploi.html'
% (
region,
fonction,
secteur,
salaire,
level,
limit_date,
contrat
))
assert self.is_on_page(SearchPage)
return self.page.iter_job_adverts()
@id2url(ApecJobAdvert.id2url) def advanced_search_job(self, region='', fonction='', secteur='', salaire='', contrat='', limit_date='', level=''):
def get_job_advert(self, url, advert): salaire_max = ''
self.location(url) salaire_min = ''
assert self.is_on_page(AdvertPage)
return self.page.get_job_advert(url, advert) if salaire:
s = salaire.split('|')
salaire_max = s[1]
salaire_min = s[0]
data = self.create_parameters(fonctions='[%s]' % fonction,
lieux='[%s]' % region,
secteursActivite='[%s]' % secteur,
typesContrat='[%s]' % contrat,
niveauxExperience='[%s]' % level,
salaire_min=salaire_min,
salaire_max=salaire_max,
date_publication=limit_date)
count = self.json_count.go(data=data).get_adverts_number()
self.start
if count:
ids = self.json_ids.go(data=data).iter_job_adverts(pattern='',
fonctions='[%s]' % fonction,
lieux='[%s]' % region,
secteursActivite='[%s]' % secteur,
typesContrat='[%s]' % contrat,
niveauxExperience='[%s]' % level,
salaire_min=salaire_min,
salaire_max=salaire_max,
date_publication=limit_date,
start=self.start,
count=count,
range=20)
for _id in ids:
yield self.json_offre.go(_id=_id).get_job_advert()

View file

@ -17,13 +17,12 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.job import BaseJobAdvert
from weboob.tools.backend import Module, BackendConfig from weboob.tools.backend import Module, BackendConfig
from weboob.capabilities.job import CapJob from weboob.capabilities.job import CapJob
from weboob.tools.ordereddict import OrderedDict from weboob.tools.ordereddict import OrderedDict
from weboob.tools.value import Value from weboob.tools.value import Value
from .browser import ApecBrowser from .browser import ApecBrowser
from .job import ApecJobAdvert
__all__ = ['ApecModule'] __all__ = ['ApecModule']
@ -38,63 +37,174 @@ class ApecModule(Module, CapJob):
BROWSER = ApecBrowser BROWSER = ApecBrowser
places_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ places_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
'00|': u'-- Indifférent --', '001|99700': u'UE Hors France',
'01|700': u'Alsace', '002|99126': u'..Grèce',
'02|701': u'Aquitaine', '003|99132': u'..Royaume Uni',
'03|702': u'Auvergne', '004|99134': u'..Espagne',
'04|703': u'Basse-Normandie', '005|99136': u'..Irlande',
'05|704': u'Bourgogne', '006|99139': u'..Portugal',
'06|705': u'Bretagne', '007|99254': u'..Chypre',
'07|706': u'Centre', '008|99127': u'..Italie',
'08|707': u'Champagne', '009|99131': u'..Belgique',
'09|20': u'Corse', '010|99135': u'..Pays Bas',
'10|99712': u'France Outre-Mer', '011|99137': u'..Luxembourg',
'11|709': u'Franche-Comté', '012|99144': u'..Malte',
'12|710': u'Haute-Normandie', '013|99145': u'..Slovénie',
'13|711': u'Ile-de-France', '014|99101': u'..Danemark',
'14|712': u'Languedoc-Roussillon', '015|99104': u'..Suède',
'15|713': u'Limousin', '016|99105': u'..Finlande',
'16|714': u'Lorraine', '017|99106': u'..Estonie',
'17|715': u'Midi-Pyrénées', '018|99107': u'..Lettonie',
'18|716': u'Nord-Pas-de-Calais', '019|99108': u'..Lituanie',
'19|720': u'PACA', '020|99109': u'..Allemagne',
'20|717': u'Pays de La Loire', '021|99110': u'..Autriche',
'21|718': u'Picardie', '022|99111': u'..Bulgarie',
'22|719': u'Poitou-Charentes', '023|99112': u'..Hongrie',
'23|721': u'Rhône-Alpes', '024|99114': u'..Roumanie',
'24|99109': u'Allemagne', '025|99116': u'..République Tchèque',
'25|99106': u'Estonie', '026|99117': u'..Slovaquie',
'26|99108': u'Lituanie', '027|99119': u'..Croatie',
'27|99116': u'République Tchèque', '028|99122': u'..Pologne',
'28|99110': u'Autriche', '029|799': u'France',
'29|99105': u'Finlande', '030|711': u'..Ile-de-France',
'30|99137': u'Luxembourg', '031|75': u'....Paris',
'31|99114': u'Roumanie', '032|77': u'....Seine-et-Marne',
'32|99131': u'Belgique', '033|78': u'....Yvelines',
'33|99126': u'Grèce', '034|91': u'....Essonne',
'34|99144': u'Malte', '035|92': u'....Hauts-de-Seine',
'35|99132': u'Royaume Uni', '036|93': u'....Seine-Saint-Denis',
'36|99111': u'Bulgarie', '037|94': u'....Val-de-Marne',
'37|99112': u'Hongrie', '038|95': u'....Val-d\'Oise',
'38|99135': u'Pays Bas', '039|703': u'..Basse-Normandie',
'39|99117': u'Slovaquie', '040|14': u'....Calvados',
'40|99254': u'Chypre', '041|50': u'....Manche',
'41|99136': u'Irlande', '042|61': u'....Orne',
'42|99122': u'Pologne', '043|705': u'..Bretagne',
'43|99145': u'Slovénie', '044|22': u'....Côtes d\'Armor',
'44|99101': u'Danemark', '045|29': u'....Finistère',
'45|99127': u'Italie', '046|35': u'....Ille-et-Vilaine',
'46|99139': u'Portugal', '047|56': u'....Morbihan',
'47|99104': u'Suède', '048|706': u'..Centre',
'48|99134': u'Espagne', '049|18': u'....Cher',
'49|99107': u'Lettonie', '050|28': u'....Eure-et-Loir',
'50|99700': u'UE Hors France', '051|36': u'....Indre',
'51|99702': u'Amérique du Nord', '052|37': u'....Indre-et-Loire',
'52|99715': u'Afrique', '053|41': u'....Loir-et-Cher',
'53|99711': u'Océanie', '054|45': u'....Loiret',
'54|99701': u'Europe Hors UE', '055|710': u'..Haute-Normandie',
'55|99714': u'Amérique Latine', '056|27': u'....Eure',
'56|99716': u'Asie', '057|76': u'....Seine-Maritime',
'058|717': u'..Pays de La Loire',
'059|44': u'....Loire-Atlantique',
'060|49': u'....Maine-et-Loire',
'061|53': u'....Mayenne',
'062|72': u'....Sarthe',
'063|85': u'....Vendée',
'064|700': u'..Alsace',
'065|67': u'....Bas-Rhin',
'066|68': u'....Haut-Rhin',
'067|704': u'..Bourgogne',
'068|21': u'....Côte d\'Or',
'069|58': u'....Nièvre',
'070|71': u'....Saône-et-Loire',
'071|89': u'....Yonne',
'072|707': u'..Champagne',
'073|8': u'....Ardennes',
'074|10': u'....Aube',
'075|51': u'....Marne',
'076|52': u'....Haute-Marne',
'077|709': u'..Franche-Comté',
'078|25': u'....Doubs',
'079|39': u'....Jura',
'080|70': u'....Haute-Saône',
'081|90': u'....Territoire de Belfort',
'082|714': u'..Lorraine',
'083|54': u'....Meurthe-et-Moselle',
'084|55': u'....Meuse',
'085|57': u'....Moselle',
'086|88': u'....Vosges',
'087|716': u'..Nord-Pas-de-Calais',
'088|59': u'....Nord',
'089|62': u'....Pas-de-Calais',
'090|718': u'..Picardie',
'091|2': u'....Aisne',
'092|60': u'....Oise',
'093|80': u'....Somme',
'094|20': u'..Corse',
'095|750': u'....Corse du Sud',
'096|751': u'....Haute-Corse',
'097|702': u'..Auvergne',
'098|3': u'....Allier',
'099|15': u'....Cantal',
'100|43': u'....Haute-Loire',
'101|63': u'....Puy-de-Dôme',
'102|720': u'..PACA',
'103|4': u'....Alpes-de-Haute-Provence',
'104|5': u'....Hautes-Alpes',
'105|6': u'....Alpes-Maritimes',
'106|13': u'....Bouches-du-Rhône',
'107|83': u'....Var',
'108|84': u'....Vaucluse',
'109|721': u'..Rhône-Alpes',
'110|1': u'....Ain',
'111|7': u'....Ardèche',
'112|26': u'....Drôme',
'113|38': u'....Isère',
'114|42': u'....Loire',
'115|69': u'....Rhône',
'116|73': u'....Savoie',
'117|74': u'....Haute-Savoie',
'118|701': u'..Aquitaine',
'119|24': u'....Dordogne',
'120|33': u'....Gironde',
'121|40': u'....Landes',
'122|47': u'....Lot-et-Garonne',
'123|64': u'....Pyrénées-Atlantiques',
'124|712': u'..Languedoc-Roussillon',
'125|11': u'....Aude',
'126|30': u'....Gard',
'127|34': u'....Hérault',
'128|48': u'....Lozère',
'129|66': u'....Pyrénées-Orientales',
'130|713': u'..Limousin',
'131|19': u'....Corrèze',
'132|23': u'....Creuse',
'133|87': u'....Haute-Vienne',
'134|715': u'..Midi-Pyrénées',
'135|9': u'....Ariège',
'136|12': u'....Aveyron',
'137|31': u'....Haute-Garonne',
'138|32': u'....Gers',
'139|46': u'....Lot',
'140|65': u'....Hautes-Pyrénées',
'141|81': u'....Tarn',
'142|82': u'....Tarn-et-Garonne',
'143|719': u'..Poitou-Charentes',
'144|16': u'....Charente',
'145|17': u'....Charente-Maritime',
'146|79': u'....Deux-Sèvres',
'147|86': u'....Vienne',
'148|99712': u'..France Outre-Mer',
'149|99519': u'....Terres Australes et Antarctiques Françaises',
'150|97100': u'....Guadeloupe',
'151|97200': u'....Martinique',
'152|97300': u'....Guyane',
'153|97400': u'....La Réunion',
'154|97500': u'....Saint-Pierre-et-Miquelon',
'155|97600': u'....Mayotte',
'156|98300': u'....Polynésie Française',
'157|98600': u'....Wallis et Futuna',
'158|98800': u'....Nouvelle Calédonie',
'159|97800': u'....Saint-Martin',
'160|97700': u'....Saint-Barthélémy',
'161|102099': u'International',
'162|99715': u'..Afrique',
'163|99716': u'..Asie',
'164|99700': u'..UE Hors France',
'165|99701': u'..Europe Hors UE',
'166|99702': u'..Amérique du Nord',
'167|99711': u'..Océanie',
'168|99714': u'..Amérique Latine',
}.iteritems())]) }.iteritems())])
fonction_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ fonction_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
@ -191,18 +301,18 @@ class ApecModule(Module, CapJob):
type_contrat_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ type_contrat_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
' ': u'-- Indifférent --', ' ': u'-- Indifférent --',
'143694': u'CDI', '101888': u'CDI',
'143695': u'CDD', '101887': u'CDD',
'143696': u'Travail Temporaire', '101889': u'Interim',
}.iteritems())]) }.iteritems())])
salary_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ salary_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
' ': u'-- Indifférent --', ' ': u'-- Indifférent --',
'101839': u'Moins de 35 K€', '0|35': u'Moins de 35 K€',
'101840': u'Entre 35 et 49 K€', '35|50': u'Entre 35 et 49 K€',
'101841': u'Entre 50 et 69 K€', '50|70': u'Entre 50 et 69 K€',
'101842': u'Entre 70 et 90 K€', '70|90': u'Entre 70 et 90 K€',
'101843': u'Plus de 90 K€', '90|1000': u'Plus de 90 K€',
}.iteritems())]) }.iteritems())])
date_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ date_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
@ -214,9 +324,9 @@ class ApecModule(Module, CapJob):
}.iteritems())]) }.iteritems())])
level_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ level_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
' ': u'-- Indifférent --', '101882': u'Tous niveaux d\'expérience',
'101846': u'Débutant', '101881': u'Débutant',
'101848': u'Expérimenté', '101883': u'Expérimenté',
}.iteritems())]) }.iteritems())])
CONFIG = BackendConfig(Value('place', label=u'Lieu', choices=places_choices, default=''), CONFIG = BackendConfig(Value('place', label=u'Lieu', choices=places_choices, default=''),
@ -228,9 +338,8 @@ class ApecModule(Module, CapJob):
Value('level', label=u'Expérience', choices=level_choices, default='')) Value('level', label=u'Expérience', choices=level_choices, default=''))
def search_job(self, pattern=None): def search_job(self, pattern=None):
with self.browser:
for job_advert in self.browser.search_job(pattern=pattern): for job_advert in self.browser.search_job(pattern=pattern):
yield job_advert yield self.fill_obj(job_advert)
def decode_choice(self, choice): def decode_choice(self, choice):
splitted_choice = choice.split('|') splitted_choice = choice.split('|')
@ -247,13 +356,19 @@ class ApecModule(Module, CapJob):
contrat=self.config['contrat'].get(), contrat=self.config['contrat'].get(),
limit_date=self.config['limit_date'].get(), limit_date=self.config['limit_date'].get(),
level=self.config['level'].get()): level=self.config['level'].get()):
yield job_advert yield self.fill_obj(job_advert)
def get_job_advert(self, _id, advert=None): def get_job_advert(self, _id, advert=None):
with self.browser: job_advert = self.browser.get_job_advert(_id, advert)
return self.browser.get_job_advert(_id, advert) return self.fill_obj(job_advert)
def fill_obj(self, advert, fields): def fill_obj(self, advert, fields=None):
self.get_job_advert(advert.id, advert) if advert.contract_type in self.type_contrat_choices:
advert.contract_type = self.type_contrat_choices[advert.contract_type]
OBJECTS = {ApecJobAdvert: fill_obj} if advert.experience in self.level_choices:
advert.experience = self.level_choices[advert.experience]
return advert
OBJECTS = {BaseJobAdvert: fill_obj}

View file

@ -16,67 +16,63 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import requests
from weboob.browser.elements import ItemElement, method, DictElement
from weboob.browser.pages import JsonPage, pagination
from weboob.browser.filters.standard import DateTime, Format, Regexp
from weboob.browser.filters.json import Dict
from weboob.browser.filters.html import CleanHTML
from weboob.capabilities.job import BaseJobAdvert
from weboob.capabilities.base import NotAvailable
from weboob.deprecated.browser import Page class IdsPage(JsonPage):
from weboob.tools.html import html2text
import dateutil.parser
import re
from .job import ApecJobAdvert def get_adverts_number(self):
return self.doc['totalCount']
@pagination
@method
class iter_job_adverts(DictElement):
    """Iterate over the entries of the ``resultats`` list of the response.

    Each entry becomes a ``BaseJobAdvert`` carrying only its id. The search
    criteria and paging values are read from ``self.env``.
    """
    item_xpath = 'resultats'

    def next_page(self):
        """Return the POST request for the next result page, or None.

        Advances ``browser.start`` by one page and rebuilds the query with
        the criteria found in ``self.env``.
        """
        env = self.env
        browser = self.page.browser
        browser.start += env['range']

        # NOTE(review): assumes the start index is a zero-based offset (the
        # browser initialises it to 0), so an offset equal to the total is
        # already past the last result and no request is needed.
        if browser.start < env['count']:
            # advanced_search_job starts the iteration without a
            # typesConvention value; fall back to the empty filter that
            # create_parameters uses by default instead of failing on the
            # missing key.
            try:
                types_convention = env['typesConvention']
            except KeyError:
                types_convention = '[]'

            data = browser.create_parameters(pattern=env['pattern'],
                                             fonctions=env['fonctions'],
                                             lieux=env['lieux'],
                                             secteursActivite=env['secteursActivite'],
                                             typesContrat=env['typesContrat'],
                                             typesConvention=types_convention,
                                             niveauxExperience=env['niveauxExperience'],
                                             salaire_min=env['salaire_min'],
                                             salaire_max=env['salaire_max'],
                                             date_publication=env['date_publication'],
                                             start=browser.start,
                                             range=env['range'])
            return requests.Request("POST", self.page.url, data=data)

    class item(ItemElement):
        klass = BaseJobAdvert

        # The id is whatever follows the last '=' of the '@uriOffre' value.
        obj_id = Regexp(Dict('@uriOffre'), '.*=(.*)')
class SearchPage(Page): class OffrePage(JsonPage):
def iter_job_adverts(self): @method
re_id_title = re.compile('/offres-emploi-cadres/\d*_\d*_\d*_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?)_(.*?).html', re.DOTALL) class get_job_advert(ItemElement):
divs = self.document.getroot().xpath("//div[@class='boxContent offre']") + self.document.getroot().xpath("//div[@class='boxContent offre even']") klass = BaseJobAdvert
for div in divs:
a = self.parser.select(div, 'div/div/h3/a', 1, method='xpath')
_id = u'%s/%s' % (re_id_title.search(a.attrib['href']).group(1), re_id_title.search(a.attrib['href']).group(9))
advert = ApecJobAdvert(_id)
advert.title = u'%s' % re_id_title.search(a.attrib['href']).group(9).replace('-', ' ')
l = self.parser.select(div, 'h4', 1).text.split('-')
advert.society_name = u'%s' % l[0].strip()
advert.place = u'%s' % l[-1].strip()
date = self.parser.select(div, 'div/div/div', 1, method='xpath')
advert.publication_date = dateutil.parser.parse(date.text_content().strip()[8:]).date()
yield advert
obj_id = Dict('numeroOffre')
class AdvertPage(Page): obj_title = Dict('intitule')
def get_job_advert(self, url, advert): obj_description = CleanHTML(Dict('texteHtml'))
re_id_title = re.compile('/offres-emploi-cadres/\d*_\d*_\d*_(.*?)________(.*?).html(.*?)', re.DOTALL) obj_job_name = Dict('intitule')
if advert is None: obj_publication_date = DateTime(Dict('datePublication'))
_id = u'%s/%s' % (re_id_title.search(url).group(1), re_id_title.search(url).group(2)) obj_society_name = Dict('nomCommercialEtablissement', default=NotAvailable)
advert = ApecJobAdvert(_id) obj_contract_type = Dict('idNomTypeContrat')
advert.title = re_id_title.search(url).group(2).replace('-', ' ') obj_place = Dict('lieuTexte')
obj_pay = Dict('salaireTexte')
description = self.document.getroot().xpath("//div[@class='contentWithDashedBorderTop marginTop boxContent']/div")[0] obj_experience = Dict('idNomNiveauExperience')
advert.description = html2text(self.parser.tostring(description)) obj_url = Format('https://cadres.apec.fr/home/mes-offres/recherche-des-offres-demploi/liste-des-offres-demploi/detail-de-loffre-demploi.html?numIdOffre=%s', Dict('numeroOffre'))
advert.job_name = advert.title
trs = self.document.getroot().xpath("//table[@class='noFieldsTable']/tr")
for tr in trs:
th = self.parser.select(tr, 'th', 1, method='xpath')
td = self.parser.select(tr, 'td', 1, method='xpath')
if u'Date de publication' in u'%s' % th.text_content():
advert.publication_date = dateutil.parser.parse(td.text_content()).date()
elif u'Société' in u'%s' % th.text_content() and not advert.society_name:
society_name = td.text_content()
a = self.parser.select(td, 'a', method='xpath')
if a:
advert.society_name = u'%s' % society_name.replace(a[0].text_content(), '').strip()
else:
advert.society_name = society_name.strip()
elif u'Type de contrat' in u'%s' % th.text_content():
advert.contract_type = u'%s' % td.text_content().strip()
elif u'Lieu' in u'%s' % th.text_content():
advert.place = u'%s' % td.text_content()
elif u'Salaire' in u'%s' % th.text_content():
advert.pay = u'%s' % td.text_content()
elif u'Expérience' in u'%s' % th.text_content():
advert.experience = u'%s' % td.text_content()
advert.url = url
return advert