diff --git a/modules/popolemploi/backend.py b/modules/popolemploi/backend.py index 7856bebb..f7364629 100644 --- a/modules/popolemploi/backend.py +++ b/modules/popolemploi/backend.py @@ -220,13 +220,13 @@ class PopolemploiBackend(BaseBackend, ICapJob): }.iteritems())]) limit_date_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ - '': u'Aucune limite', - 'UN_JOUR': u'Hier', - 'TROIS_JOUR': u'3 jours', - 'UNE_SEMAINE': u'1 semaine', - 'DEUX_SEMAINES': u'2 semaines', - 'UN_MOIS': u'1 mois', - 'TROIS_MOIS': u'3 mois', + ' ': u'Aucune limite', + '1': u'Hier', + '3': u'3 jours', + '7': u'1 semaine', + '14': u'2 semaines', + '31': u'1 mois', + '93': u'3 mois', }.iteritems())]) domain_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ diff --git a/modules/popolemploi/browser.py b/modules/popolemploi/browser.py index 22e36c9d..cf059313 100644 --- a/modules/popolemploi/browser.py +++ b/modules/popolemploi/browser.py @@ -19,10 +19,11 @@ from weboob.tools.browser.decorators import id2url from weboob.tools.browser import BaseBrowser +import re import urllib import copy -from .pages import SearchPage, AdvertPage +from .pages import SearchPage, AdvertPage, ChangeLocationReturnPage, ChangeLocationPage from .job import PopolemploiJobAdvert @@ -35,6 +36,8 @@ class PopolemploiBrowser(BaseBrowser): ENCODING = None PAGES = { + 'http://candidat.pole-emploi.fr/candidat/rechercheoffres/avancee.recherche': ChangeLocationPage, + 'http://candidat.pole-emploi.fr/candidat/rechercheoffres/avancee/(.*?)': ChangeLocationReturnPage, 'http://candidat.pole-emploi.fr/candidat/rechercheoffres/resultats(.*?)': SearchPage, 'http://candidat.pole-emploi.fr/candidat/rechercheoffres/detail/(?P<id>.+)': AdvertPage, } @@ -48,8 +51,6 @@ class PopolemploiBrowser(BaseBrowser): def advanced_search_job(self, metier=None, place=None, contrat=None, salary=None, qualification=None, limit_date=None, domain=None): - 
self.location('http://candidat.pole-emploi.fr/candidat/rechercheoffres/rechercheavanceeparparametres') - data = { 't:formdata': 'H4sIAAAAAAAAALVYz2/cRBj9sqilZFtafkkICSQgvSEn67YQkqZlu9lWgNNEXcolBzTr/XYzxfaYmfGue4ETR5A4I3HiiCoB9x7oASkHDvwH/AGckDiBxIxnbSfbJu16TKREyvM3b957M54Zzw9/wonJKrzN0d9Drn7ZcMhRLLfHJPIR10gQIB8jFwEpSgQN44AOKaLgsDPkToxO6Ds+C8MkciSJUUh+16F7ocZiFmEkhePR6NNe0g+pbI/R77BIYipxaYczH4XInghBWXT7yx/vbTRff7UBjV142jd1El7Y9e6QMVkOSDRa3u7fQV+ue9DEAENFf5OEKOH5AyU9yWk0Wk85nNOgo0HHtIPyJ41jCa9lJj9WJj1yKzfZK0xOrsDlo+IpcGKAzK8gkXQ4GVA24iyJVUirjI8cEhNVWORzSYXDMaD9gyHd0q1u6FZLPZRJ/M3K/bd+fvavXxqwoMzqMDgLtNnP4HNopPrvUxIWy85qEHuxgtjWfrq//1Pv3/sNlWmmarIGq/MLEShbKy0l4tKxIvpEoNPuK5D48jrFYGDiOn/7QfOPl3795/i4TppeLBW251X40Ex/cG9wcfj3d7/ZZ+YqRZqhacy5FuZUW810dvIeXJmfYMRJNBiwkNAIc0WnM3DTgPa0Rt1N8OxolqYrh8iWJDWenWMXspCmNBJO1zQy69jDI/rtF199/f327u9q7fLglB9QVfv+IJt85Vqm/13I5+KhcOLJVdioMGosETOZNzWWR25LahJ/F96Zn0XuqbxyUSey/yyIjJAb0K3YfmbMNd2Luax40oH2/LzTiGLCD1k9O8V3CP8oM10HubFfaTSjJETOTHk+RQy2rTFrUoslI2QDvc1y9bIkmTZQW/LpHPUUWk2dvBtjtgMQWVjWWMdg1qTG8hZ8aMXy6Cl5UGg8uQxrFd5fRezLT1Zy76dywI7OuO5CZ36KIUs0LiUKEhBazsTnyic986SmDoxYG7+t2fhadnQWivYw4RiStFCUAVsktaMziiqdGjGNkastzjcjeVIdREvIllIzLk424dr8JGMSaHOH1anEzpkH3VJjLfQWCSYRlThQZIXGxQza1JAtpdHVhqsVNh7dHqeHmlzamQztTtEaiC1OckK9TWoQ1NmajpVftYByjDkVWLywr0xL2tOSbllSd5f/hw/38T7curu0Xy7d2eXStaOzV3RhVtEFOzoLRfoGIkFRbik5YEdn8ZpHdIwkMUSFrDMG9QxaA7F9ZO5sZBWnVd66psjcR0bm1kBsBG7AeoWzpfrCHOr7hlzcMwViSWhEefBBhful4upQcqqWn5BJ4Qfq03mq8eWioGcKtlRBRxXU251xcB7ePIpSf2lhlJ1Ni2+GA9gTNjXdLMEbj1XOBYfrT3qBeo0lkkVHXT2U16YLh69FZ24Ki67/A14oWbX6FQAA', 'radiogroup': u'MOTS_CLES', @@ -80,21 +81,26 @@ class PopolemploiBrowser(BaseBrowser): } if place.split('|')[1] == 'DEPARTEMENT': - self.choose_departement(place.split('|')[2], copy.deepcopy(data)) + place_type, place_number = self.choose_departement(place.split('|')[2], copy.deepcopy(data)) elif place.split('|')[1] == 'REGION': - self.choose_region(place.split('|')[2], copy.deepcopy(data)) + place_type, place_number = 
self.choose_region(place.split('|')[2], copy.deepcopy(data)) else: - data['select'] = u'10' - data['radiogroup_0'] = 'FRANCE' + place_type = 'FRANCE' + place_number = '01' - data['rechercher'] = u'' - data['partenaires'] = u'on' - data['rechercher:hiddenelementSubmit'] = u'rechercher:hiddenelementSubmit' + params = 'A_%s_%s_%s__%s_P_%s_%s_%s_______INDIFFERENT______________%s' % (metier, + place_type, + place_number, + contrat, + domain, + salary, + qualification, + limit_date + ) - self.location('http://candidat.pole-emploi.fr/candidat/rechercheoffres/avancee.recherche', - urllib.urlencode(data)) + self.location('http://candidat.pole-emploi.fr/candidat/rechercheoffres/resultats/%s' % params) assert self.is_on_page(SearchPage) return self.page.iter_job_adverts() @@ -117,6 +123,15 @@ class PopolemploiBrowser(BaseBrowser): self.location('http://candidat.pole-emploi.fr/candidat/rechercheoffres/avancee/choisirlesregions.choisirdeslieux', urllib.urlencode(data2)) + return self.decode_place(self.page.url) + + def decode_place(self, url): + re_url = re.compile('http://candidat.pole-emploi.fr/candidat/rechercheoffres/avancee/A_(.*?)_(.*?)_(.*?)__(.*?)_P_(.*?)_(.*?)_(.*?)_______INDIFFERENT______________(.*?)', re.DOTALL) + if re_url.match(url): + return re_url.search(url).group(2), re_url.search(url).group(3) + else: + return 'FRANCE', '01' + def choose_departement(self, place, data): data['select'] = u'10' @@ -137,6 +152,8 @@ class PopolemploiBrowser(BaseBrowser): self.location('http://candidat.pole-emploi.fr/candidat/rechercheoffres/avancee/choisirlesdepartements.choisirdeslieux', urllib.urlencode(data2)) + return self.decode_place(self.page.url) + @id2url(PopolemploiJobAdvert.id2url) def get_job_advert(self, url, advert): self.location(url) diff --git a/modules/popolemploi/pages.py b/modules/popolemploi/pages.py index 4d713457..b63e0836 100644 --- a/modules/popolemploi/pages.py +++ b/modules/popolemploi/pages.py @@ -20,11 +20,12 @@ from weboob.tools.browser import 
BasePage import dateutil.parser +import re from .job import PopolemploiJobAdvert -__all__ = ['SearchPage', 'AdvertPage'] +__all__ = ['SearchPage', 'AdvertPage', 'ChangeLocationPage', 'ChangeLocationReturnPage'] class SearchPage(BasePage): @@ -36,9 +37,10 @@ class SearchPage(BasePage): yield advert def create_job_advert(self, row): + re_id = re.compile('../resultats.tableauresultatrechercheoffre:detailOffre/(.*?)\?(.*?)', re.DOTALL) a = self.parser.select(row, 'td[@headers="offre"]/a', 1, method='xpath') - _id = u'%s' % (a.attrib['href'][-7:]) - if _id: + if re_id.match(a.attrib['href']): + _id = u'%s' % (re_id.search(a.attrib['href']).group(1)) advert = PopolemploiJobAdvert(_id) advert.contract_type = u'%s' % self.parser.select(row, 'td[@headers="contrat"]', 1, method='xpath').text advert.title = u'%s' % a.text_content().strip() @@ -53,9 +55,9 @@ class SearchPage(BasePage): class AdvertPage(BasePage): def get_job_advert(self, url, advert): - content = self.document.getroot().xpath('//div[@class="block-content"]/div')[0] + content = self.document.getroot().xpath('//div[@id="offre-body"]')[0] if not advert: - _id = self.parser.select(content, 'ul/li/ul/li/div[@class="value"]/span', 1, method='xpath').text + _id = self.parser.select(content, 'div/div/ul/li/div[@class="value"]/span', 1, method='xpath').text advert = PopolemploiJobAdvert(_id) advert.title = u'%s' % self.parser.select(content, 'h4', 1, method='xpath').text.strip() @@ -101,3 +103,11 @@ class AdvertPage(BasePage): advert.pay = pay.strip() return advert + + +class ChangeLocationReturnPage(BasePage): + pass + + +class ChangeLocationPage(BasePage): + pass