[regionsjob] fix: site changed
This commit is contained in:
parent
cdd213c2ef
commit
6deaa846f9
3 changed files with 170 additions and 133 deletions
|
|
@ -17,30 +17,45 @@
|
|||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.browser.pages import HTMLPage
|
||||
from weboob.browser.elements import ItemElement, SkipItem, ListElement, method
|
||||
from weboob.browser.filters.standard import CleanText, Regexp, Format, Env, DateGuesser, DateTime
|
||||
from weboob.browser.filters.html import Link, CleanHTML
|
||||
from weboob.tools.date import LinearDateGuesser
|
||||
from weboob.browser.pages import HTMLPage, pagination
|
||||
from weboob.browser.elements import ItemElement, ListElement, method
|
||||
from weboob.browser.filters.standard import CleanText, Regexp, Format, Env, Date, BrowserURL, Join
|
||||
from weboob.browser.filters.html import CleanHTML, Link
|
||||
from weboob.capabilities.job import BaseJobAdvert
|
||||
from weboob.exceptions import ParseError
|
||||
from datetime import date, timedelta
|
||||
|
||||
|
||||
class SearchPage(HTMLPage):
|
||||
@pagination
|
||||
@method
|
||||
class iter_job_adverts(ListElement):
|
||||
item_xpath = '//div[@id="liste_offres"]/ul/li'
|
||||
item_xpath = '//section[@class="annonce"]'
|
||||
|
||||
def next_page(self):
    """Return the link used by ``@pagination`` to reach the following page.

    The ``Link`` filter is evaluated, against this element, on the anchor
    whose class is exactly "picto picto-nextsmall".
    """
    next_anchor = Link('//a[@class="picto picto-nextsmall"]')
    return next_anchor(self)
|
||||
|
||||
class item(ItemElement):
|
||||
klass = BaseJobAdvert
|
||||
|
||||
obj_id = Format(u'%s#%s',
|
||||
Env('domain'),
|
||||
Regexp(Link('div/span[@class="offres_poste"]/a'), '.*?numoffre=(.*?)&de=consultation'))
|
||||
obj_title = CleanText('div/span[@class="offres_poste"]/a')
|
||||
obj_society_name = CleanText('div/span[@class="offres_entreprise"]/span/a')
|
||||
obj_place = CleanText('div/span[@class="offres_ville"]/span/span/span')
|
||||
obj_contract_type = CleanText('div/span[@class="offres_poste"]/span')
|
||||
obj_publication_date = DateGuesser(CleanText('div/span[@class="offres_date"]'), LinearDateGuesser())
|
||||
Regexp(Env('domain'), 'http://www\.(.*)\.com'),
|
||||
Regexp(CleanText('h1/a[2]/@href'), '/emplois/(.*)\.html'))
|
||||
obj_title = CleanText('h1/a[2]')
|
||||
obj_society_name = CleanText('figure/span[@itemprop="name"]')
|
||||
obj_place = CleanText('p[@class="inlineblock max-width-75"]')
|
||||
obj_contract_type = CleanText('p[@class="max-width-75"]')
|
||||
|
||||
def obj_publication_date(self):
    """Compute the publication date from the item's "infos" paragraph.

    The cleaned text of ``p[@class="infos"]`` is first handed to the
    ``Date`` filter. If that raises ``ParseError``, the raw text is
    inspected instead: when it contains 'hier' the result is yesterday's
    date, otherwise it is today's date.
    """
    infos_text = CleanText('p[@class="infos"]')
    try:
        return Date(infos_text)(self)
    except ParseError:
        # 'hier' is French for "yesterday"; any other unparseable text
        # is treated as "today".
        days_back = 1 if 'hier' in infos_text(self) else 0
        return date.today() - timedelta(days=days_back)
|
||||
|
||||
|
||||
class AdvertPage(HTMLPage):
|
||||
|
|
@ -48,28 +63,15 @@ class AdvertPage(HTMLPage):
|
|||
class get_job_advert(ItemElement):
|
||||
klass = BaseJobAdvert
|
||||
|
||||
def parse(self, el):
    """Prepare the element before its ``obj_*`` attributes are evaluated.

    If the object being filled has no id yet, only the current page URL
    is stored in ``self.env['url']``. Otherwise the existing object is
    completed in place (url, description, pay) from ``el`` and
    ``SkipItem`` is raised.
    """
    if not self.obj.id:
        self.env['url'] = self.page.url
        return

    job = self.obj
    job.url = self.page.url

    # Description is the "description_annonce" paragraph followed by the
    # paragraph immediately after it, separated by a CRLF.
    description_filter = Format(
        u'%s\r\n%s',
        CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]'),
        CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]/following-sibling::p[1]'))
    job.description = description_filter(el)

    pay_filter = CleanText('//div[@id="annonce"]/p[@class="rubrique_annonce"]/following-sibling::p[1]')
    job.pay = pay_filter(el)

    raise SkipItem()
|
||||
|
||||
obj_description = Format(u'%s%s',
|
||||
CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]'),
|
||||
CleanHTML('//div[@id="annonce"]/p[@id="description_annonce"]/following-sibling::p[1]'))
|
||||
|
||||
obj_description = Join('\n%s', '//div[@id="annonce-detail"]/p[@class="text"]', textCleaner=CleanHTML)
|
||||
obj_id = Env('_id')
|
||||
obj_url = Env('url')
|
||||
obj_publication_date = DateTime(Regexp(CleanText('//div[@id="annonce"]/p[@class="date_ref"]'),
|
||||
'(\d{2}/\d{2}/\d{4})'))
|
||||
obj_title = CleanText('//div[@id="annonce"]/h1')
|
||||
obj_society_name = CleanText('//div[@id="annonce"]/p[@class="contrat_loc"]/strong[1]')
|
||||
obj_contract_type = CleanText('//div[@id="annonce"]/p[@class="contrat_loc"]/strong[2]')
|
||||
obj_place = CleanText('//div[@id="annonce"]/p[@class="contrat_loc"]/strong[3]')
|
||||
obj_pay = CleanText('//div[@id="annonce"]/p[@class="rubrique_annonce"]/following-sibling::p[1]')
|
||||
obj_url = BrowserURL('advert_page', _id=Env('_id'))
|
||||
obj_publication_date = Date(Regexp(CleanText('//div[@id="annonce-detail"]/p[@class="infos"]'),
|
||||
'(\d{2}/\d{2}/\d{4})'))
|
||||
obj_title = CleanText('//div[@id="annonce"]/div/div/h1')
|
||||
obj_society_name = CleanText('//section[@class="entp-resume"]/h1/a')
|
||||
|
||||
obj_contract_type = CleanText('//dl[@class="infos-annonce"]/dt[span[@class="picto picto-contrat-grey"]]/following-sibling::dd[1]')
|
||||
obj_place = CleanText('//dl[@class="infos-annonce"]/dt[span[@class="picto picto-geolocalisation-grey"]]/following-sibling::dd[1]')
|
||||
obj_pay = CleanText('//div[@id="annonce-detail"]/p[@class="infos"]/preceding-sibling::p[1]',
|
||||
replace=[('Salaire : ', '')])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue