[monster] fix and adapt to browser2
This commit is contained in:
parent
015626b87e
commit
549551a629
4 changed files with 81 additions and 178 deletions
|
|
@ -18,109 +18,64 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.deprecated.browser import Page
|
||||
from weboob.tools.html import html2text
|
||||
import re
|
||||
from datetime import datetime, time, timedelta
|
||||
from .job import MonsterJobAdvert
|
||||
|
||||
from weboob.browser.pages import HTMLPage, pagination
|
||||
from weboob.browser.elements import ItemElement, ListElement, method
|
||||
from weboob.browser.filters.standard import CleanText, Regexp, Filter, Env, BrowserURL, Join
|
||||
from weboob.browser.filters.html import Link, CleanHTML
|
||||
from weboob.capabilities.job import BaseJobAdvert
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
|
||||
|
||||
class SearchPage(Page):
    """Search-results page of the legacy (deprecated-browser) Monster module."""

    def iter_job_adverts(self):
        """Yield one ``MonsterJobAdvert`` per visible row of the listings table.

        Rows without a ``class`` attribute, or whose class is ``aceHidden``,
        are skipped.  The advert id is the part of the title link's ``href``
        between the site prefix and ``.aspx``.

        The publication date is derived from the relative text shown in the
        row ("... N heures" / "... N jour(s)"); when the text carries no
        usable amount or unit, today's date at midnight is used, which is
        the same fallback the ``MonsterDate`` filter in this file applies.
        """
        # Raw strings avoid invalid-escape warnings; the dots are escaped so
        # they only match literal dots of the host name and extension.
        re_id = re.compile(r'http://offre-emploi\.monster\.fr/(.*?)\.aspx', re.DOTALL)
        re_number = re.compile(r'\d+')

        trs = self.document.getroot().xpath("//table[@class='listingsTable']/tbody/tr")
        for tr in trs:
            if 'class' not in tr.attrib or tr.attrib['class'] == 'aceHidden':
                continue

            a = self.parser.select(tr, 'td/div/div[@class="jobTitleContainer"]/a', 1, method='xpath')
            _id = u'%s' % re_id.search(a.attrib['href']).group(1)
            advert = MonsterJobAdvert(_id)
            advert.society_name = u'%s' % self.parser.select(tr, 'td/div/div[@class="companyContainer"]/div/a',
                                                             1, method='xpath').attrib['title']
            advert.title = u'%s' % a.text

            date = self.parser.select(tr, 'td/div/div[@class="fnt20"]', 1, method='xpath').text_content().strip()
            now = datetime.now()
            published = now  # fallback: no amount, or an unit we do not know
            number = re_number.search(date)
            if number:
                amount = int(number.group(0))
                if 'heures' in date:
                    published = now - timedelta(hours=amount)
                elif 'jour' in date:
                    published = now - timedelta(days=amount)
            # Only the day matters: the time part is reset to midnight.
            advert.publication_date = datetime.combine(published, time())

            place = self.parser.select(tr, 'td/div/div[@class="jobLocationSingleLine"]/a', method='xpath')
            if place:
                advert.place = u'%s' % place[0].attrib['title']

            yield advert
|
||||
|
||||
|
||||
class AdvertPage(Page):
|
||||
def get_job_advert(self, url, advert):
    """Fill the advert described by this page, creating it when needed.

    :param url: URL of the advert page; when ``advert`` is None the advert
                id is extracted from it (the part before ``.aspx``).
    :param advert: advert object to complete, or None to create a new
                   ``MonsterJobAdvert``.
    :return: the result of ``fill_normal_advert`` when a ``jobcopy`` div is
             present, else of ``fill_special_advert`` when a ``divtxt`` div
             is present, else the advert with only its ``url`` set.
    """
    if advert is None:
        # Raw string with escaped dots so that only the real host matches;
        # compiled here because it is only needed when building a new advert.
        re_id = re.compile(r'http://offre-emploi\.monster\.fr/(.*?)\.aspx', re.DOTALL)
        _id = u'%s' % re_id.search(url).group(1)
        advert = MonsterJobAdvert(_id)

    advert.url = url

    root = self.document.getroot()
    div_normal = root.xpath('//div[@id="jobcopy"]')
    if div_normal:
        return self.fill_normal_advert(advert, div_normal[0])

    div_special = root.xpath('//div[@id="divtxt"]')
    if div_special:
        return self.fill_special_advert(advert, div_special[0])

    # Neither known layout was found: hand back what we have instead of
    # implicitly returning None and losing the advert.
    return advert
|
||||
|
||||
class MonsterDate(Filter):
    """Filter converting Monster's relative publication text into a datetime."""

    def filter(self, date):
        """Return the publication day, at midnight, described by ``date``.

        :param date: text containing an amount followed somewhere by the
                     unit ``heures`` (hours) or ``jour`` (days).
        :return: ``now`` minus that many hours/days, with the time part
                 reset to midnight; today's date at midnight when the text
                 holds no number or no recognised unit.
        """
        now = datetime.now()
        number = re.search(r"\d+", date)
        if number:
            amount = int(number.group(0))
            if 'heures' in date:
                return datetime.combine(now - timedelta(hours=amount), time())
            if 'jour' in date:
                return datetime.combine(now - timedelta(days=amount), time())
        # No amount or unknown unit: assume the advert was published today.
        return datetime.combine(now, time.min)
|
||||
|
||||
def fill_special_advert(self, advert, div):
|
||||
advert.title = u'%s' % self.parser.select(div, 'div[@class="poste"]', 1, method='xpath').text
|
||||
description = self.parser.select(div, 'div[@id="jobBodyContent"]', 1, method='xpath')
|
||||
advert.description = html2text(self.parser.tostring(description))
|
||||
|
||||
titresmenuG = self.document.getroot().xpath('//div[@id="divmenuGauche"]')[0]
|
||||
contract_type = self.parser.select(titresmenuG, '//span[@itemprop="employmentType"]', method='xpath')
|
||||
if len(contract_type) != 0:
|
||||
advert.contract_type = u'%s' % contract_type[0].text_content()
|
||||
class SearchPage(HTMLPage):
|
||||
@pagination
|
||||
@method
|
||||
class iter_job_adverts(ListElement):
|
||||
item_xpath = '//table[@class="listingsTable"]/tbody/tr[@class="odd"] | //table[@class="listingsTable"]/tbody/tr[@class="even"]'
|
||||
|
||||
return self.fill_advert(advert, titresmenuG)
|
||||
def next_page(self):
    """Evaluate the ``Suivant`` ("next") anchor link against this element.

    The ``Link`` filter is built with ``default=None``; presumably that is
    what comes back when the anchor is absent -- confirm against the
    filter's documentation.
    """
    next_link = Link('//a[@title="Suivant"]', default=None)
    return next_link(self)
|
||||
|
||||
def fill_normal_advert(self, advert, div):
|
||||
advert.title = u'%s' % self.parser.select(div, 'h1', 1, method='xpath').text
|
||||
description = self.parser.select(div, 'div[@id="jobBodyContent"]', 1, method='xpath')
|
||||
advert.description = html2text(self.parser.tostring(description))
|
||||
class item(ItemElement):
|
||||
klass = BaseJobAdvert
|
||||
|
||||
jobsummary = self.document.getroot().xpath('//div[@id="jobsummary_content"]')[0]
|
||||
contract_type = self.parser.select(jobsummary, 'dl/dd[@class="multipleddlast"]/span', method='xpath')
|
||||
if len(contract_type) != 0:
|
||||
advert.contract_type = u'%s' % contract_type[0].text_content()
|
||||
obj_id = Regexp(Link('./td/div/div[@class="jobTitleContainer"]/a'),
|
||||
'http://offre-emploi.monster.fr:80/(.*?).aspx')
|
||||
obj_society_name = CleanText('./td/div/div[@class="companyContainer"]/div/a')
|
||||
obj_title = CleanText('./td/div/div[@class="jobTitleContainer"]/a')
|
||||
obj_publication_date = MonsterDate(CleanText('td/div/div[@class="fnt20"]'))
|
||||
obj_place = CleanText('./td/div/div[@class="jobLocationSingleLine"]/a/@title', default=NotAvailable)
|
||||
|
||||
society_name = self.parser.select(jobsummary, '//span[@itemprop="name"]', method='xpath')
|
||||
if len(society_name) != 0:
|
||||
advert.society_name = u'%s' % society_name[0].text_content()
|
||||
|
||||
return self.fill_advert(advert, jobsummary)
|
||||
class AdvertPage(HTMLPage):
|
||||
@method
|
||||
class get_job_advert(ItemElement):
|
||||
klass = BaseJobAdvert
|
||||
|
||||
def fill_advert(self, advert, jobsummary):
    """Copy the optional ``itemprop``-tagged summary fields onto ``advert``.

    Each field is looked up with its own XPath query through
    ``self.parser.select``.  An attribute is assigned only when its query
    yields at least one node, in which case the text content of the first
    node is used; otherwise the attribute is left untouched.

    :param advert: advert object to complete.
    :param jobsummary: node passed as the context of every query.
    :return: the same ``advert`` object.
    """
    # (attribute set on the advert, XPath of the <span> holding its value),
    # queried in this order.
    queries = (
        ('place', '//span[@itemprop="jobLocation"]'),
        ('pay', '//span[@itemprop="baseSalary"]'),
        ('formation', '//span[@itemprop="educationRequirements"]'),
        ('experience', '//span[@itemprop="qualifications"]'),
    )
    for attribute, xpath in queries:
        nodes = self.parser.select(jobsummary, xpath, method='xpath')
        if len(nodes) != 0:
            setattr(advert, attribute, u'%s' % nodes[0].text_content())

    return advert
|
||||
obj_id = Env('_id')
|
||||
obj_url = BrowserURL('advert', _id=Env('_id'))
|
||||
obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]')
|
||||
obj_description = CleanHTML('//div[@id="jobBodyContent"]')
|
||||
obj_contract_type = Join('%s ', '//dd[starts-with(@class, "multipledd")]')
|
||||
obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
|
||||
obj_place = CleanText('//span[@itemprop="jobLocation"]')
|
||||
obj_pay = CleanText('//span[@itemprop="baseSalary"]')
|
||||
obj_formation = CleanText('//span[@itemprop="educationRequirements"]')
|
||||
obj_experience = CleanText('//span[@itemprop="qualifications"]')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue