improve apec page parsing
This commit is contained in:
parent
cf6dfaeb82
commit
836dd27bfd
2 changed files with 21 additions and 13 deletions
|
|
@@ -54,18 +54,26 @@ class AdvertPage(BasePage):
|
||||||
|
|
||||||
advert.description = self.document.getroot().xpath("//div[@class='contentWithDashedBorderTop marginTop boxContent']/div")[0].text_content()
|
advert.description = self.document.getroot().xpath("//div[@class='contentWithDashedBorderTop marginTop boxContent']/div")[0].text_content()
|
||||||
|
|
||||||
td = self.document.getroot().xpath("//table[@class='noFieldsTable']/tr/td")
|
|
||||||
advert.job_name = advert.title
|
advert.job_name = advert.title
|
||||||
advert.publication_date = dateutil.parser.parse(td[2].text_content()).date()
|
|
||||||
society_name = td[3].text_content()
|
trs = self.document.getroot().xpath("//table[@class='noFieldsTable']/tr")
|
||||||
a = self.parser.select(td[3], 'a', 1, method='xpath').text_content()
|
for tr in trs:
|
||||||
|
th = self.parser.select(tr, 'th', 1, method='xpath')
|
||||||
|
td = self.parser.select(tr, 'td', 1, method='xpath')
|
||||||
|
if u'Date de publication' in u'%s' % th.text_content():
|
||||||
|
advert.publication_date = dateutil.parser.parse(td.text_content()).date()
|
||||||
|
elif u'Société' in u'%s' % th.text_content():
|
||||||
|
society_name = td.text_content()
|
||||||
|
a = self.parser.select(td, 'a', 1, method='xpath').text_content()
|
||||||
advert.society_name = u'%s' % society_name.replace(a, '').strip()
|
advert.society_name = u'%s' % society_name.replace(a, '').strip()
|
||||||
advert.contract_type = u'%s' % td[4].text_content().strip()
|
elif u'Type de contrat' in u'%s' % th.text_content():
|
||||||
advert.place = u'%s' % td[5].text_content()
|
advert.contract_type = u'%s' % td.text_content().strip()
|
||||||
td_pay = 6
|
elif u'Lieu' in u'%s' % th.text_content():
|
||||||
if 'class' in td[6].attrib:
|
advert.place = u'%s' % td.text_content()
|
||||||
td_pay = 7
|
elif u'Salaire' in u'%s' % th.text_content():
|
||||||
advert.pay = u'%s' % td[td_pay].text_content()
|
advert.pay = u'%s' % td.text_content()
|
||||||
advert.experience = u'%s' % td[td_pay + 1].text_content()
|
elif u'Expérience' in u'%s' % th.text_content():
|
||||||
|
advert.experience = u'%s' % td.text_content()
|
||||||
|
|
||||||
advert.url = url
|
advert.url = url
|
||||||
return advert
|
return advert
|
||||||
|
|
|
||||||
|
|
@@ -25,7 +25,7 @@ class ApecTest(BackendTest):
|
||||||
BACKEND = 'apec'
|
BACKEND = 'apec'
|
||||||
|
|
||||||
def test_apec(self):
|
def test_apec(self):
|
||||||
l = list(self.backend.search_job(u'maitre brasseur'))
|
l = list(self.backend.search_job(u'informaticien'))
|
||||||
assert len(l)
|
assert len(l)
|
||||||
advert = self.backend.get_job_advert(l[0].id, None)
|
advert = self.backend.get_job_advert(l[0].id, None)
|
||||||
self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))
|
self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue