Improve apec advert page parsing: read each field by its row label instead of a fixed cell index

This commit is contained in:
Bezleputh 2013-07-08 22:55:40 +02:00 committed by Florent
commit 836dd27bfd
2 changed files with 21 additions and 13 deletions

View file

@@ -54,18 +54,26 @@ class AdvertPage(BasePage):
advert.description = self.document.getroot().xpath("//div[@class='contentWithDashedBorderTop marginTop boxContent']/div")[0].text_content()
td = self.document.getroot().xpath("//table[@class='noFieldsTable']/tr/td")
advert.job_name = advert.title
advert.publication_date = dateutil.parser.parse(td[2].text_content()).date()
society_name = td[3].text_content()
a = self.parser.select(td[3], 'a', 1, method='xpath').text_content()
advert.society_name = u'%s' % society_name.replace(a, '').strip()
advert.contract_type = u'%s' % td[4].text_content().strip()
advert.place = u'%s' % td[5].text_content()
td_pay = 6
if 'class' in td[6].attrib:
td_pay = 7
advert.pay = u'%s' % td[td_pay].text_content()
advert.experience = u'%s' % td[td_pay + 1].text_content()
trs = self.document.getroot().xpath("//table[@class='noFieldsTable']/tr")
for tr in trs:
th = self.parser.select(tr, 'th', 1, method='xpath')
td = self.parser.select(tr, 'td', 1, method='xpath')
if u'Date de publication' in u'%s' % th.text_content():
advert.publication_date = dateutil.parser.parse(td.text_content()).date()
elif u'Société' in u'%s' % th.text_content():
society_name = td.text_content()
a = self.parser.select(td, 'a', 1, method='xpath').text_content()
advert.society_name = u'%s' % society_name.replace(a, '').strip()
elif u'Type de contrat' in u'%s' % th.text_content():
advert.contract_type = u'%s' % td.text_content().strip()
elif u'Lieu' in u'%s' % th.text_content():
advert.place = u'%s' % td.text_content()
elif u'Salaire' in u'%s' % th.text_content():
advert.pay = u'%s' % td.text_content()
elif u'Expérience' in u'%s' % th.text_content():
advert.experience = u'%s' % td.text_content()
advert.url = url
return advert

View file

@@ -25,7 +25,7 @@ class ApecTest(BackendTest):
BACKEND = 'apec'
def test_apec(self):
l = list(self.backend.search_job(u'maitre brasseur'))
l = list(self.backend.search_job(u'informaticien'))
assert len(l)
advert = self.backend.get_job_advert(l[0].id, None)
self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))