[CCI] update on cci website

This commit is contained in:
Bezleputh 2013-10-28 20:23:52 +01:00
commit a286975f2a
2 changed files with 3 additions and 14 deletions

View file

@@ -29,7 +29,7 @@ __all__ = ['CciBrowser']
class CciBrowser(BaseBrowser):
PROTOCOL = 'http'
DOMAIN = 'www.cci.fr/web/recrutement/les-offres-d-emploi'
ENCODING = None
ENCODING = "UTF-8"
PAGES = {
'%s://%s' % (PROTOCOL, DOMAIN): SearchPage,

View file

@@ -59,25 +59,14 @@ class SearchPage(BasePage):
advert.job_name = u'%s' % first_div[1].text_content()
second_div = self.parser.select(item, 'div/fieldset', 2, method='xpath')
description = ""
ps_1 = self.parser.select(second_div[0], 'p[@class="normal"]', method='xpath')
h2s_1 = self.parser.select(second_div[0], 'h2[@class="titreParagraphe"]', method='xpath')
description = ""
if len(ps_1) == 5 and len(h2s_1) == 5:
for i in range(0, 5):
description += "\r\n-- %s --\r\n" % h2s_1[i].text
description += "%s\r\n" % ps_1[i].text_content()
ps_2 = self.parser.select(second_div[1], 'p[@class="normal"]', method='xpath')
h2s_2 = self.parser.select(second_div[1], 'h2[@class="titreParagraphe"]', method='xpath')
if len(ps_2) == 3 and len(h2s_2) == 2:
description += "\r\n-- %s --\r\n" % h2s_2[0].text
a = self.parser.select(ps_2[0], 'a', 1, method='xpath')
description += "%s\r\n" % a.text_content()
description += "\r\n-- %s --\r\n" % h2s_2[1].text
description += "%s\r\n" % ps_2[1].text_content()
description += "%s\r\n" % ps_2[2].text_content()
advert.description = description
advert.url = self.url + '#' + advert.id
date = self.parser.select(item, 'div/fieldset/p[@class="dateOffre"]', 1, method='xpath')