From c47451bd4ae418c5c1430cd2cb99321c1d3b6f24 Mon Sep 17 00:00:00 2001
From: Bezleputh
Date: Tue, 4 Aug 2015 14:18:51 +0200
Subject: [PATCH] [regionsjob] do not crash while parsing an external advert

---
 modules/regionsjob/pages.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/regionsjob/pages.py b/modules/regionsjob/pages.py
index 4b76ed3f..065ecec5 100644
--- a/modules/regionsjob/pages.py
+++ b/modules/regionsjob/pages.py
@@ -39,9 +39,15 @@ class SearchPage(HTMLPage):
         class item(ItemElement):
             klass = BaseJobAdvert
 
+            def condition(self):
+                """Return the id captured from the '/emplois/<id>.html' link, or None."""
+                return Regexp(CleanText('h1/a[@class="lien-annonce"]/@href'),
+                              r'/emplois/(.*)\.html',
+                              default=None)(self)
+
             obj_id = Format(u'%s#%s',
                             Regexp(Env('domain'), 'http://www\.(.*)\.com'),
-                            Regexp(CleanText('h1/a[2]/@href'), '/emplois/(.*)\.html'))
+                            Regexp(CleanText('h1/a[@class="lien-annonce"]/@href'), r'/emplois/(.*)\.html'))
             obj_title = CleanText('h1/a[2]')
             obj_society_name = CleanText('figure/span[@itemprop="name"]')
             obj_place = CleanText('p[@class="inlineblock max-width-75"]')