From 4d59d1f9eadc95aebb768d76e3fc596bf360ec68 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Mon, 23 Feb 2015 09:38:01 +0100 Subject: [PATCH] [monster] improve page parsing --- modules/monster/pages.py | 4 ++-- modules/monster/test.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/monster/pages.py b/modules/monster/pages.py index 309c9dc0..f562860b 100644 --- a/modules/monster/pages.py +++ b/modules/monster/pages.py @@ -71,8 +71,8 @@ class AdvertPage(HTMLPage): obj_id = Env('_id') obj_url = BrowserURL('advert', _id=Env('_id')) - obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]') - obj_description = CleanHTML('//div[@id="jobBodyContent"]') + obj_title = CleanText('//div[@id="jobcopy"]/h1[@itemprop="title"]|//div[@itemprop="title"]/h1') + obj_description = CleanHTML('//div[@id="jobBodyContent"]|//div[@itemprop="description"]') obj_contract_type = Join('%s ', '//dd[starts-with(@class, "multipledd")]') obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]') obj_place = CleanText('//span[@itemprop="jobLocation"]') diff --git a/modules/monster/test.py b/modules/monster/test.py index 043f91c0..43365588 100644 --- a/modules/monster/test.py +++ b/modules/monster/test.py @@ -17,7 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>.
- +import itertools from weboob.tools.test import BackendTest @@ -25,13 +25,13 @@ class MonsterTest(BackendTest): MODULE = 'monster' def test_monster_search(self): - l = list(self.backend.search_job(u'marketing')) + l = list(itertools.islice(self.backend.search_job(u'marketing'), 0, 20)) assert len(l) advert = self.backend.get_job_advert(l[0].id, None) self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url)) def test_monster_advanced_search(self): - l = list(self.backend.advanced_search_job()) + l = list(itertools.islice(self.backend.advanced_search_job(), 0, 20)) assert len(l) advert = self.backend.get_job_advert(l[0].id, None) self.assertTrue(advert.url, 'URL for announce "%s" not found: %s' % (advert.id, advert.url))