From 05a2a8cc9cb0d923336f69d19e9705c04d4f1903 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Fri, 10 Oct 2014 11:30:05 +0200 Subject: [PATCH] [pap] exclude adverts from other websites --- modules/pap/pages.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/pap/pages.py b/modules/pap/pages.py index 03d6211e..943be750 100644 --- a/modules/pap/pages.py +++ b/modules/pap/pages.py @@ -63,6 +63,9 @@ class SearchResultsPage(HTMLPage): class item(ItemElement): klass = Housing + def condition(self): + return Regexp(Link('./div[@class="header-annonce"]/a'), '/annonces/(.*)', default=None)(self) + obj_id = Regexp(Link('./div[@class="header-annonce"]/a'), '/annonces/(.*)') obj_title = CleanText('./div[@class="header-annonce"]/a') obj_area = CleanDecimal(Regexp(CleanText('./div[@class="header-annonce"]/a/span[@class="desc"]'),