From 9cd521bfde651c3d64708e0df17ea60f329c6263 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Thu, 13 Mar 2014 19:23:32 +0100 Subject: [PATCH] [hybride] improve browser2 adaptation --- modules/hybride/browser.py | 11 ++--- modules/hybride/calendar.py | 10 +---- modules/hybride/pages.py | 82 ++++++++++++++----------------------- 3 files changed, 36 insertions(+), 67 deletions(-) diff --git a/modules/hybride/browser.py b/modules/hybride/browser.py index c9e7c9b7..0a2ca842 100644 --- a/modules/hybride/browser.py +++ b/modules/hybride/browser.py @@ -17,12 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -#from weboob.tools.browser.decorators import id2url -#from weboob.tools.browser import BaseBrowser -#from .calendar import HybrideCalendarEvent from .pages import ProgramPage, EventPage - from weboob.tools.browser2 import PagesBrowser, URL, Firefox __all__ = ['HybrideBrowser'] @@ -36,9 +32,10 @@ class HybrideBrowser(PagesBrowser): event_page = URL('/programme/item/(?P<_id>.*)', EventPage) def list_events(self, date_from, date_to=None, city=None, categories=None): - self.program_page.stay_or_go() - self.page.set_filters(date_from, date_to, city, categories) - return self.page.list_events() + return self.program_page.stay_or_go().list_events(date_from=date_from, + date_to=date_to, + city=city, + categories=categories) def get_event(self, _id, event=None): return self.event_page.stay_or_go(_id=_id).get_event(obj=event) diff --git a/modules/hybride/calendar.py b/modules/hybride/calendar.py index 2b65dc77..e724ddd1 100644 --- a/modules/hybride/calendar.py +++ b/modules/hybride/calendar.py @@ -24,15 +24,9 @@ class HybrideCalendarEvent(BaseCalendarEvent): def __init__(self): BaseCalendarEvent.__init__(self) + self.city = u'Lille' self.location = u'18 rue Gosselet' self.sequence = 1 self.transp = TRANSP.TRANSPARENT self.status = STATUS.CONFIRMED - - @classmethod - def get_city(cls): - return u'Lille' - - @classmethod - def get_category(cls): - return CATEGORIES.CINE + self.category = CATEGORIES.CINE diff --git a/modules/hybride/pages.py b/modules/hybride/pages.py index 71c28365..8f98d15c 100644 --- a/modules/hybride/pages.py +++ b/modules/hybride/pages.py @@ -46,19 +46,25 @@ class CombineDate(Filter): return datetime.combine(format_date(text), time.max) +class Description(Filter): + def filter(self, el): + description = '' + + description_intro = el[0].xpath("div[@class='itemIntroText']/table/tbody/tr/td") + + if description_intro and len(description_intro) > 0: + description += u'%s' % description_intro[0].text_content() + + description_content = el[0].xpath("div[@class='itemFullText']/table/tbody/tr/td") + + if description_content and len(description_content) > 0: + description += u'%s' % description_content[0].text_content() + + return u'%s' % description + + class ProgramPage(HTMLPage): - date_from = None - date_to = None - city = None - categories = None - - def set_filters(self, date_from, date_to, city, categories): - self.date_from = date_from - self.date_to = date_to - self.city = city - self.categories = categories - @method class list_events(ListElement): item_xpath = '//div[@class="catItemView groupLeading"]' @@ -66,27 +72,23 @@ class ProgramPage(HTMLPage): class item(ItemElement): klass = HybrideCalendarEvent - def condition(self): - return self.check_date() and self.check_city() and self.check_category() + def validate(self, obj): + return self.check_date(obj) and self.check_city(obj) and self.check_category(obj) - def check_date(self): - date = self.el.xpath("div[@class='catItemHeader']/span[@class='catItemDateCreated']")[0] - event_date = format_date(date.text) - if self.page.date_from and event_date >= self.page.date_from: - if not self.page.date_to: + def check_date(self, obj): + if self.env['date_from'] and obj.start_date > self.env['date_from']: + if not self.env['date_to']: return True else: - if event_date <= self.page.date_to: + if obj.end_date < self.env['date_to']: return True return False - def check_city(self): - return (not self.page.city or (self.page.city and - self.page.city.upper() == HybrideCalendarEvent.get_city().upper()) - ) + def check_city(self, obj): + return (not self.env['city'] or self.env['city'].upper() == obj.city.upper()) - def check_category(self): - return (not self.page.categories or HybrideCalendarEvent.get_category() in self.page.categories) + def check_category(self, obj): + return (not self.env['categories'] or obj.category in self.env['categories']) class CheckId(Filter): def filter(self, a_id): @@ -100,8 +102,6 @@ class ProgramPage(HTMLPage): obj_start_date = Date(CleanText('div[@class="catItemHeader"]/span[@class="catItemDateCreated"]')) obj_end_date = CombineDate(CleanText('div[@class="catItemHeader"]/span[@class="catItemDateCreated"]')) obj_summary = CleanText('div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a') - obj_city = HybrideCalendarEvent.get_city() - obj_category = HybrideCalendarEvent.get_category() class EventPage(HTMLPage): @@ -111,40 +111,18 @@ class EventPage(HTMLPage): klass = HybrideCalendarEvent def parse(self, el): - div = el.xpath("//div[@class='itemView']")[0] - if self.obj.id: event = self.obj event.url = self.page.url - event.description = self.get_description(div) + event.description = Description('//div[@class="itemView"]/div[@class="itemBody"]')(self) raise SkipItem() - re_id = re.compile('http://www.lhybride.org/programme/item/(.*?)', re.DOTALL) - self.env['id'] = re_id.search(self.page.url).group(1) self.env['url'] = self.page.url - self.env['description'] = self.get_description(div) - def get_description(self, div): - description = '' - - description_intro = div.xpath("div[@class='itemBody']/div[@class='itemIntroText']/table/tbody/tr/td") - - if description_intro and len(description_intro) > 0: - description += u'%s' % description_intro[0].text_content() - - description_content = div.xpath("div[@class='itemBody']/div[@class='itemFullText']/table/tbody/tr/td") - - if description_content and len(description_content) > 0: - description += u'%s' % description_content[0].text_content() - - return u'%s' % description - - obj_id = Env('id') + obj_id = Env('_id') base = '//div[@class="itemView"]/div[@class="itemHeader"]' obj_start_date = Date(CleanText('%s/span[@class="itemDateCreated"]' % base)) obj_end_date = CombineDate(CleanText('%s/span[@class="itemDateCreated"]' % base)) obj_summary = CleanText('%s/h2[@class="itemTitle"]' % base) - obj_city = HybrideCalendarEvent.get_city() - obj_category = HybrideCalendarEvent.get_category() obj_url = Env('url') - obj_description = Env('description') + obj_description = Description('//div[@class="itemView"]/div[@class="itemBody"]')