From 5945d5d539dae6b71c3cb277d759e8b7bf3b1e60 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Tue, 9 Dec 2014 10:19:13 +0100 Subject: [PATCH] [hybride] site changed --- modules/hybride/browser.py | 4 ++-- modules/hybride/pages.py | 35 ++++++++--------------------------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/modules/hybride/browser.py b/modules/hybride/browser.py index d16c0e86..a996bad0 100644 --- a/modules/hybride/browser.py +++ b/modules/hybride/browser.py @@ -29,8 +29,8 @@ class HybrideBrowser(PagesBrowser): PROFILE = Firefox() BASEURL = 'http://www.lhybride.org' - program_page = URL('/programme.html', ProgramPage) - event_page = URL('/programme/item/(?P<_id>.*)', EventPage) + program_page = URL('programme.html', ProgramPage) + event_page = URL('programme/item/(?P<_id>.*)', EventPage) def list_events(self, date_from, date_to=None, city=None, categories=None): return self.program_page.stay_or_go().list_events(date_from=date_from, diff --git a/modules/hybride/pages.py b/modules/hybride/pages.py index 53dc9760..cd96fa0c 100644 --- a/modules/hybride/pages.py +++ b/modules/hybride/pages.py @@ -17,15 +17,13 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. 
-from datetime import time, datetime from .calendar import HybrideCalendarEvent import weboob.tools.date as date_util -import re from weboob.browser.pages import HTMLPage -from weboob.browser.elements import ItemElement, SkipItem, ListElement, method -from weboob.browser.filters.standard import Filter, CleanText, Env, Format, BrowserURL +from weboob.browser.elements import ItemElement, ListElement, method +from weboob.browser.filters.standard import Filter, CleanText, Env, Format, BrowserURL, Regexp from weboob.browser.filters.html import CleanHTML from weboob.browser.filters.html import Link @@ -35,16 +33,11 @@ class Date(Filter): return date_util.parse_french_date(text) -class CombineDate(Filter): - def filter(self, text): - return datetime.combine(date_util.parse_french_date(text), time.max) - - class ProgramPage(HTMLPage): @method class list_events(ListElement): - item_xpath = '//div[@class="catItemView groupLeading"]' + item_xpath = '//div[@class="itemContainer itemContainerLast"]' class item(ItemElement): klass = HybrideCalendarEvent @@ -67,18 +60,9 @@ class ProgramPage(HTMLPage): def check_category(self, obj): return (not self.env['categories'] or obj.category in self.env['categories']) - class CheckId(Filter): - def filter(self, a_id): - re_id = re.compile('/programme/item/(.*?).html', re.DOTALL) - _id = re_id.search(a_id).group(1) - if _id: - return _id - raise SkipItem() - - obj_id = CheckId(Link('div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a')) - obj_start_date = Date(CleanText('div[@class="catItemHeader"]/span[@class="catItemDateCreated"]')) - obj_end_date = CombineDate(CleanText('div[@class="catItemHeader"]/span[@class="catItemDateCreated"]')) - obj_summary = CleanText('div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a') + obj_id = Regexp(Link('div/div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a'), '/programme/item/(.*?).html') + obj_start_date = 
Date(CleanText('div/div[@class="catItemHeader"]/span[@class="catItemDateCreated"]')) + obj_summary = CleanText('div/div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a') class EventPage(HTMLPage): @@ -88,11 +72,8 @@ class EventPage(HTMLPage): klass = HybrideCalendarEvent obj_id = Env('_id') - base = '//div[@class="itemView"]/div[@class="itemHeader"]' - obj_start_date = Date(CleanText('%s/span[@class="itemDateCreated"]' % base)) - obj_end_date = CombineDate(CleanText('%s/span[@class="itemDateCreated"]' % base)) - obj_summary = CleanText('%s/h2[@class="itemTitle"]' % base) - obj_url = Env('url') + obj_start_date = Date(CleanText('//span[@class="itemDateCreated"]')) + obj_summary = CleanText('//h2[@class="itemTitle"]') obj_description = Format('%s\n%s', CleanHTML('//div[@class="itemIntroText"]'), CleanHTML('//div[@class="itemFullText"]'))