diff --git a/modules/hybride/backend.py b/modules/hybride/backend.py
index 828d4007..cc08e2c9 100644
--- a/modules/hybride/backend.py
+++ b/modules/hybride/backend.py
@@ -39,22 +39,18 @@ class HybrideBackend(BaseBackend, ICapCalendarEvent):
def search_events(self, query):
if self.has_matching_categories(query):
- with self.browser:
- return self.browser.list_events(query.start_date,
- query.end_date,
- query.city,
- query.categories)
+ return self.browser.list_events(query.start_date,
+ query.end_date,
+ query.city,
+ query.categories)
def list_events(self, date_from, date_to=None):
- with self.browser:
- return self.browser.list_events(date_from, date_to)
+ return self.browser.list_events(date_from, date_to)
def get_event(self, _id):
- with self.browser:
- return self.browser.get_event(_id)
+ return self.browser.get_event(_id)
def fill_obj(self, event, fields):
- with self.browser:
- return self.browser.get_event(event.id, event)
+ return self.browser.get_event(event.id, event)
OBJECTS = {HybrideCalendarEvent: fill_obj}
diff --git a/modules/hybride/browser.py b/modules/hybride/browser.py
index 1073bed1..c9e7c9b7 100644
--- a/modules/hybride/browser.py
+++ b/modules/hybride/browser.py
@@ -17,32 +17,28 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.browser.decorators import id2url
-from weboob.tools.browser import BaseBrowser
-from .calendar import HybrideCalendarEvent
+#from weboob.tools.browser.decorators import id2url
+#from weboob.tools.browser import BaseBrowser
+#from .calendar import HybrideCalendarEvent
from .pages import ProgramPage, EventPage
+from weboob.tools.browser2 import PagesBrowser, URL, Firefox
+
__all__ = ['HybrideBrowser']
-class HybrideBrowser(BaseBrowser):
- PROTOCOL = 'http'
- DOMAIN = 'www.lhybride.org'
- ENCODING = None
+class HybrideBrowser(PagesBrowser):
+ PROFILE = Firefox()
+ BASEURL = 'http://www.lhybride.org'
- PAGES = {
- '%s://%s/programme.html' % (PROTOCOL, DOMAIN): ProgramPage,
- '%s://%s/programme/item/(.*?)' % (PROTOCOL, DOMAIN): EventPage,
- }
+ program_page = URL('/programme.html', ProgramPage)
+ event_page = URL('/programme/item/(?P<_id>.*)', EventPage)
def list_events(self, date_from, date_to=None, city=None, categories=None):
- self.location('%s://%s/programme.html' % (self.PROTOCOL, self.DOMAIN))
- assert self.is_on_page(ProgramPage)
- return self.page.list_events(date_from, date_to, city, categories)
+ self.program_page.stay_or_go()
+ self.page.set_filters(date_from, date_to, city, categories)
+ return self.page.list_events()
- @id2url(HybrideCalendarEvent.id2url)
- def get_event(self, url, event=None):
- self.location(url)
- assert self.is_on_page(EventPage)
- return self.page.get_event(url, event)
+    def get_event(self, _id, event=None):
+        """Reach the event page for ``_id`` and return the parsed event.
+
+        The page is obtained through ``event_page.stay_or_go``; ``event`` is
+        handed to the page's ``get_event`` as ``obj``.
+        """
+        event_page = self.event_page.stay_or_go(_id=_id)
+        return event_page.get_event(obj=event)
diff --git a/modules/hybride/calendar.py b/modules/hybride/calendar.py
index e7f32496..2b65dc77 100644
--- a/modules/hybride/calendar.py
+++ b/modules/hybride/calendar.py
@@ -21,15 +21,18 @@ from weboob.capabilities.calendar import BaseCalendarEvent, TRANSP, STATUS, CATE
class HybrideCalendarEvent(BaseCalendarEvent):
- def __init__(self, _id):
- BaseCalendarEvent.__init__(self, _id)
+
+ def __init__(self):
+ BaseCalendarEvent.__init__(self)
self.location = u'18 rue Gosselet'
- self.city = u'Lille'
self.sequence = 1
self.transp = TRANSP.TRANSPARENT
self.status = STATUS.CONFIRMED
- self.category = CATEGORIES.CINE
@classmethod
- def id2url(cls, _id):
- return 'http://www.lhybride.org/programme/item/%s.html' % _id
+    def get_city(cls):
+        """Return the city assigned to Hybride events."""
+        city = u'Lille'
+        return city
+
+    @classmethod
+    def get_category(cls):
+        """Return the category assigned to Hybride events."""
+        category = CATEGORIES.CINE
+        return category
diff --git a/modules/hybride/pages.py b/modules/hybride/pages.py
index f4a41000..71c28365 100644
--- a/modules/hybride/pages.py
+++ b/modules/hybride/pages.py
@@ -18,11 +18,15 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from datetime import time, datetime
-from weboob.tools.browser import BasePage
from .calendar import HybrideCalendarEvent
+
import weboob.tools.date as date_util
import re
+from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement
+from weboob.tools.browser2.filters import Filter, Link, CleanText, Env
+
+
__all__ = ['ProgramPage', 'EventPage']
@@ -32,80 +36,115 @@ def format_date(date):
return date_util.parse_french_date(splitted_date)
-class ProgramPage(BasePage):
- def list_events(self, date_from, date_to=None, city=None, categories=None):
- divs = self.document.getroot().xpath("//div[@class='catItemView groupLeading']")
- for div in divs:
- if(self.is_event_in_valid_period(div, date_from, date_to)):
- event = self.create_event(div, city, categories)
- if event:
- yield event
-
- def create_event(self, div, city=None, categories=None):
- re_id = re.compile('/programme/item/(.*?).html', re.DOTALL)
- header = self.parser.select(div, "div[@class='catItemHeader']", 1, method='xpath')
- date = self.parser.select(header, "span[@class='catItemDateCreated']", 1, method='xpath')
- a_id = self.parser.select(header, "h3[@class='catItemTitle']/a", 1, method='xpath')
- _id = re_id.search(a_id.attrib['href']).group(1)
- if _id:
- event = HybrideCalendarEvent(_id)
- event.start_date = format_date(date.text)
- event.end_date = datetime.combine(event.start_date, time.max)
- event.summary = u'%s' % a_id.text_content().strip()
- if self.is_valid_event(event, city, categories):
- return event
-
- def is_valid_event(self, event, city, categories):
- if city and city != '' and city.upper() != event.city.upper():
- return False
-
- if categories and len(categories) > 0 and event.category not in categories:
- return False
-
- return True
-
- def is_event_in_valid_period(self, div, date_from, date_to=None):
- header = self.parser.select(div, "div[@class='catItemHeader']", 1, method='xpath')
- date = self.parser.select(header, "span[@class='catItemDateCreated']", 1, method='xpath')
- event_date = format_date(date.text)
- if event_date > date_from:
- if not date_to:
- return True
- else:
- if event_date < date_to:
- return True
- return False
+class Date(Filter):
+    """Filter converting the selected text into a date with ``format_date``."""
+
+    def filter(self, text):
+        parsed = format_date(text)
+        return parsed
-class EventPage(BasePage):
- def get_event(self, url, event=None):
- if not event:
- re_id = re.compile('http://www.lhybride.org/programme/item/(.*?).html', re.DOTALL)
- event = HybrideCalendarEvent(re_id.search(url).group(1))
+class CombineDate(Filter):
+    """Filter returning the parsed date combined with ``time.max``.
+
+    The text is parsed with ``format_date`` and the result is joined with
+    ``time.max`` through ``datetime.combine``.
+    """
+
+    def filter(self, text):
+        # First parameter was misspelled ``sel``; use the conventional ``self``.
+        return datetime.combine(format_date(text), time.max)
- event.url = url
- div = self.document.getroot().xpath("//div[@class='itemView']")[0]
- header = self.parser.select(div, "div[@class='itemHeader']", 1, method='xpath')
+class ProgramPage(HTMLPage):
- date = self.parser.select(header, "span[@class='itemDateCreated']", 1, method='xpath')
- event.start_date = format_date(date.text)
- event.end_date = datetime.combine(event.start_date, time.max)
+ date_from = None
+ date_to = None
+ city = None
+ categories = None
- summary = self.parser.select(header, "h2[@class='itemTitle']", 1, method='xpath')
- event.summary = u'%s' % summary.text_content().strip()
+    def set_filters(self, date_from, date_to, city, categories):
+        """Store the search criteria on the page.
+
+        The values are read back as ``self.page.<name>`` by the item checks
+        of ``list_events``.
+        """
+        self.date_from, self.date_to = date_from, date_to
+        self.city, self.categories = city, categories
- description = ''
+ @method
+ class list_events(ListElement):
+ item_xpath = '//div[@class="catItemView groupLeading"]'
- description_intro = self.parser.select(div, "div[@class='itemBody']/div[@class='itemIntroText']/table/tbody/tr/td",
- method='xpath')
- if description_intro and len(description_intro) > 0:
- description += u'%s' % description_intro[0].text_content()
+ class item(ItemElement):
+ klass = HybrideCalendarEvent
- description_content = self.parser.select(div, "div[@class='itemBody']/div[@class='itemFullText']/table/tbody/tr/td",
- method='xpath')
- if description_content and len(description_content) > 0:
- description += u'%s' % description_content[0].text_content()
+            def condition(self):
+                """Return whether the item passes the date, city and category checks."""
+                if not self.check_date():
+                    return False
+                if not self.check_city():
+                    return False
+                return self.check_category()
- event.description = u'%s' % description
- return event
+            def check_date(self):
+                """Return True when the item's date lies within the page's date range.
+
+                The date is read from the item header and parsed with
+                ``format_date``. Both bounds are inclusive; ``date_to`` is
+                optional.
+                """
+                span = self.el.xpath("div[@class='catItemHeader']/span[@class='catItemDateCreated']")[0]
+                event_date = format_date(span.text)
+                lower = self.page.date_from
+                upper = self.page.date_to
+                # NOTE(review): a missing date_from rejects every item -- confirm
+                # that callers always provide a lower bound.
+                if not (lower and event_date >= lower):
+                    return False
+                return not upper or event_date <= upper
+
+            def check_city(self):
+                """Return True when no city is requested or it matches the event city.
+
+                The comparison ignores case.
+                """
+                wanted = self.page.city
+                if not wanted:
+                    return True
+                return wanted.upper() == HybrideCalendarEvent.get_city().upper()
+
+            def check_category(self):
+                """Return True when no category is requested or the event's one is listed."""
+                wanted = self.page.categories
+                if not wanted:
+                    return True
+                return HybrideCalendarEvent.get_category() in wanted
+
+            class CheckId(Filter):
+                """Filter extracting the event id from an item link.
+
+                The id is the part of the link between ``/programme/item/`` and
+                the ``.html`` suffix. Raises SkipItem when the link yields no id.
+                """
+
+                def filter(self, a_id):
+                    # Escaped dot: only a literal ".html" suffix delimits the id.
+                    found = re.search(r'/programme/item/(.*?)\.html', a_id, re.DOTALL)
+                    # A link that does not match at all is skipped like an empty
+                    # id, instead of failing with AttributeError on None.group().
+                    if found is not None and found.group(1):
+                        return found.group(1)
+                    raise SkipItem()
+
+ obj_id = CheckId(Link('div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a'))
+ obj_start_date = Date(CleanText('div[@class="catItemHeader"]/span[@class="catItemDateCreated"]'))
+ obj_end_date = CombineDate(CleanText('div[@class="catItemHeader"]/span[@class="catItemDateCreated"]'))
+ obj_summary = CleanText('div[@class="catItemHeader"]/h3[@class="catItemTitle"]/a')
+ obj_city = HybrideCalendarEvent.get_city()
+ obj_category = HybrideCalendarEvent.get_category()
+
+
+class EventPage(HTMLPage):
+    """Page showing a single event."""
+
+    @method
+    class get_event(ItemElement):
+        """Build a HybrideCalendarEvent from the page, or complete a given one.
+
+        When the element already holds an object with an id, only its ``url``
+        and ``description`` are set and SkipItem is raised. Otherwise the id,
+        url and description are stored in ``self.env`` and read back by the
+        ``Env`` filters declared below.
+        """
+        klass = HybrideCalendarEvent
+
+        def parse(self, el):
+            div = el.xpath("//div[@class='itemView']")[0]
+
+            # Guard against no object having been attached yet.
+            # NOTE(review): confirm how the framework treats SkipItem raised
+            # from parse() when completing an existing object.
+            event = self.obj
+            if event is not None and event.id:
+                event.url = self.page.url
+                event.description = self.get_description(div)
+                raise SkipItem()
+
+            # The previous pattern ended with a lazy group and nothing after
+            # it, so it always captured an empty id. Anchor on the end of the
+            # URL and leave the optional ".html" suffix out of the id.
+            re_id = re.compile(r'/programme/item/(.*?)(?:\.html)?$', re.DOTALL)
+            self.env['id'] = re_id.search(self.page.url).group(1)
+            self.env['url'] = self.page.url
+            self.env['description'] = self.get_description(div)
+
+        def get_description(self, div):
+            """Return the intro text followed by the full text found under ``div``.
+
+            Only the first matching table cell of each section is used; a
+            missing section contributes nothing.
+            """
+            description = u''
+            for section in ('itemIntroText', 'itemFullText'):
+                cells = div.xpath("div[@class='itemBody']/div[@class='%s']/table/tbody/tr/td" % section)
+                if cells:
+                    description += u'%s' % cells[0].text_content()
+            return description
+
+        obj_id = Env('id')
+        base = '//div[@class="itemView"]/div[@class="itemHeader"]'
+        obj_start_date = Date(CleanText('%s/span[@class="itemDateCreated"]' % base))
+        obj_end_date = CombineDate(CleanText('%s/span[@class="itemDateCreated"]' % base))
+        obj_summary = CleanText('%s/h2[@class="itemTitle"]' % base)
+        obj_city = HybrideCalendarEvent.get_city()
+        obj_category = HybrideCalendarEvent.get_category()
+        obj_url = Env('url')
+        obj_description = Env('description')