From f2bfe01178b62f86fdad9fcf5cc0ecd1ad28ee24 Mon Sep 17 00:00:00 2001
From: Bezleputh <carton_ben@yahoo.fr>
Date: Wed, 30 Oct 2013 19:28:21 +0100
Subject: [PATCH] [Hybride] add a new module to manage www.lhybride.org

---
 modules/hybride/__init__.py | 24 ++++++++++
 modules/hybride/backend.py  | 51 ++++++++++++++++++++
 modules/hybride/browser.py  | 48 +++++++++++++++++++
 modules/hybride/calendar.py | 33 +++++++++++++
 modules/hybride/pages.py    | 92 +++++++++++++++++++++++++++++++++++++
 modules/hybride/test.py     | 32 +++++++++++++
 6 files changed, 280 insertions(+)
 create mode 100644 modules/hybride/__init__.py
 create mode 100644 modules/hybride/backend.py
 create mode 100644 modules/hybride/browser.py
 create mode 100644 modules/hybride/calendar.py
 create mode 100644 modules/hybride/pages.py
 create mode 100644 modules/hybride/test.py

diff --git a/modules/hybride/__init__.py b/modules/hybride/__init__.py
new file mode 100644
index 00000000..07b8188e
--- /dev/null
+++ b/modules/hybride/__init__.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from .backend import HybrideBackend
+
+
+__all__ = ['HybrideBackend']
diff --git a/modules/hybride/backend.py b/modules/hybride/backend.py
new file mode 100644
index 00000000..db7f5ae0
--- /dev/null
+++ b/modules/hybride/backend.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.backend import BaseBackend
+from weboob.capabilities.calendar import ICapCalendarEvent
+
+from .browser import HybrideBrowser
+from .calendar import HybrideCalendarEvent
+
+__all__ = ['HybrideBackend']
+
+
+class HybrideBackend(BaseBackend, ICapCalendarEvent):  # calendar backend; every call is delegated to self.browser
+    NAME = 'hybride'
+    DESCRIPTION = u'hybride website'
+    MAINTAINER = u'Bezleputh'
+    EMAIL = 'carton_ben@yahoo.fr'
+    LICENSE = 'AGPLv3+'
+    VERSION = '0.h'
+
+    BROWSER = HybrideBrowser  # NOTE(review): self.browser is presumably built from this by BaseBackend -- confirm
+
+    def list_events(self, date_from, date_to=None):  # returns the browser's result for the same two bounds
+        with self.browser:
+            return self.browser.list_events(date_from, date_to)
+
+    def get_event(self, _id, event=None):  # passes (_id, event) to the browser unchanged and returns its result
+        with self.browser:
+            return self.browser.get_event(_id, event)
+
+    def fill_obj(self, event, fields):  # re-fetches by event.id into `event`; `fields` is unused, returns None
+        self.get_event(event.id, event)
+
+    OBJECTS = {HybrideCalendarEvent: fill_obj}  # event class -> filler; the value is the plain function object
diff --git a/modules/hybride/browser.py b/modules/hybride/browser.py
new file mode 100644
index 00000000..ed79d615
--- /dev/null
+++ b/modules/hybride/browser.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser.decorators import id2url
+from weboob.tools.browser import BaseBrowser
+from .calendar import HybrideCalendarEvent
+from .pages import ProgramPage, EventPage
+
+
+__all__ = ['HybrideBrowser']
+
+
+class HybrideBrowser(BaseBrowser):  # browser for the programme pages of www.lhybride.org
+    PROTOCOL = 'http'
+    DOMAIN = 'www.lhybride.org'
+    ENCODING = None
+
+    PAGES = {  # URL pattern -> page class; NOTE(review): keys look like regexes, dots are unescaped
+        '%s://%s/programme.html' % (PROTOCOL, DOMAIN): ProgramPage,
+        '%s://%s/programme/item/(.*?)' % (PROTOCOL, DOMAIN): EventPage,
+    }
+
+    def list_events(self, date_from, date_to=None):  # opens programme.html, returns ProgramPage's event generator
+        self.location('%s://%s/programme.html' % (self.PROTOCOL, self.DOMAIN))
+        assert self.is_on_page(ProgramPage)  # NOTE(review): this check disappears under python -O
+        return self.page.list_events(date_from, date_to)
+
+    @id2url(HybrideCalendarEvent.id2url)  # NOTE(review): presumably maps a bare id to its URL first -- confirm
+    def get_event(self, url, event=None):  # opens `url`, returns the event EventPage builds or fills
+        self.location(url)
+        assert self.is_on_page(EventPage)
+        return self.page.get_event(url, event)
diff --git a/modules/hybride/calendar.py b/modules/hybride/calendar.py
new file mode 100644
index 00000000..c9b9c14c
--- /dev/null
+++ b/modules/hybride/calendar.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.calendar import BaseCalendarEvent, TRANSP, STATUS, CATEGORIES
+
+
+class HybrideCalendarEvent(BaseCalendarEvent):  # event type whose class-level values are shared by all events
+
+    location = '18 rue Gosselet 59014 LILLE CEDEX'  # one fixed address for every event
+    sequence = 1
+    transp = TRANSP.TRANSPARENT
+    status = STATUS.CONFIRMED
+    category = CATEGORIES.CINE
+
+    @classmethod
+    def id2url(cls, _id):  # returns the event page URL, .../programme/item/<_id>.html
+        return 'http://www.lhybride.org/programme/item/%s.html' % _id
diff --git a/modules/hybride/pages.py b/modules/hybride/pages.py
new file mode 100644
index 00000000..43a2baea
--- /dev/null
+++ b/modules/hybride/pages.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from datetime import time, datetime
+from weboob.tools.browser import BasePage
+from .calendar import HybrideCalendarEvent
+import weboob.tools.date as date_util
+import re
+
+__all__ = ['ProgramPage', 'EventPage']
+
+
+def format_date(date):  # parses the text between the first and second comma of `date` as a French date
+    splitted_date = date.split(',')[1]  # NOTE(review): IndexError when `date` contains no comma
+    if splitted_date:  # empty text after the comma: falls through, so the function returns None
+        return date_util.parse_french_date(splitted_date)
+
+
+class ProgramPage(BasePage):  # programme listing page; turns its entries into HybrideCalendarEvent objects
+    def list_events(self, date_from, date_to=None):  # generator over the entries dated inside the period
+        divs = self.document.getroot().xpath("//div[@class='catItemView groupLeading']")
+        for div in divs:
+            if(self.is_event_in_valid_period(div, date_from, date_to)):
+                event = self.create_event(div)
+                if event:  # create_event returns None when the extracted id is empty
+                    yield event
+
+    def create_event(self, div):  # builds an event (id, start/end date, summary) from one entry div
+        re_id = re.compile('/programme/item/(.*?).html', re.DOTALL)  # NOTE(review): dot before "html" is unescaped
+        header = self.parser.select(div, "div[@class='catItemHeader']", 1, method='xpath')
+        date = self.parser.select(header, "span[@class='catItemDateCreated']", 1, method='xpath')
+        a_id = self.parser.select(header, "h3[@class='catItemTitle']/a", 1, method='xpath')
+        _id = re_id.search(a_id.attrib['href']).group(1)  # NOTE(review): AttributeError if the href does not match
+        if _id:
+            event = HybrideCalendarEvent(_id)
+            event.start_date = format_date(date.text)
+            event.end_date = datetime.combine(event.start_date, time.max)  # same day as start_date, at time.max
+            event.summary = u'%s' % a_id.text_content().strip()
+            return event
+
+    def is_event_in_valid_period(self, div, date_from, date_to=None):  # date_from < entry date (< date_to if truthy)
+        header = self.parser.select(div, "div[@class='catItemHeader']", 1, method='xpath')
+        date = self.parser.select(header, "span[@class='catItemDateCreated']", 1, method='xpath')
+        event_date = format_date(date.text)
+        if event_date > date_from:  # both bounds are strict: a date equal to a bound is rejected
+            if not date_to:
+                return True
+            else:
+                if event_date < date_to:
+                    return True
+        return False
+
+
+class EventPage(BasePage):  # detail page of a single event
+    def get_event(self, url, event=None):  # fills `event` (created from the id in `url` when falsy), returns it
+        if not event:
+            re_id = re.compile('http://www.lhybride.org/programme/item/(.*?).html', re.DOTALL)
+            event = HybrideCalendarEvent(re_id.search(url).group(1))  # NOTE(review): AttributeError if no match
+
+        event.url = url
+
+        div = self.document.getroot().xpath("//div[@class='itemView']")[0]  # NOTE(review): presumably IndexError if absent
+        header = self.parser.select(div, "div[@class='itemHeader']", 1, method='xpath')
+
+        date = self.parser.select(header, "span[@class='itemDateCreated']", 1, method='xpath')
+        event.start_date = format_date(date.text)
+        event.end_date = datetime.combine(event.start_date, time.max)  # same day as start_date, at time.max
+
+        summary = self.parser.select(header, "h2[@class='itemTitle']", 1, method='xpath')
+        event.summary = u'%s' % summary.text_content().strip()
+
+        table_description = self.parser.select(div, "div[@class='itemBody']/div[@class='itemFullText']/table/tbody/tr/td",
+                                               1, method='xpath')
+
+        event.description = u'%s' % table_description.text_content()  # description text is not stripped
+        return event
diff --git a/modules/hybride/test.py b/modules/hybride/test.py
new file mode 100644
index 00000000..14a70e61
--- /dev/null
+++ b/modules/hybride/test.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Bezleputh
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+from datetime import datetime
+
+class HybrideTest(BackendTest):  # test case for the 'hybride' backend
+    BACKEND = 'hybride'
+
+    def test_hybride_list(self):  # lists events after now, then fetches the first by id and checks its url
+        l = list(self.backend.list_events(datetime.now()))
+        assert len(l)  # fails when no event is listed after the current time
+        event = self.backend.get_event(l[0].id, None)
+        self.assertTrue(event.url, 'URL for event "%s" not found: %s' % (event.id, event.url))
+