new calendar module: pariskiwi
This commit is contained in:
parent
4b04e7af97
commit
299c0369e6
6 changed files with 331 additions and 0 deletions
24
modules/pariskiwi/__init__.py
Normal file
24
modules/pariskiwi/__init__.py
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2013 Vincent A
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from .backend import ParisKiwiBackend
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['ParisKiwiBackend']
|
||||||
91
modules/pariskiwi/backend.py
Normal file
91
modules/pariskiwi/backend.py
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2013 Vincent A
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.tools.backend import BaseBackend
|
||||||
|
from weboob.capabilities.calendar import ICapCalendarEvent, BaseCalendarEvent, CATEGORIES, TRANSP, STATUS
|
||||||
|
from datetime import datetime, time
|
||||||
|
|
||||||
|
from .browser import ParisKiwiBrowser
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['ParisKiwiBackend']
|
||||||
|
|
||||||
|
|
||||||
|
class ParisKiwiBackend(BaseBackend, ICapCalendarEvent):
    """Calendar backend exposing the events scraped by ParisKiwiBrowser.

    Every event produced here is tagged as a concert located in Paris.
    """

    NAME = 'pariskiwi'
    DESCRIPTION = u'ParisKiwi website'
    MAINTAINER = u'Vincent A'
    EMAIL = 'dev@indigo.re'
    LICENSE = 'AGPLv3+'
    VERSION = '0.h'

    BROWSER = ParisKiwiBrowser

    ASSOCIATED_CATEGORIES = [CATEGORIES.CONCERT]

    def search_events(self, query):
        """Return the events within the date range of *query*.

        The listing is only performed when ``has_matching_categories(query)``
        is true; otherwise the method falls through and returns None.
        """
        if self.has_matching_categories(query):
            return self.list_events(query.start_date, query.end_date or None)

    def list_events(self, date_from, date_to=None):
        """Yield the events dated between *date_from* and *date_to*.

        Either bound may be falsy, in which case it is not applied. The
        date filter runs on the listing entries first, so the (more costly)
        per-event lookup is only done for entries that are in range.
        """
        for d in self.browser.list_events_all():
            if self.matches_date(d, date_from, date_to):
                event = self.get_event(d['id'])
                if event is not None:
                    yield event

    def get_event(self, _id):
        """Return the event identified by *_id*, or None if the browser
        returned nothing usable for it."""
        d = self.browser.get_event(_id)
        if not d:
            return None
        return self._make_event(d)

    def _make_event(self, d):
        """Build a BaseCalendarEvent from the dict *d* returned by the browser.

        Required keys: 'id', 'url', 'datetime', 'summary', 'description'.
        Optional keys: 'price', 'address'.
        """
        event = BaseCalendarEvent(d['id'])
        event.city = u'Paris'
        event.url = d['url']
        event.start_date = d['datetime']
        # No end time is available: the event is assumed to last until the
        # end of its starting day.
        event.end_date = datetime.combine(d['datetime'].date(), time.max)
        event.summary = d['summary']
        event.category = CATEGORIES.CONCERT
        event.description = d['description']
        event.status = STATUS.CONFIRMED
        event.transp = TRANSP.OPAQUE
        if 'price' in d:
            event.price = d['price']
        if 'address' in d:
            event.location = d['address']
        return event

    def _make_false_event(self):
        """Build a placeholder, cancelled event with id '0' dated at the
        Unix epoch.

        NOTE(review): nothing in this class calls this helper.
        """
        event = BaseCalendarEvent('0')
        event.start_date = event.end_date = datetime.utcfromtimestamp(0)
        event.summary = u'NON EXISTING EVENT'
        event.status = STATUS.CANCELLED
        event.category = CATEGORIES.CONCERT
        event.transp = TRANSP.OPAQUE
        return event

    def matches_date(self, d, date_from, date_to):
        """Tell whether ``d['date']`` lies within [date_from, date_to].

        A falsy bound (e.g. None) is ignored.
        """
        if date_from and d['date'] < date_from:
            return False
        # The upper bound must be checked against date_to; comparing with
        # date_from here rejected every event later than the start date.
        if date_to and d['date'] > date_to:
            return False
        return True
|
||||||
52
modules/pariskiwi/browser.py
Normal file
52
modules/pariskiwi/browser.py
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2013 Vincent A
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
|
from .pages import PageList, PageList2, PageEvent
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['ParisKiwiBrowser']
|
||||||
|
|
||||||
|
|
||||||
|
class ParisKiwiBrowser(BaseBrowser):
    """Browser for the pariskiwi.org MediaWiki agenda."""

    PROTOCOL = 'http'
    DOMAIN = 'pariskiwi.org'
    ENCODING = 'utf-8'

    # URL pattern -> page class handling the matching location.
    PAGES = {
        'http://pariskiwi.org/~parislagrise/mediawiki/index.php/Agenda': PageList,
        'http://pariskiwi.org/~parislagrise/mediawiki/index.php/Agenda/Detruire_Ennui_Paris/.+': PageEvent,
        r'http://pariskiwi.org/~parislagrise/mediawiki/api.php\?action=query&list=allpages.*': PageList2,
    }

    def __init__(self, *a, **kw):
        """Create the browser, always forcing the 'raw' parser.

        Any 'parser' keyword supplied by the caller is overwritten.
        NOTE(review): presumably 'raw' hands pages the undecoded response
        body, since the page classes parse it themselves -- confirm.
        """
        kw['parser'] = 'raw'
        BaseBrowser.__init__(self, *a, **kw)

    def list_events_all(self):
        """Query the wiki API for the agenda pages and return the result of
        PageList2.list_events().

        The request asks for at most 500 entries (aplimit=500) in JSON.
        """
        self.location('http://pariskiwi.org/~parislagrise/mediawiki/api.php?action=query&list=allpages&apprefix=Agenda%2FDetruire_Ennui_Paris&aplimit=500&format=json')
        # NOTE(review): assert statements are stripped when Python runs
        # with -O, so this check is skipped in that mode.
        assert self.is_on_page(PageList2)
        return self.page.list_events()

    def get_event(self, _id):
        """Open the wiki page of the event *_id* and return the result of
        PageEvent.get_event().

        *_id* is interpolated into the URL as-is, without any quoting.
        """
        self.location('http://pariskiwi.org/~parislagrise/mediawiki/index.php/Agenda/Detruire_Ennui_Paris/%s' % _id)
        assert self.is_on_page(PageEvent)
        return self.page.get_event()
|
||||||
BIN
modules/pariskiwi/favicon.png
Normal file
BIN
modules/pariskiwi/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 924 B |
123
modules/pariskiwi/pages.py
Normal file
123
modules/pariskiwi/pages.py
Normal file
|
|
@ -0,0 +1,123 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2013 Vincent A
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.tools.browser import BasePage
|
||||||
|
from datetime import datetime, time
|
||||||
|
import json
|
||||||
|
import lxml.html
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['PageList', 'PageEvent', 'PageList2']
|
||||||
|
|
||||||
|
|
||||||
|
def date_from_id(_id):
    """Parse the date that prefixes an event id.

    The id is expected to start with ``month-day-year``, optionally followed
    by an underscore and the rest of the title. Returns a datetime at
    midnight; raises ValueError if the prefix is not such a date.
    """
    date_prefix, _, _ = _id.partition('_')
    return datetime.strptime(date_prefix, '%m-%d-%Y')
|
||||||
|
|
||||||
|
def id_from_path(title):
    """Return the event id for a page title or URL.

    Spaces are turned into underscores and only the text after the last
    '/' is kept (the whole string when there is no '/').
    """
    underscored = title.replace(' ', '_')
    return underscored.rpartition('/')[2]
|
||||||
|
|
||||||
|
def combine(dt, t):
    """Return a datetime on the day of *dt* at the hour and minute of *t*.

    Seconds, microseconds and any timezone information are dropped.
    """
    return datetime(
        year=dt.year,
        month=dt.month,
        day=dt.day,
        hour=t.hour,
        minute=t.minute,
    )
|
||||||
|
|
||||||
|
class PageList(BasePage):
    """Agenda index page; listing events from it is not implemented."""

    def get_events(self):
        """Always raise NotImplementedError."""
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
class PageList2(BasePage):
    """JSON listing of pages, read from ``query.allpages`` of the document."""

    def list_events(self):
        """Return all listed events as a list ordered by date, then by id."""
        return sorted(
            self.unsorted_list(),
            key=lambda entry: (entry['date'], entry['id']),
        )

    def unsorted_list(self):
        """Yield one ``{'id': ..., 'date': ...}`` dict per listed page, in
        document order."""
        # TODO paginate when there are >500 events
        payload = json.loads(self.document)
        for jpage in payload['query']['allpages']:
            event_id = id_from_path(jpage['title'])
            yield {'id': event_id, 'date': date_from_id(event_id)}
|
||||||
|
|
||||||
|
|
||||||
|
class PageEvent(BasePage):
    """Wiki page of a single event, scraped into a plain dict."""

    # Clock time written as "20h" or "20h30".
    _TIME_RE = r'\b(\d\d?)h(\d\d)?\b'

    # Amount followed by "euro", "euros" or the euro sign (\u20ac).
    # Written as an escaped unicode literal rather than ur'...', which is
    # a syntax error on Python 3; the pattern itself is unchanged.
    _PRICE_RE = u'\\b(\\d+([,.]\\d+)?)\\s*(euros\\b|euro\\b|\u20ac)'

    # (pattern, flags) pairs tried in this order on every text part; each
    # hit is appended to the address and removed from the part.
    _ADDRESS_RES = (
        # street number followed by rue/boulevard/avenue
        (r'\d+[\s,]+(rue|boulevard|avenue)\s+.+', re.I),
        # five-digit number with one of these two-digit prefixes
        # (presumably Paris-area postal codes)
        (r'\b(75|92|93|94|78|77|95|91)\d\d\d\b.*', 0),
        # "metro"/"rer"; the '.' accepts any character in place of the 'e'
        (r'\b(m.tro|rer)\b.*', re.I),
        # refuse '@foo' or '@ foo . plop'
        (r'@\s+\w+(\s+[^.]+.*)?', 0),
    )

    def get_event(self):
        """Scrape the page and return the event as a dict.

        The dict always has the keys 'id', 'date', 'datetime', 'url',
        'summary' and 'description'; 'hour', 'price' and 'address' are
        added when they can be found in the text.

        Returns None when the page has no ``bodyContent`` div or when the
        first line of its text is empty.
        """
        event_id = id_from_path(self.url)
        day = date_from_id(event_id)
        d = {
            'id': event_id,
            'date': day,
            # Stays at midnight unless a start time is found below.
            'datetime': day,
            'url': self.url,
        }

        html = lxml.html.fromstring(self.document)
        for div in html.iter('div'):
            if div.get('id') == 'bodyContent':
                break
        else:
            # No content container: without this, the code would go on
            # with whatever <div> came last (or fail if there is none).
            return None

        # Keep the direct children that are real elements (comments and
        # processing instructions have a callable tag), have no id and
        # are not footers.
        tags = [t for t in div
                if not callable(t.tag)
                and not t.get('id')
                and 'footer' not in t.get('class', '')]
        parts = [t.text_content().strip().replace('\n', ' ') for t in tags]
        description = '\n'.join(parts)
        summary = description.split('\n', 1)[0]

        self.div = div
        if not summary:
            return None

        d['summary'] = summary
        d['description'] = description

        # The helpers below remove what they recognise from `parts`, so a
        # later extraction does not match text already consumed. The
        # description above is intentionally built before that.
        hour = self._extract_time(parts)
        # Compare with None: midnight is a valid time but is falsy on
        # older Python versions.
        if hour is not None:
            d['hour'] = hour
            d['datetime'] = combine(day, hour)

        price = self._extract_price(parts)
        if price is not None:
            d['price'] = price

        address = self._extract_address(parts)
        if address:
            d['address'] = ' '.join(address)

        return d

    def _extract_time(self, parts):
        """Return the first valid clock time found in *parts* (removing it
        from the part it was in), or None."""
        for n, p in enumerate(parts):
            for match in re.finditer(self._TIME_RE, p):
                try:
                    found = time(int(match.group(1)),
                                 int(match.group(2) or '0'))
                except ValueError:
                    # Shaped like a time but out of range (e.g. "48h"):
                    # skip it instead of aborting the whole scrape.
                    continue
                parts[n] = p[:match.start(0)] + p[match.end(0):]
                return found
        return None

    def _extract_price(self, parts):
        """Return the first price found in *parts* as a float (removing it
        from the part it was in), or None."""
        for n, p in enumerate(parts):
            match = re.search(self._PRICE_RE, p)
            if match:
                parts[n] = p[:match.start(0)] + p[match.end(0):]
                return float(match.group(1).replace(',', '.'))
        return None

    def _extract_address(self, parts):
        """Return the list of address fragments found in *parts*, removing
        each fragment from the part it was in."""
        address = []
        for n, p in enumerate(parts):
            for pattern, flags in self._ADDRESS_RES:
                match = re.search(pattern, p, flags)
                if match:
                    address.append(match.group(0))
                    p = parts[n] = p[:match.start(0)] + p[match.end(0):]
        return address
|
||||||
41
modules/pariskiwi/test.py
Normal file
41
modules/pariskiwi/test.py
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2013 Vincent A
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from weboob.tools.test import BackendTest
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class ParisKiwiTest(BackendTest):
    """Tests of the pariskiwi backend, run through ``self.backend``."""

    BACKEND = 'pariskiwi'

    def test_pariskiwi_event(self):
        """A known event page yields its location, price, summary, URL
        and start time."""
        event = self.backend.get_event('11-9-2013_-Event_2')
        assert event
        assert event.location
        assert event.price
        assert event.summary
        assert event.url == 'http://pariskiwi.org/~parislagrise/mediawiki/index.php/Agenda/Detruire_Ennui_Paris/11-9-2013_-Event_2'
        assert event.start_date == datetime(2013, 11, 9, 20, 30)

    def test_pariskiwi_list(self):
        """The first event listed from now on does not start in the past."""
        # Take the reference instant once, so the assertion uses exactly
        # the lower bound that was given to list_events().
        now = datetime.now()
        it = self.backend.list_events(now)
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        ev = next(it)
        assert ev is not None
        assert ev.start_date >= now
|
||||||
Loading…
Add table
Add a link
Reference in a new issue