new calendar module: sueurdemetal
This commit is contained in:
parent
299c0369e6
commit
47d4105d8e
6 changed files with 341 additions and 0 deletions
24
modules/sueurdemetal/__init__.py
Normal file
24
modules/sueurdemetal/__init__.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2013 Vincent A
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from .backend import SueurDeMetalBackend
|
||||
|
||||
|
||||
__all__ = ['SueurDeMetalBackend']
|
||||
107
modules/sueurdemetal/backend.py
Normal file
107
modules/sueurdemetal/backend.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2013 Vincent A
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.backend import BaseBackend
|
||||
from weboob.capabilities.calendar import ICapCalendarEvent, BaseCalendarEvent, CATEGORIES, TRANSP, STATUS
|
||||
import datetime
|
||||
|
||||
from .browser import SueurDeMetalBrowser
|
||||
|
||||
|
||||
__all__ = ['SueurDeMetalBackend']
|
||||
|
||||
|
||||
class Concert(BaseCalendarEvent):
    """Calendar event describing one concert of sueurdemetal.com."""

    @classmethod
    def id2url(cls, _id):
        """Return the URL of the concert detail page for concert id ``_id``."""
        url_template = 'http://www.sueurdemetal.com/detail-concert-metal.php?c=%s'
        return url_template % _id
|
||||
|
||||
|
||||
class SueurDeMetalBackend(BaseBackend, ICapCalendarEvent):
    """Calendar backend listing the concerts published on sueurdemetal.com."""

    NAME = 'sueurdemetal'
    DESCRIPTION = u'SueurDeMetal French concerts list website'
    MAINTAINER = u'Vincent A'
    EMAIL = 'dev@indigo.re'
    LICENSE = 'AGPLv3+'
    VERSION = '0.h'

    BROWSER = SueurDeMetalBrowser

    ASSOCIATED_CATEGORIES = [CATEGORIES.CONCERT]

    def __init__(self, *a, **kw):
        super(SueurDeMetalBackend, self).__init__(*a, **kw)
        # Lazily filled by _fetch_cities() with the browser's city mapping
        # (city name -> dict holding at least 'id' and 'name').
        self.cities = {}

    def search_events(self, query):
        """Yield a Concert for every concert listed for ``query.city``.

        Nothing is yielded when the city is empty or is not part of the
        site's city list: without a city id there is no listing page to open.
        """
        city_id = self.find_city_id(query.city)
        if city_id is None:
            # Unknown city: do not request a bogus 'ville-metal-None.htm' page.
            return
        for d in self.browser.get_concerts_city(city_id):
            yield self._make_event(d)

    def get_event(self, _id):
        """Return the Concert built from the detail page of concert ``_id``."""
        d = self.browser.get_concert(_id)
        return self._make_event(d)

    def _make_event(self, d):
        """Build a Concert from a dict of scraped concert data.

        Required keys: 'id', 'date', 'summary', 'url', 'active', and either
        'city' or 'city_id'. Optional keys: 'price', 'room', 'address'.
        """
        event = Concert(d['id'])
        event.category = CATEGORIES.CONCERT
        event.start_date = d['date']
        # No end time is scraped: the event ends at the last instant of its start day.
        event.end_date = datetime.datetime.combine(event.start_date.date(), datetime.time.max)
        event.summary = d['summary']
        event.url = d['url']

        if 'price' in d:
            event.price = d['price']

        event.status = STATUS.CONFIRMED if d['active'] else STATUS.CANCELLED

        if 'city' in d:
            event.city = d['city']
        else:
            # Only the city id is known: resolve it through the city list.
            event.city = self.find_city_name(d['city_id'])
        event.transp = TRANSP.OPAQUE

        # "room, address" or "room" or "address" or ""
        location = ', '.join(filter(None, (d.get('room', ''), d.get('address', ''))))
        if location:
            event.location = location

        return event

    def _fetch_cities(self):
        """Load the city list from the site unless it is already cached."""
        if self.cities:
            return
        self.cities = self.browser.get_cities()

    def find_city_id(self, name):
        """Return the id of the city called ``name`` (case-insensitive).

        Returns None when ``name`` is empty or matches no known city.
        """
        if not name:
            return None
        self._fetch_cities()
        wanted = name.lower()
        for city_name, city in self.cities.items():
            if city_name.lower() == wanted:
                return city['id']
        return None

    def find_city_name(self, _id):
        """Return the name of the city whose id is ``_id``, or None if unknown."""
        self._fetch_cities()
        for c in self.cities.values():
            if c['id'] == _id:
                return c['name']
        return None
|
||||
53
modules/sueurdemetal/browser.py
Normal file
53
modules/sueurdemetal/browser.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2013 Vincent A
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages import PageCity, PageConcert, PageCityList
|
||||
|
||||
|
||||
__all__ = ['SueurDeMetalBrowser']
|
||||
|
||||
|
||||
class SueurDeMetalBrowser(BaseBrowser):
    """Browser opening the sueurdemetal.com pages used by this module."""

    PROTOCOL = 'http'
    DOMAIN = 'www.sueurdemetal.com'
    ENCODING = 'iso-8859-15'

    # URL patterns associated with the page class handling them
    # (presumably matched by BaseBrowser against the visited URL -- confirm).
    PAGES = {
        '%s://%s/ville-metal-.+.htm' % (PROTOCOL, DOMAIN): PageCity,
        r'%s://%s/detail-concert-metal.php\?c=.+' % (PROTOCOL, DOMAIN): PageConcert,
        '%s://%s/recherchemulti.php' % (PROTOCOL, DOMAIN): PageCityList,
    }

    def get_concerts_city(self, city):
        """Open the listing page of city id ``city`` and return its concerts."""
        listing_url = '%s://%s/ville-metal-%s.htm' % (self.PROTOCOL, self.DOMAIN, city)
        self.location(listing_url)
        assert self.is_on_page(PageCity)
        current_page = self.page
        return current_page.get_concerts()

    def get_concert(self, _id):
        """Open the detail page of concert ``_id`` and return its data."""
        detail_url = '%s://%s/detail-concert-metal.php?c=%s' % (self.PROTOCOL, self.DOMAIN, _id)
        self.location(detail_url)
        assert self.is_on_page(PageConcert)
        current_page = self.page
        return current_page.get_concert()

    def get_cities(self):
        """Open the multi-criteria search page and return the cities it lists."""
        search_url = '%s://%s/recherchemulti.php' % (self.PROTOCOL, self.DOMAIN)
        self.location(search_url)
        assert self.is_on_page(PageCityList)
        current_page = self.page
        return current_page.get_cities()
|
||||
BIN
modules/sueurdemetal/favicon.png
Normal file
BIN
modules/sueurdemetal/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 893 B |
124
modules/sueurdemetal/pages.py
Normal file
124
modules/sueurdemetal/pages.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2013 Vincent A
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.date import parse_french_date
|
||||
import re
|
||||
|
||||
|
||||
__all__ = ['PageCity', 'PageConcert', 'PageCityList']
|
||||
|
||||
|
||||
class PageWithConcerts(BasePage):
    """Base page providing helpers to extract concert data from the HTML."""

    def extract_concert(self, concert_table):
        """Return a dict describing the concert held in ``concert_table``.

        Keys set: 'date' (parsed from the first <h3>), 'active' (False when
        a following <h2> starts with 'ANNUL'), 'performers' (list of dicts),
        'summary' and 'description' (performer names joined with ' + ').

        Raises StopIteration when the table has no <h3> or no nested table.
        """
        d = {}
        date_h3 = next(concert_table.iter('h3'))
        d['date'] = parse_french_date(date_h3.text)

        cancel_h2 = next(date_h3.itersiblings('h2'), None)
        # An <h2> without text cannot be a cancellation notice.
        cancelled = cancel_h2 is not None and (cancel_h2.text or '').startswith('ANNUL')
        d['active'] = not cancelled

        performers_table = next(concert_table.iterdescendants('table'))
        d['performers'] = list(self.extract_performers(performers_table))
        d['summary'] = ' + '.join(p['name'] for p in d['performers'])
        d['description'] = d['summary']

        return d

    def extract_performers(self, performers_table):
        """Yield one dict per row of ``performers_table``.

        Each dict has a 'name' key, plus 'genre' when the row's first cell
        is followed by tail text.
        """
        for performer_tr in performers_table.findall('tr'):
            performer_td = performer_tr.find('td')
            d = {'name': performer_td.find('strong').text.strip(' \t\r\n+')}  # handle '+ GUESTS'
            # NOTE(review): this is the tail of the <td>, not of the <strong>;
            # confirm against the page markup that the genre really lives there.
            rest = performer_td.tail
            if rest:
                d['genre'] = rest
            yield d

    def extract_id_from_url(self, url):
        """Return the digits of the ``c=`` parameter found in ``url``."""
        return re.search(r'c=(\d+)', url).group(1)

    def extract_city_from_url(self, url):
        """Return the city id found between 'metal-' and the '.htm' suffix of ``url``."""
        # The original computed this value but never returned it, so every
        # caller received None as city id.
        return re.search('metal-(.+).htm$', url).group(1)
|
||||
|
||||
|
||||
class PageCity(PageWithConcerts):
    """Page listing the concerts of a single city."""

    def get_concerts(self):
        """Yield a concert dict for each concert table of the page."""
        tables = self.document.xpath('//div[@id="centre-page"]//div/table')
        for table in tables:
            yield self.extract_concert(table)

    def extract_concert(self, concert_table):
        """Extend the base concert dict with 'id', 'url' and 'city_id'.

        The id is read from the first link to the concert detail page found
        in the table; the city id is extracted from this page's own URL.
        """
        concert = PageWithConcerts.extract_concert(self, concert_table)

        detail_links = concert_table.xpath('.//a[starts-with(@href, "detail-concert-metal.php")]')
        concert_id = self.extract_id_from_url(detail_links[0].get('href'))

        concert['id'] = concert_id
        concert['url'] = 'http://www.sueurdemetal.com/detail-concert-metal.php?c=%s' % concert_id
        concert['city_id'] = self.extract_city_from_url(self.url)
        return concert
|
||||
|
||||
|
||||
class PageConcert(PageWithConcerts):
    """Detail page of a single concert."""

    def get_concert(self):
        """Return the dict describing the concert shown on this page.

        On top of the keys set by extract_concert(), sets 'id', 'url',
        'room', 'address', 'city_id', 'city' and, when a number can be read
        from the price cell, 'price'.
        """
        concert_table = self.document.xpath('//div[@id="centre-page"]//div/table')[0]
        d = self.extract_concert(concert_table)
        d['id'] = self.extract_id_from_url(self.url)
        d['url'] = self.url

        it = concert_table.iterdescendants('table')
        next(it)  # ignore performers table
        infos_table = next(it)
        self.infos_table = infos_table
        info_trs = infos_table.findall('tr')
        # Rows 3, 4 and 5 hold room, address and price in their second cell.
        d['room'] = (info_trs[3].findall('td')[1].text or '').strip()
        d['address'] = (info_trs[4].findall('td')[1].text or '').strip()

        price = self.parse_price(info_trs[5].findall('td')[1].text)
        if price is not None:  # "None" is different from "0€"
            d['price'] = price

        city_a = self.document.xpath('//a[starts-with(@href, "ville-metal-")]')[0]
        d['city_id'] = self.extract_city_from_url(city_a.get('href'))
        d['city'] = city_a.text
        return d

    def parse_price(self, s):
        """Return the last number found in ``s`` as a float.

        Returns None when ``s`` is empty or contains no number, which is
        distinct from a price of 0.
        """
        if not s:
            return None
        # Match real numbers only, so a stray '.' (e.g. a trailing period)
        # is ignored instead of being handed to float().
        numbers = re.findall(r'\d+(?:\.\d*)?|\.\d+', s)
        if not numbers:
            return None
        return float(numbers[-1])
|
||||
|
||||
|
||||
class PageCityList(BasePage):
    """Search page whose 'ville' select box lists the known cities."""

    def get_cities(self):
        """Return a dict mapping each city name to its description dict.

        Each description holds 'code', 'dept', 'id' ("code-dept") and
        'name'. Options without a value are ignored.
        """
        by_name = {}
        options = self.document.xpath('//select[@name="ville"]/option')
        for opt in options:
            target = opt.get('value')
            if target:
                code, dept = re.search(r'ville-metal-(.*)-(\d+).htm$', target).groups()
                # The option label looks like "Name (...)": keep the part before '('.
                name = opt.text.split('(')[0].strip()
                by_name[name] = {
                    'code': code,
                    'dept': dept,
                    'id': '%s-%s' % (code, dept),
                    'name': name,
                }
        return by_name
|
||||
33
modules/sueurdemetal/test.py
Normal file
33
modules/sueurdemetal/test.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2013 Vincent A
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.test import BackendTest
|
||||
from weboob.capabilities.calendar import Query
|
||||
|
||||
class SueurDeMetalTest(BackendTest):
    """Test of the sueurdemetal backend, run through the BackendTest harness."""

    BACKEND = 'sueurdemetal'

    def test_sueurdemetal_searchcity(self):
        """Search concerts in Paris, then fetch the first one by id."""
        q = Query()
        q.city = 'paris'

        # Run the search once and reuse the results instead of querying
        # the site a second time just to get the first event.
        events = list(self.backend.search_events(q))
        self.assertTrue(len(events) > 0)

        ev = events[0]
        self.assertTrue(self.backend.get_event(ev.id))
|
||||
Loading…
Add table
Add a link
Reference in a new issue