diff --git a/modules/sueurdemetal/__init__.py b/modules/sueurdemetal/__init__.py new file mode 100644 index 00000000..5fc9c355 --- /dev/null +++ b/modules/sueurdemetal/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Vincent A +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from .backend import SueurDeMetalBackend + + +__all__ = ['SueurDeMetalBackend'] diff --git a/modules/sueurdemetal/backend.py b/modules/sueurdemetal/backend.py new file mode 100644 index 00000000..e6cc0e79 --- /dev/null +++ b/modules/sueurdemetal/backend.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Vincent A +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
# File: modules/sueurdemetal/backend.py

from weboob.tools.backend import BaseBackend
from weboob.capabilities.calendar import ICapCalendarEvent, BaseCalendarEvent, CATEGORIES, TRANSP, STATUS
import datetime

from .browser import SueurDeMetalBrowser


__all__ = ['SueurDeMetalBackend']


class Concert(BaseCalendarEvent):
    """Calendar event describing one concert listed on sueurdemetal.com."""

    @classmethod
    def id2url(cls, _id):
        """Return the URL of the concert detail page for concert ``_id``."""
        return 'http://www.sueurdemetal.com/detail-concert-metal.php?c=%s' % _id


class SueurDeMetalBackend(BaseBackend, ICapCalendarEvent):
    """Backend exposing the SueurDeMetal concert listing as calendar events."""

    NAME = 'sueurdemetal'
    DESCRIPTION = u'SueurDeMetal French concerts list website'
    MAINTAINER = u'Vincent A'
    EMAIL = 'dev@indigo.re'
    LICENSE = 'AGPLv3+'
    VERSION = '0.h'

    BROWSER = SueurDeMetalBrowser

    ASSOCIATED_CATEGORIES = [CATEGORIES.CONCERT]

    def __init__(self, *a, **kw):
        super(SueurDeMetalBackend, self).__init__(*a, **kw)
        # Cache of the site's cities (name -> info dict), filled lazily by
        # _fetch_cities() on first lookup.
        self.cities = {}

    def search_events(self, query):
        """Yield a Concert for each concert the site lists for ``query.city``.

        Nothing is yielded when the city is empty or is not part of the
        site's city list.
        """
        city_id = self.find_city_id(query.city)
        if city_id is None:
            # Unknown city: without this guard the browser would be sent to
            # the bogus page "ville-metal-None.htm".
            return
        for d in self.browser.get_concerts_city(city_id):
            yield self._make_event(d)

    def get_event(self, _id):
        """Fetch the concert whose id is ``_id`` and return it as a Concert."""
        d = self.browser.get_concert(_id)
        return self._make_event(d)

    def _make_event(self, d):
        """Build a Concert from the dict ``d`` produced by the page parsers.

        Required keys: 'id', 'date', 'summary', 'url', 'active' and either
        'city' or 'city_id'. Optional keys: 'price', 'room', 'address'.
        """
        event = Concert(d['id'])
        event.category = CATEGORIES.CONCERT
        event.start_date = d['date']
        # No end time is parsed from the site: the event runs until the last
        # instant of its start day.
        event.end_date = datetime.datetime.combine(event.start_date.date(), datetime.time.max)
        event.summary = d['summary']
        event.url = d['url']

        if 'price' in d:
            event.price = d['price']

        if d['active']:
            event.status = STATUS.CONFIRMED
        else:
            event.status = STATUS.CANCELLED

        if 'city' in d:
            event.city = d['city']
        else:
            # Only the city id is known: resolve the name from the city list.
            event.city = self.find_city_name(d['city_id'])
        event.transp = TRANSP.OPAQUE

        # "room, address" or "room" or "address" or ""
        location = ', '.join(filter(None, (d.get('room', ''), d.get('address', ''))))
        if location:
            event.location = location

        return event

    def _fetch_cities(self):
        """Load the city list through the browser unless it is already cached."""
        if self.cities:
            return
        self.cities = self.browser.get_cities()

    def find_city_id(self, name):
        """Return the id of the city called ``name``, or None if not found.

        The comparison is case-insensitive. An empty or unset name returns
        None without fetching the city list.
        """
        if not name:
            return None
        self._fetch_cities()
        name = name.lower()
        for city_name, city in self.cities.items():
            if city_name.lower() == name:
                return city['id']
        return None

    def find_city_name(self, _id):
        """Return the name of the city whose id is ``_id``, or None if not found."""
        self._fetch_cities()
        for c in self.cities.values():
            if c['id'] == _id:
                return c['name']
        return None
# File: modules/sueurdemetal/browser.py

from weboob.tools.browser import BaseBrowser

from .pages import PageCity, PageConcert, PageCityList


__all__ = ['SueurDeMetalBrowser']


class SueurDeMetalBrowser(BaseBrowser):
    """Browser for www.sueurdemetal.com: city pages, concert pages, city list."""

    PROTOCOL = 'http'
    DOMAIN = 'www.sueurdemetal.com'
    ENCODING = 'iso-8859-15'

    # URL patterns mapped to the page class that parses them. They are written
    # as regular expressions (the "?" was already escaped), so the literal
    # dots of the path are escaped too and every pattern is a raw string.
    PAGES = {
        r'%s://%s/ville-metal-.+\.htm' % (PROTOCOL, DOMAIN): PageCity,
        r'%s://%s/detail-concert-metal\.php\?c=.+' % (PROTOCOL, DOMAIN): PageConcert,
        r'%s://%s/recherchemulti\.php' % (PROTOCOL, DOMAIN): PageCityList,
    }

    def get_concerts_city(self, city):
        """Open the page of city id ``city`` and return its concerts."""
        self.location('%s://%s/ville-metal-%s.htm' % (self.PROTOCOL, self.DOMAIN, city))
        assert self.is_on_page(PageCity)
        return self.page.get_concerts()

    def get_concert(self, _id):
        """Open the detail page of concert ``_id`` and return its parsed data."""
        self.location('%s://%s/detail-concert-metal.php?c=%s' % (self.PROTOCOL, self.DOMAIN, _id))
        assert self.is_on_page(PageConcert)
        return self.page.get_concert()

    def get_cities(self):
        """Open the multi-criteria search page and return the cities it lists."""
        self.location('%s://%s/recherchemulti.php' % (self.PROTOCOL, self.DOMAIN))
        assert self.is_on_page(PageCityList)
        return self.page.get_cities()
# File: modules/sueurdemetal/pages.py

from weboob.tools.browser import BasePage
from weboob.tools.date import parse_french_date
import re


__all__ = ['PageCity', 'PageConcert', 'PageCityList']


class PageWithConcerts(BasePage):
    """Shared parsing helpers for pages that display concert tables."""

    def extract_concert(self, concert_table):
        """Parse one concert table element into a dict.

        The dict holds 'date', 'active' (False when the concert is marked as
        cancelled), 'performers', 'summary' and 'description'.
        """
        d = {}
        date_h3 = next(concert_table.iter('h3'))
        d['date'] = parse_french_date(date_h3.text)

        # A sibling <h2> whose text starts with "ANNUL" marks a cancellation.
        cancel_h2 = next(date_h3.itersiblings('h2'), None)
        if cancel_h2 is not None and (cancel_h2.text or '').startswith('ANNUL'):
            d['active'] = False
        else:
            d['active'] = True

        performers_table = next(concert_table.iterdescendants('table'))
        d['performers'] = list(self.extract_performers(performers_table))
        d['summary'] = ' + '.join(p['name'] for p in d['performers'])
        d['description'] = d['summary']

        return d

    def extract_performers(self, performers_table):
        """Yield one dict per row of the performers table.

        Each dict has a 'name' and, when the cell carries trailing text, a
        'genre'.
        """
        for performer_tr in performers_table.findall('tr'):
            performer_td = performer_tr.find('td')
            d = {'name': performer_td.find('strong').text.strip(' \t\r\n+')}  # handle '+ GUESTS'
            rest = performer_td.tail
            if rest:
                d['genre'] = rest
            yield d

    def extract_id_from_url(self, url):
        """Return the numeric concert id found in the ``c=`` query parameter."""
        return re.search(r'c=(\d+)', url).group(1)

    def extract_city_from_url(self, url):
        """Return the city id embedded in a ``ville-metal-<id>.htm`` URL."""
        # The match result must be returned: without it every concert ended
        # up with a city_id of None.
        return re.search(r'metal-(.+)\.htm$', url).group(1)


class PageCity(PageWithConcerts):
    """Page listing the concerts of one city."""

    def get_concerts(self):
        """Yield a dict for each concert table found on the page."""
        for concert_table in self.document.xpath('//div[@id="centre-page"]//div/table'):
            yield self.extract_concert(concert_table)

    def extract_concert(self, concert_table):
        """Parse a concert table and add its 'id', 'url' and 'city_id'."""
        d = PageWithConcerts.extract_concert(self, concert_table)
        infos_a = concert_table.xpath('.//a[starts-with(@href, "detail-concert-metal.php")]')[0]
        d['id'] = self.extract_id_from_url(infos_a.get('href'))
        d['url'] = 'http://www.sueurdemetal.com/detail-concert-metal.php?c=%s' % d['id']
        d['city_id'] = self.extract_city_from_url(self.url)
        return d


class PageConcert(PageWithConcerts):
    """Detail page of a single concert."""

    def get_concert(self):
        """Return the dict describing the concert shown on this page.

        On top of the common fields it carries 'id', 'url', 'room', 'address',
        'city_id', 'city' and, when a price could be parsed, 'price'.
        """
        concert_table = self.document.xpath('//div[@id="centre-page"]//div/table')[0]
        d = self.extract_concert(concert_table)
        d['id'] = self.extract_id_from_url(self.url)
        d['url'] = self.url

        it = concert_table.iterdescendants('table')
        next(it)  # ignore performers table
        infos_table = next(it)
        self.infos_table = infos_table
        info_trs = infos_table.findall('tr')
        # Rows are addressed by position: 3 = room, 4 = address, 5 = price.
        d['room'] = (info_trs[3].findall('td')[1].text or '').strip()
        d['address'] = (info_trs[4].findall('td')[1].text or '').strip()

        price = self.parse_price(info_trs[5].findall('td')[1].text)
        if price is not None:  # "None" is different from "0€"
            d['price'] = price

        city_a = self.document.xpath('//a[starts-with(@href, "ville-metal-")]')[0]
        d['city_id'] = self.extract_city_from_url(city_a.get('href'))
        d['city'] = city_a.text
        return d

    def parse_price(self, s):
        """Return the last number found in ``s`` as a float, or None.

        None is returned when ``s`` is empty or contains no digit/dot run.
        """
        if not s:
            return None
        # Build a real list: the emptiness test and the [-1] index below must
        # not depend on filter() returning a list.
        parts = [p for p in re.split(r'[^\d.]+', s.strip()) if p]
        if not parts:
            return None
        return float(parts[-1])


class PageCityList(BasePage):
    """Search page whose "ville" select box lists every known city."""

    def get_cities(self):
        """Return a dict mapping city name to its info dict.

        Each info dict has 'code', 'dept', 'id' ("<code>-<dept>") and 'name'.
        Options with an empty or unrecognised value are skipped.
        """
        cities = {}
        for option in self.document.xpath('//select[@name="ville"]/option'):
            v = option.get('value')
            if not v:
                continue
            match = re.search(r'ville-metal-(.*)-(\d+)\.htm$', v)
            if match is None:
                # Not a city link; skip it rather than crash on .groups().
                continue
            d = {}
            d['code'], d['dept'] = match.groups()
            d['id'] = '%s-%s' % (d['code'], d['dept'])
            d['name'] = option.text.split('(')[0].strip()

            cities[d['name']] = d
        return cities
# File: modules/sueurdemetal/test.py

from weboob.tools.test import BackendTest
from weboob.capabilities.calendar import Query


class SueurDeMetalTest(BackendTest):
    """Live test of the sueurdemetal backend."""

    BACKEND = 'sueurdemetal'

    def test_sueurdemetal_searchcity(self):
        """Searching 'paris' returns events, and the first one can be fetched by id."""
        q = Query()
        q.city = 'paris'
        # Run the search once and reuse the results instead of querying the
        # site a second time just to get the first event.
        events = list(self.backend.search_events(q))
        self.assertTrue(len(events) > 0)

        ev = events[0]
        self.assertTrue(self.backend.get_event(ev.id))