From bff3bd78456e11062b6c3b39519efcfc1d936794 Mon Sep 17 00:00:00 2001 From: Vincent A Date: Tue, 17 Dec 2013 17:38:35 +0100 Subject: [PATCH] sueurdemetal: implement list_events --- modules/sueurdemetal/backend.py | 4 +++ modules/sueurdemetal/browser.py | 13 +++++++++- modules/sueurdemetal/pages.py | 44 +++++++++++++++++++++++++++++---- 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/modules/sueurdemetal/backend.py b/modules/sueurdemetal/backend.py index e6cc0e79..7f381db9 100644 --- a/modules/sueurdemetal/backend.py +++ b/modules/sueurdemetal/backend.py @@ -50,6 +50,10 @@ class SueurDeMetalBackend(BaseBackend, ICapCalendarEvent): super(SueurDeMetalBackend, self).__init__(*a, **kw) self.cities = {} + def list_events(self, from_date, to_date=None): + for d in self.browser.get_concerts_date(from_date): + yield self._make_event(d) + def search_events(self, query): city_id = self.find_city_id(query.city) for d in self.browser.get_concerts_city(city_id): diff --git a/modules/sueurdemetal/browser.py b/modules/sueurdemetal/browser.py index 422b001d..ec7f2921 100644 --- a/modules/sueurdemetal/browser.py +++ b/modules/sueurdemetal/browser.py @@ -20,7 +20,7 @@ from weboob.tools.browser import BaseBrowser -from .pages import PageCity, PageConcert, PageCityList +from .pages import PageCity, PageConcert, PageCityList, PageDate, PageDates __all__ = ['SueurDeMetalBrowser'] @@ -35,6 +35,8 @@ class SueurDeMetalBrowser(BaseBrowser): '%s://%s/ville-metal-.+.htm' % (PROTOCOL, DOMAIN): PageCity, r'%s://%s/detail-concert-metal.php\?c=.+' % (PROTOCOL, DOMAIN): PageConcert, '%s://%s/recherchemulti.php' % (PROTOCOL, DOMAIN): PageCityList, + '%s://%s/liste-dates-concerts.php' % (PROTOCOL, DOMAIN): PageDates, + r'%s://%s/date-metal-.+.htm' % (PROTOCOL, DOMAIN): PageDate, } def get_concerts_city(self, city): @@ -42,6 +44,15 @@ class SueurDeMetalBrowser(BaseBrowser): assert self.is_on_page(PageCity) return self.page.get_concerts() + def get_concerts_date(self, date_from, date_end=None): + self.location('%s://%s/liste-dates-concerts.php' % (self.PROTOCOL, self.DOMAIN)) + assert self.is_on_page(PageDates) + for day in self.page.get_dates_filtered(date_from, date_end): + self.location(day['url']) + assert self.is_on_page(PageDate) + for data in self.page.get_concerts(): + yield data + def get_concert(self, _id): self.location('%s://%s/detail-concert-metal.php?c=%s' % (self.PROTOCOL, self.DOMAIN, _id)) assert self.is_on_page(PageConcert) diff --git a/modules/sueurdemetal/pages.py b/modules/sueurdemetal/pages.py index 985a6ec5..92d9f828 100644 --- a/modules/sueurdemetal/pages.py +++ b/modules/sueurdemetal/pages.py @@ -21,9 +21,10 @@ from weboob.tools.browser import BasePage from weboob.tools.date import parse_french_date import re +from urlparse import urljoin -__all__ = ['PageCity', 'PageConcert', 'PageCityList'] +__all__ = ['PageCity', 'PageConcert', 'PageCityList', 'PageDate', 'PageDates'] class PageWithConcerts(BasePage): @@ -58,7 +59,13 @@ class PageWithConcerts(BasePage): return re.search(r'c=(\d+)', url).group(1) def extract_city_from_url(self, url): - re.search('metal-(.+).htm$', url).group(1) + return re.search('metal-(.+).htm$', url).group(1) + + def extract_concert_link(self, concert_table, d): + infos_a = concert_table.xpath('.//a[starts-with(@href, "detail-concert-metal.php")]')[0] + infos_a = concert_table.xpath('.//a[starts-with(@href, "detail-concert-metal.php")]')[0] + d['id'] = self.extract_id_from_url(infos_a.get('href')) + d['url'] = 'http://www.sueurdemetal.com/detail-concert-metal.php?c=%s' % d['id'] class PageCity(PageWithConcerts): @@ -68,13 +75,24 @@ class PageCity(PageWithConcerts): def extract_concert(self, concert_table): d = PageWithConcerts.extract_concert(self, concert_table) - infos_a = concert_table.xpath('.//a[starts-with(@href, "detail-concert-metal.php")]')[0] - d['id'] = self.extract_id_from_url(infos_a.get('href')) - d['url'] = 'http://www.sueurdemetal.com/detail-concert-metal.php?c=%s' % d['id'] + self.extract_concert_link(concert_table, d) d['city_id'] = self.extract_city_from_url(self.url) return d +class PageDate(PageWithConcerts): + def get_concerts(self): + for concert_table in self.document.xpath('//div[@id="centre-page"]//div/table'): + yield self.extract_concert(concert_table) + + def extract_concert(self, concert_table): + d = PageWithConcerts.extract_concert(self, concert_table) + self.extract_concert_link(concert_table, d) + city_a = concert_table.xpath('.//a[starts-with(@href, "ville-metal-")]')[0] + d['city_id'] = self.extract_city_from_url(city_a.get('href')) + return d + + class PageConcert(PageWithConcerts): def get_concert(self): concert_table = self.document.xpath('//div[@id="centre-page"]//div/table')[0] @@ -122,3 +140,19 @@ class PageCityList(BasePage): cities[d['name']] = d return cities + + +class PageDates(BasePage): + def get_dates(self): + for a in self.document.xpath('//div[@id="dateconcerts"]//a'): + d = {} + d['date'] = parse_french_date(a.text.strip()) + d['url'] = urljoin(self.url, a.get('href')) + yield d + + def get_dates_filtered(self, date_from=None, date_end=None): + for d in self.get_dates(): + date = d['date'] + if (not date_from or date_from <= date) and \ + (not date_end or date <= date_end): + yield d