[biplan] adapt to browser2

This commit is contained in:
Bezleputh 2014-03-27 00:16:46 +01:00
commit 7c9e47d3fb
4 changed files with 137 additions and 186 deletions

View file

@ -40,33 +40,29 @@ class BiplanBackend(BaseBackend, ICapCalendarEvent):
def search_events(self, query): def search_events(self, query):
if self.has_matching_categories(query): if self.has_matching_categories(query):
with self.browser: theatre_events = []
theatre_events = [] concert_events = []
concert_events = [] if CATEGORIES.CONCERT in query.categories:
if CATEGORIES.CONCERT in query.categories: concert_events = self.browser.list_events_concert(query.start_date,
concert_events = self.browser.list_events_concert(query.start_date, query.end_date,
query.end_date, query.city,
query.city, query.categories)
query.categories) if CATEGORIES.THEATRE in query.categories:
if CATEGORIES.THEATRE in query.categories: theatre_events = self.browser.list_events_theatre(query.start_date,
theatre_events = self.browser.list_events_theatre(query.start_date, query.end_date,
query.end_date, query.city,
query.city, query.categories)
query.categories)
return itertools.chain(concert_events, theatre_events) return itertools.chain(concert_events, theatre_events)
def list_events(self, date_from, date_to=None): def list_events(self, date_from, date_to=None):
with self.browser: return itertools.chain(self.browser.list_events_concert(date_from, date_to),
return itertools.chain(self.browser.list_events_concert(date_from, date_to), self.browser.list_events_theatre(date_from, date_to))
self.browser.list_events_theatre(date_from, date_to))
def get_event(self, _id): def get_event(self, _id):
with self.browser: return self.browser.get_event(_id)
return self.browser.get_event(_id)
def fill_obj(self, event, fields): def fill_obj(self, event, fields):
with self.browser: return self.browser.get_event(event.id, event)
return self.browser.get_event(event.id, event)
OBJECTS = {BiplanCalendarEvent: fill_obj} OBJECTS = {BiplanCalendarEvent: fill_obj}

View file

@ -18,38 +18,32 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser from weboob.tools.browser2 import PagesBrowser, URL
from weboob.tools.browser.decorators import id2url
from .pages import ProgramPage, EventPage from .pages import ProgramPage, EventPage
from .calendar import BiplanCalendarEvent
__all__ = ['BiplanBrowser'] __all__ = ['BiplanBrowser']
class BiplanBrowser(BaseBrowser): class BiplanBrowser(PagesBrowser):
PROTOCOL = 'http' BASEURL = 'http://www.lebiplan.org'
DOMAIN = 'www.lebiplan.org'
ENCODING = None
PAGES = { program_page = URL('/fr/biplan-prog-(?P<_category>.*)', ProgramPage)
#'%s://%s/fr/biplan-prog-concert.php' % (PROTOCOL, DOMAIN): ProgramPage, event_page = URL('/(?P<_id>.*).html', EventPage)
'%s://%s/fr/biplan-prog(.*?).php' % (PROTOCOL, DOMAIN): ProgramPage,
'%s://%s/(.*?)' % (PROTOCOL, DOMAIN): EventPage,
}
def list_events_concert(self, date_from, date_to=None, city=None, categories=None): def list_events_concert(self, date_from, date_to=None, city=None, categories=None):
self.location('%s://%s/fr/biplan-prog-concert.php' % (self.PROTOCOL, self.DOMAIN)) return self.program_page.go(_category='concert').list_events(date_from=date_from,
assert self.is_on_page(ProgramPage) date_to=date_to,
return self.page.list_events(date_from, date_to, city, categories, is_concert=True) city=city,
categories=categories,
is_concert=True)
def list_events_theatre(self, date_from, date_to=None, city=None, categories=None): def list_events_theatre(self, date_from, date_to=None, city=None, categories=None):
self.location('%s://%s/fr/biplan-prog-theatre.php' % (self.PROTOCOL, self.DOMAIN)) return self.program_page.go(_category='theatre').list_events(date_from=date_from,
assert self.is_on_page(ProgramPage) date_to=date_to,
return self.page.list_events(date_from, date_to, city, categories, is_concert=False) city=city,
categories=categories,
is_Concert=False)
@id2url(BiplanCalendarEvent.id2url) def get_event(self, _id, event=None):
def get_event(self, url, event=None): return self.event_page.go(_id=_id).get_event(obj=event)
self.location(url)
assert self.is_on_page(EventPage)
return self.page.get_event(url, event)

View file

@ -22,26 +22,22 @@ from weboob.capabilities.calendar import BaseCalendarEvent, TRANSP, STATUS, CATE
class BiplanCalendarEvent(BaseCalendarEvent): class BiplanCalendarEvent(BaseCalendarEvent):
def __init__(self, _id): def __init__(self):
BaseCalendarEvent.__init__(self, _id) BaseCalendarEvent.__init__(self)
self.city = u'LILLE' self.city = u'LILLE'
self.location = u'19, rue Colbert' self.location = u'19, rue Colbert'
self.sequence = 1 self.sequence = 1
self.transp = TRANSP.TRANSPARENT self.transp = TRANSP.TRANSPARENT
self.status = STATUS.CONFIRMED self.status = STATUS.CONFIRMED
@classmethod
def id2url(cls, _id):
return 'http://www.lebiplan.org/%s.html' % _id
class BiplanCalendarEventConcert(BiplanCalendarEvent): class BiplanCalendarEventConcert(BiplanCalendarEvent):
def __init__(self, _id): def __init__(self):
BiplanCalendarEvent.__init__(self, _id) BiplanCalendarEvent.__init__(self)
self.category = CATEGORIES.CONCERT self.category = CATEGORIES.CONCERT
class BiplanCalendarEventTheatre(BiplanCalendarEvent): class BiplanCalendarEventTheatre(BiplanCalendarEvent):
def __init__(self, _id): def __init__(self, _id):
BiplanCalendarEvent.__init__(self, _id) BiplanCalendarEvent.__init__(self)
self.category = CATEGORIES.THEATRE self.category = CATEGORIES.THEATRE

View file

@ -21,164 +21,129 @@ import re
from datetime import datetime, time from datetime import datetime, time
import weboob.tools.date as date_util import weboob.tools.date as date_util
from weboob.tools.browser import BasePage
from .calendar import BiplanCalendarEventConcert, BiplanCalendarEventTheatre from .calendar import BiplanCalendarEventConcert, BiplanCalendarEventTheatre
from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement
from weboob.tools.browser2.filters import Filter, Link, CleanText, Env, Regexp, Combine, CleanHTML
__all__ = ['ProgramPage', 'EventPage'] __all__ = ['ProgramPage', 'EventPage']
def parse_b(b): class BiplanPrice(Filter):
to_return = [] def filter(self, el):
for item in b.replace('\n', '\t').replace(' ', '\t').split('\t'): index = 1 if len(el) > 1 else 0
if not (item is None or item == ''): content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
to_return.append(u'%s' % item) a_price = content.split(' - ')[-1]
return to_return parsed_price = re.findall(r"\d*\,\d+|\d+", " ".join(a_price))
class ProgramPage(BasePage):
def list_events(self, date_from, date_to=None, city=None, categories=None, is_concert=True):
divs = self.document.getroot().xpath("//div[@class='ligne']")
for i in range(2, len(divs)):
event = self.create_event(divs[i], date_from, date_to, city, categories, is_concert=is_concert)
if event:
yield event
def create_event(self, div, date_from, date_to, city=None, categories=None, is_concert=True):
re_id = re.compile('/(.*?).html', re.DOTALL)
a_id = self.parser.select(div, "div/a", 1, method='xpath')
b = self.parser.select(div, "div/div/b", 2, method='xpath')
_id = re_id.search(a_id.attrib['href']).group(1)
date = self.parse_date(b[0].text_content())
re_time = re.compile('(\d{1,2}h[\d{1,2}]?)', re.DOTALL)
start_end_date = re_time.findall(b[1].text_content().split('-')[0].strip())
if start_end_date:
time_price = parse_b(b[1].text_content())
start_time = self.parse_start_time(start_end_date[0])
start_date = datetime.combine(date, start_time)
if len(start_end_date) > 1:
end_time = self.parse_start_time(start_end_date[1])
end_date = datetime.combine(start_date, end_time)
else:
end_date = datetime.combine(start_date, time.max)
if _id and self.is_event_in_valid_period(start_date, date_from, date_to):
if is_concert:
event = BiplanCalendarEventConcert(_id)
else:
event = BiplanCalendarEventTheatre(_id)
event.start_date = start_date
event.end_date = end_date
price = time_price[time_price.index('-') + 1:]
parsed_price = re.findall(r"\d*\,\d+|\d+", " ".join(price))
if parsed_price and len(parsed_price) > 0:
event.price = float(parsed_price[0].replace(',', '.'))
else:
event.price = float(0)
event.summary = u'%s' % self.parser.select(div, "div/div/div/a/strong", 1, method='xpath').text
if self.is_valid_event(event, city, categories):
return event
def is_valid_event(self, event, city, categories):
if city and city != '' and city.upper() != event.city.upper():
return False
if categories and len(categories) > 0 and event.category not in categories:
return False
return True
def is_event_in_valid_period(self, event_date, date_from, date_to):
if event_date >= date_from:
if not date_to:
return True
else:
if event_date <= date_to:
return True
return False
def parse_start_time(self, _time):
start_time = _time.split('h')
time_hour = start_time[0]
time_minutes = 0
if len(start_time) > 1 and start_time[1]:
time_minutes = start_time[1]
return time(int(time_hour), int(time_minutes))
def parse_date(self, b):
content = parse_b(b)
a_date = content[1:content.index('-')]
for fr, en in date_util.DATE_TRANSLATE_FR:
a_date[1] = fr.sub(en, a_date[1])
if (datetime.now().month > datetime.strptime(a_date[1], "%B").month):
a_date.append(u'%i' % (datetime.now().year + 1))
else:
a_date.append(u'%i' % (datetime.now().year))
return date_util.parse_french_date(" ".join(a_date))
class EventPage(BasePage):
def get_event(self, url, event=None):
div = self.document.getroot().xpath("//div[@id='popup']")[0]
if not event:
re_id = re.compile('http://www.lebiplan.org/(.*?).html', re.DOTALL)
_id = re_id.search(url).group(1)
if div.attrib['class'] == 'theatre-popup':
event = BiplanCalendarEventTheatre(_id)
else:
event = BiplanCalendarEventConcert(_id)
b = self.parser.select(div, "div/b", 1, method='xpath').text_content()
splitted_b = b.split('-')
parsed_price = re.findall(r"\d*\,\d+|\d+", " ".join(parse_b(splitted_b[-1])))
if parsed_price and len(parsed_price) > 0: if parsed_price and len(parsed_price) > 0:
event.price = float(parsed_price[0].replace(',', '.')) return float(parsed_price[0].replace(',', '.'))
else:
event.price = float(0)
_date = date_util.parse_french_date(" ".join(parse_b(splitted_b[0]))) return float(0)
re_time = re.compile('(\d{1,2}h[\d{1,2}]?)', re.DOTALL)
start_end_date = re_time.findall(splitted_b[2])
if start_end_date: class BiplanDate(Filter):
start_time = self.parse_start_time(start_end_date[0]) def filter(self, el):
content = CleanText.clean(CleanText(CleanHTML('.'), ['*'])(el[0]))
a_date = content[0:content.index(' - ')]
if len(start_end_date) > 1: for fr, en in date_util.DATE_TRANSLATE_FR:
end_time = self.parse_start_time(start_end_date[1]) a_date = fr.sub(en, a_date)
try:
_month = datetime.strptime(a_date, "%A %d %B").month
if (datetime.now().month > _month):
a_date += u' %i' % (datetime.now().year + 1)
else: else:
end_time = time.max a_date += u' %i' % (datetime.now().year)
except ValueError:
pass
event.start_date = datetime.combine(_date, start_time) return datetime.strptime(a_date, "%A %d %B %Y")
event.end_date = datetime.combine(_date, end_time)
event.url = url
event.summary = u'%s' % self.parser.select(div, "div/div/span", 1, method='xpath').text_content() class StartTime(Filter):
event.description = u'%s' % self.parser.select(div, def filter(self, el):
"div/div[@class='presentation-popup']", index = 1 if len(el) > 1 else 0
1, content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
method='xpath').text_content().strip() a_time = content.split(' - ')[-2]
return event regexp = re.compile(ur'(?P<hh>\d+)h?(?P<mm>\d+)')
m = regexp.search(a_time)
return time(int(m.groupdict()['hh'] or 0), int(m.groupdict()['mm'] or 0))
def parse_start_time(self, _time):
start_time = _time.split('h') class EndTime(Filter):
time_hour = start_time[0] def filter(self, el):
time_minutes = 0 return time.max
if len(start_time) > 1 and start_time[1]:
time_minutes = start_time[1]
return time(int(time_hour), int(time_minutes)) class ProgramPage(HTMLPage):
@method
class list_events(ListElement):
item_xpath = '//div[@class="ligne"]'
class item(ItemElement):
klass = BiplanCalendarEventConcert if Env('is_concert') else BiplanCalendarEventTheatre
def condition(self):
if(self.el.xpath('./div')):
return True
return False
def validate(self, obj):
return (self.is_valid_event(obj, self.env['city'], self.env['categories']) and
self.is_event_in_valid_period(obj.start_date, self.env['date_from'], self.env['date_to']))
def is_valid_event(self, event, city, categories):
if city and city != '' and city.upper() != event.city.upper():
return False
if categories and len(categories) > 0 and event.category not in categories:
return False
return True
def is_event_in_valid_period(self, event_date, date_from, date_to):
if event_date >= date_from:
if not date_to:
return True
else:
if event_date <= date_to:
return True
return False
obj_id = Regexp(Link('./div/a'), '/(.*?).html')
obj_start_date = Combine(BiplanDate('div/div/b'), StartTime('div/div/b'))
obj_end_date = Combine(BiplanDate('div/div/b'), EndTime('.'))
obj_price = BiplanPrice('div/div/b')
obj_summary = CleanText("div/div/div/a/strong")
class EventPage(HTMLPage):
@method
class get_event(ItemElement):
klass = BiplanCalendarEventConcert if Env('is_concert') else BiplanCalendarEventTheatre
def parse(self, el):
_div = "//div/div/div[@id='popup']"
div = el.xpath("%s" % _div)[0]
if self.obj.id:
event = self.obj
event.url = self.page.url
event.description = CleanHTML("%s/div/div[@class='presentation-popup']" % _div)(self)
raise SkipItem()
self.env['is_concert'] = not div.attrib['class'] == 'theatre-popup'
self.env['url'] = self.page.url
obj_id = Env('_id')
base = "//div[@id='popup']"
obj_price = BiplanPrice("%s/div/b" % base)
obj_start_date = Combine(BiplanDate("%s/div/b" % base), StartTime("%s/div/b" % base))
obj_end_date = Combine(BiplanDate("%s/div/b" % base), EndTime("."))
obj_url = Env('url')
obj_summary = CleanText('%s/div/div/span' % base)
obj_description = CleanHTML('%s/div/div[@class="presentation-popup"]' % base)