From a7e78c1dc9f57e0ea6931456e1d90da3cb114112 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Tue, 18 Feb 2014 19:48:04 +0100 Subject: [PATCH] [senscritique] Add new calendar module senscritique. The aim is to display films scheduled on TV as events --- modules/senscritique/__init__.py | 24 +++++ modules/senscritique/backend.py | 127 ++++++++++++++++++++++++ modules/senscritique/browser.py | 132 +++++++++++++++++++++++++ modules/senscritique/calendar.py | 34 +++++++ modules/senscritique/pages.py | 163 +++++++++++++++++++++++++++++++ modules/senscritique/test.py | 31 ++++++ 6 files changed, 511 insertions(+) create mode 100644 modules/senscritique/__init__.py create mode 100644 modules/senscritique/backend.py create mode 100644 modules/senscritique/browser.py create mode 100644 modules/senscritique/calendar.py create mode 100644 modules/senscritique/pages.py create mode 100644 modules/senscritique/test.py diff --git a/modules/senscritique/__init__.py b/modules/senscritique/__init__.py new file mode 100644 index 00000000..9dc56a19 --- /dev/null +++ b/modules/senscritique/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Bezleputh +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.value import Value, ValueBool
from weboob.capabilities.calendar import ICapCalendarEvent, CATEGORIES

from .browser import SenscritiqueBrowser
from .calendar import SensCritiquenCalendarEvent

__all__ = ['SenscritiqueBackend']


def cmp_start_date(p1, p2):
    """Three-way comparison of two events by their ``start_date``.

    Returns 0 when both start at the same time, 1 when ``p1`` starts after
    ``p2`` and -1 otherwise.  The backend itself now sorts with a key
    function; this helper is kept for callers that still import it.
    """
    if p1.start_date == p2.start_date:
        return 0
    if p1.start_date > p2.start_date:
        return 1
    return -1


class SenscritiqueBackend(BaseBackend, ICapCalendarEvent):
    """Calendar backend exposing films scheduled on TV as events."""

    NAME = 'senscritique'
    DESCRIPTION = u'senscritique website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '0.i'
    ASSOCIATED_CATEGORIES = [CATEGORIES.TELE]
    BROWSER = SenscritiqueBrowser

    # Package id (as a string) -> label, ordered by the string value of the id.
    # ``items()`` yields the same pairs as the former ``iteritems()``.
    tv_settings_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
        '000000': u'-- Indifférent --',
        '9': u'TNT',
        '1': u'Canalsat',
        '2': u'Numericable',
        '10': u'Orange',
        '11': u'Free',
        '12': u'SFR',
        '15': u'Darty box via ADSL',
        '16': u'Bouygues',
    }.items())])

    # Package id -> channel ids sent when the "general" option is enabled.
    # NOTE(review): packages 10 and 11 list 46 twice where the comparable
    # packages start with 49, 46 -- confirm whether 49 was intended.
    general = {
        9: [46, 2, 48, 56],
        1: [49, 46, 21, 2, 36, 59, 54, 48, 56, 50, 32, 1, 51, 24, 38, 34, 37, 6, 25, 11, 53, 26, 47],
        2: [49, 46, 21, 2, 36, 59, 54, 48, 56, 50, 32, 1, 51, 24, 38, 34, 37, 6, 25, 11, 53, 26, 47],
        10: [46, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
        11: [46, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
        12: [49, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
        15: [49, 46, 2, 36, 32, 24, 34, 37, 53, 47],
        16: [49, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
    }

    # Package id -> channel ids sent when the "cinema" option is enabled.
    cinema = {
        9: [10, 7],
        1: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
        2: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
        10: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 44, 3, 45, 42, 41, 43, 13, 12],
        11: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
        12: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 44, 3, 45, 42, 41, 43, 13, 12],
        15: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 44, 3, 45, 42, 41, 43, 13, 12],
        16: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
    }

    CONFIG = BackendConfig(Value('tv_settings', label=u'T.V. package', choices=tv_settings_choices),
                           ValueBool('general', label='General', default=True),
                           ValueBool('cinema', label='Cinema', default=False),
                           )

    def get_package_and_channels(self):
        """Return ``(package, channels)`` derived from the backend config.

        ``package`` is the ``tv_settings`` option converted to an int; the
        '000000' choice therefore yields 0 and an empty channel list.
        ``channels`` is a fresh list (the class-level tables are never
        modified) holding the "general" and/or "cinema" channel ids of the
        package, depending on the two boolean options.
        """
        package = int(self.config['tv_settings'].get())
        channels = []
        if package:
            if self.config['general'].get():
                channels += self.general[package]

            if self.config['cinema'].get():
                channels += self.cinema[package]

        return package, channels

    def search_events(self, query):
        """Return the events matching ``query``, in the browser's order.

        Returns ``None`` when the query categories do not match this
        backend.  The browser's ``list_events`` is a generator, so it is
        consumed here, inside the ``with self.browser`` block; returning it
        directly would defer every page fetch until after the block exits.
        """
        if not self.has_matching_categories(query):
            return None

        with self.browser:
            package, channels = self.get_package_and_channels()
            return list(self.browser.list_events(query.start_date,
                                                 query.end_date,
                                                 package,
                                                 channels))

    def list_events(self, date_from, date_to=None):
        """Return the events between the two dates, sorted by start date."""
        with self.browser:
            package, channels = self.get_package_and_channels()
            items = list(self.browser.list_events(date_from, date_to, package, channels))

        # A key function yields the same (stable) ordering as the former
        # cmp-based sort and does not rely on the Python-2-only ``cmp=``.
        items.sort(key=lambda event: event.start_date)
        return items

    def get_event(self, _id):
        """Look up the event whose id is ``_id`` through the browser."""
        with self.browser:
            return self.browser.get_event(_id)

    def fill_obj(self, event, fields):
        """Complete ``event`` from its detail page; ``fields`` is not used."""
        with self.browser:
            return self.browser.get_event(event.id, event)

    OBJECTS = {SensCritiquenCalendarEvent: fill_obj}
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.browser import BaseBrowser
from weboob.tools.json import json as simplejson

from .calendar import SensCritiquenCalendarEvent
from .pages import ProgramPage, EventPage

import urllib
import urllib2

__all__ = ['SenscritiqueBrowser']


class SenscritiqueBrowser(BaseBrowser):
    """Browser for the senscritique.com TV guide and film pages."""

    PROTOCOL = 'http'
    DOMAIN = 'www.senscritique.com'
    ENCODING = 'utf-8'

    PAGES = {
        '%s://%s/sc/tv_guides' % (PROTOCOL, DOMAIN): ProgramPage,
        '%s://%s/film/(.*?)' % (PROTOCOL, DOMAIN): EventPage,
    }

    # Number of events requested per grid page, and maximum pages walked.
    LIMIT = 25
    LIMIT_NB_PAGES = 10

    # NOTE(review): "gzip, deflate" looks like an Accept-Encoding value sent
    # under the Accept header -- left untouched, confirm against the site.
    HEADER_AJAX = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
                   "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
                   " GTB7.1 (.NET CLR 3.5.30729)",
                   "Accept": "gzip, deflate",
                   "X-Requested-With": "XMLHttpRequest",
                   "Referer": "http://www.senscritique.com/sc/tv_guides",
                   }

    HEADER_RESUME = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
                     "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
                     " GTB7.1 (.NET CLR 3.5.30729)",
                     "Accept": "application/json, text/javascript, */*; q=0.01",
                     "X-Requested-With": "XMLHttpRequest",
                     }

    # Base form fields of the grid request; treated as read-only, the page
    # number is added to a per-request copy in get_ajax_content().
    DATA = {'order': 'chrono',
            'without_product_done': '0',
            'period': 'cette-semaine',
            'limit': '%d' % LIMIT,
            }

    URL = "http://www.senscritique.com/sc/tv_guides/gridContent.ajax"

    def home(self):
        """Open the TV guide landing page."""
        self.location("http://www.senscritique.com/sc/tv_guides")
        assert self.is_on_page(ProgramPage)

    def list_events(self, date_from, date_to=None, package=None, channels=None):
        """Yield the guide events accepted by the page's date filter.

        When both ``package`` and ``channels`` are given, they are saved as
        the guide settings first.  Grid pages are then fetched one by one
        until a page holds fewer than ``LIMIT`` events or
        ``LIMIT_NB_PAGES`` pages have been read.
        """
        self.home()

        if package and channels:
            self.set_package_settings(package, channels)

        page = 1
        while True:
            self.page.document = self.get_ajax_content(page)
            # Counted before yielding, as the paging decision must reflect
            # the grid page that was just loaded.
            nb_events = self.page.count_events()

            for event in self.page.list_events(date_from, date_to):
                yield event

            if nb_events < self.LIMIT or page >= self.LIMIT_NB_PAGES:
                break

            page += 1

    def set_package_settings(self, package, channels):
        """Post the TV package id and its channel ids as guide settings."""
        url = 'http://www.senscritique.com/sc/tv_guides/saveSettings.json'
        params = "network=%s" % package
        # %5B%5D is the URL-encoded "[]" suffix of the repeated field.
        params += ''.join(["&channels%%5B%%5D=%d" % (channel) for channel in channels])
        self.openurl(url, params)

    def get_ajax_content(self, page=None):
        """Fetch one grid page and return its parsed document.

        ``page`` is the page number to request.  When omitted, the request
        uses ``DATA`` exactly as it currently stands, which is what the
        method did before the parameter existed.  The form data is always a
        copy, so the class-level ``DATA`` dict is never modified.
        """
        data = dict(self.DATA)
        if page is not None:
            data['page'] = '%d' % page

        req = urllib2.Request(self.URL, urllib.urlencode(data), headers=self.HEADER_AJAX)
        response = self.open(req)
        return self.get_document(response)

    def get_event(self, _id, event=None):
        """Return the event ``_id`` completed from its detail page.

        When ``event`` is not supplied, the guide grid is searched page by
        page for it first.  Returns ``None`` when the event is not found.
        """
        if not event:
            self.home()
            page = 1

            while True:
                self.page.document = self.get_ajax_content(page)
                event = self.page.find_event(_id)
                nb_events = self.page.count_events()
                if event or nb_events < self.LIMIT or page >= self.LIMIT_NB_PAGES:
                    break

                page += 1

        if event:
            url = SensCritiquenCalendarEvent.id2url(_id)
            self.location(url)
            assert self.is_on_page(EventPage)
            return self.page.get_event(url, event)

    def get_resume(self, url, _id):
        """Return the storyline data of product ``_id``.

        ``url`` is sent as the Referer header.  Returns ``None`` when the
        JSON answer does not report success.
        """
        # Per-request copy: the shared HEADER_RESUME dict is left untouched.
        headers = dict(self.HEADER_RESUME, Referer=url)
        req = urllib2.Request('http://www.senscritique.com/sc/products/storyline/%s.json' % _id,
                              headers=headers)
        response = self.open(req)
        result = simplejson.loads(response.read(), self.ENCODING)
        if result['json']['success']:
            return result['json']['data']
# -*- coding: utf-8 -*-

# Copyright(C) 2013 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.capabilities.calendar import BaseCalendarEvent, TRANSP, STATUS, CATEGORIES


class SensCritiquenCalendarEvent(BaseCalendarEvent):
    """Calendar event describing a film broadcast listed on senscritique."""

    def __init__(self, _id):
        """Create the event ``_id`` with the fixed defaults of this module."""
        BaseCalendarEvent.__init__(self, _id)
        # Every event of this module shares the same constant attributes.
        self.category = CATEGORIES.TELE
        self.status = STATUS.CONFIRMED
        self.transp = TRANSP.TRANSPARENT
        self.sequence = 1

    @classmethod
    def id2url(cls, _id):
        """Return the site URL obtained by appending ``_id`` to the host."""
        url_template = 'http://www.senscritique.com%s'
        return url_template % _id
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.misc import html2text
from weboob.tools.browser import BasePage
from .calendar import SensCritiquenCalendarEvent

from datetime import date, datetime, time


__all__ = ['ProgramPage', 'EventPage']


class ProgramPage(BasePage):
    """TV guide grid: every ``<a>`` element of the document is one event."""

    # CSS class of the channel logo span -> channel name, used when the grid
    # entry carries no textual channel name.
    CHANNELS_PARAM = {
        'einst-3 elgr-data-logo': u'Action',
        'einst-8 elgr-data-logo': u'Canal+ Décalé',
        'einst-9 elgr-data-logo': u'Canal+ Family',
        'einst-12 elgr-data-logo': u'Ciné FX',
        'einst-13 elgr-data-logo': u'Polar',
        'einst-14 elgr-data-logo': u'Ciné+ Classic',
        'einst-15 elgr-data-logo': u'Ciné+ Club',
        'einst-16 elgr-data-logo': u'Ciné+ Emotion',
        'einst-17 elgr-data-logo': u'Ciné+ Famiz',
        'einst-18 elgr-data-logo': u'Ciné+ Frisson',
        'einst-19 elgr-data-logo': u'Ciné+ Premier',
        'einst-21 elgr-data-logo': u'Comédie+',
        'einst-24 elgr-data-logo': u'Disney Channel',
        'einst-25 elgr-data-logo': u'Disney Cinemagic',
        'einst-34 elgr-data-logo': u'Jimmy',
        'einst-37 elgr-data-logo': u'MCM',
        'einst-41 elgr-data-logo': u'OCS Géants',
        'einst-42 elgr-data-logo': u'OCS Choc',
        'einst-44 elgr-data-logo': u'OCS Max',
        'einst-45 elgr-data-logo': u'OCS City',
        'einst-49 elgr-data-logo': u'RTL 9',
        'einst-52 elgr-data-logo': u'TCM Cinéma',
        'einst-54 elgr-data-logo': u'Teva',
        'einst-59 elgr-data-logo': u'TV Breizh',
        'einst-4055 elgr-data-logo': u'Paramount Channel',
    }

    def find_event(self, _id):
        """Return the event whose link href equals ``_id``, else ``None``."""
        # The id is bound as an XPath variable instead of being interpolated
        # into the expression, so quotes in it cannot break the query.
        # Assumes an lxml document, as the getroot().xpath() call suggests.
        links = self.document.getroot().xpath("//a[@href=$href]", href=_id)
        if links:
            event_date = self.get_event_date(links[0])
            return self.create_event(links[0], event_date)

    def count_events(self):
        """Return the number of ``<a>`` elements in the current document."""
        return len(self.document.getroot().xpath("//a"))

    def list_events(self, date_from, date_to=None):
        """Yield the events starting in ``[date_from, date_to)``."""
        for a in self.document.getroot().xpath("//a"):
            event_date = self.get_event_date(a)
            if self.is_valid_event(date_from, date_to, event_date):
                yield self.create_event(a, event_date)

    def create_event(self, a, event_date):
        """Build an event from grid link ``a`` starting at ``event_date``.

        The link href becomes the event id, the summary is
        "<title> - <channel>", and the event ends at the last instant of
        the day it starts.
        """
        event = SensCritiquenCalendarEvent(a.attrib['href'])

        title = self.parser.select(a, "span[@class='elgr-product-title']", 1, method='xpath').text
        channel_info = self.parser.select(a, "div/div[@class='elgr-data-channel']", method='xpath')
        if channel_info:
            channel = channel_info[0].text.strip()
        else:
            # No textual channel: map the logo span's CSS class to a name.
            channel_info = self.parser.select(a,
                                              'div[@class="elgr-product-data"]/span',
                                              1,
                                              method='xpath').attrib['class']
            channel = self.CHANNELS_PARAM.get(channel_info)
        event.summary = u'%s - %s' % (title, channel)

        event.start_date = event_date
        event.end_date = datetime.combine(event_date.date(), time.max)
        return event

    def is_valid_event(self, date_from, date_to, event_date):
        """Tell whether ``event_date`` is in ``[date_from, date_to)``.

        A false ``date_to`` means there is no upper bound.
        """
        if event_date < date_from:
            return False
        return not date_to or event_date < date_to

    def get_event_date(self, a):
        """Return the start datetime of the grid entry ``a``."""
        div_date = self.parser.select(a, "div/div[@class='elgr-data-diffusion']", 1, method='xpath')
        _date = self.parse_start_date(div_date)

        # The last six characters of the attribute are dropped before
        # parsing; presumably a "+HH:MM" UTC offset -- TODO confirm.
        str_time = self.parser.select(div_date, "time", 1, method='xpath').attrib['datetime'][:-6]
        _time = datetime.strptime(str_time, '%H:%M:%S')

        return datetime.combine(_date, _time.time())

    def parse_start_date(self, div_date):
        """Return the broadcast day read from ``div_date``.

        Today is returned unless exactly two ``d-date`` spans are present;
        otherwise the second span is the day of the month.  A day number
        lower than today's is taken to belong to the next month, rolling
        over to January of the next year when the current month is
        December (this used to build month 13 and raise ``ValueError``).
        """
        spans_date = self.parser.select(div_date, "span[@class='d-date']", method='xpath')

        today = date.today()
        if len(spans_date) != 2:
            return today

        day_number = int(spans_date[1].text)
        month = today.month
        year = today.year
        if day_number < today.day:
            if month == 12:
                month = 1
                year += 1
            else:
                month += 1

        return date(day=day_number, month=month, year=year)


class EventPage(BasePage):
    """Film detail page used to complete an event."""

    def get_event(self, url, event):
        """Set ``event.url`` and ``event.description`` from this page.

        The description is "<title> <year>", then the bulleted product
        specs and details, then the storyline.  Returns ``event``.
        """
        event.url = url

        root = self.document.getroot()
        header = root.xpath("//div[@class='pvi-hero-product']")[0]

        title = self.parser.select(header, "div[@class='d-rubric-inner']/h1", 1, method='xpath').text.strip()
        year = self.parser.select(header, "div[@class='d-rubric-inner']/small", 1, method='xpath').text.strip()

        spec_items = self.parser.select(header, "ul[@class='pvi-product-specs']/li", method='xpath')
        section = root.xpath("//section[@class='pvi-productDetails']")[0]
        detail_items = self.parser.select(section, "ul/li", method='xpath')

        # One "- text" line per spec item, followed by the detail items.
        infos = u''.join([u'- %s\n' % self.parser.tocleanstring(li)
                          for li in list(spec_items) + list(detail_items)])

        _resume = self.parser.select(section, "p[@data-rel='full-resume']", method='xpath')
        if not _resume:
            _resume = self.parser.select(section, "p[@data-rel='small-resume']", method='xpath')
            if _resume:
                resume = html2text(self.parser.tostring(_resume[0]))
            else:
                resume = ""
        else:
            # The full storyline is not inline: it is fetched through the
            # browser using the product id carried by the paragraph's button.
            _id = self.parser.select(_resume[0], 'button', 1, method='xpath').attrib['data-sc-product-id']
            # get_resume() returns nothing on failure; avoid writing the
            # literal "None" into the description in that case.
            resume = self.browser.get_resume(url, _id) or u''

        event.description = u'%s %s\n\n%s\n\n%s' % (title, year, infos, resume)
        return event
# -*- coding: utf-8 -*-

# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.tools.test import BackendTest
from datetime import datetime


class SenscritiqueTest(BackendTest):
    """Smoke test: list the events from now on, then fetch the first one."""

    BACKEND = 'senscritique'

    def test_senscritique(self):
        """At least one event is listed and its detailed form has a URL."""
        events = list(self.backend.list_events(datetime.now()))
        assert len(events)

        first_id = events[0].id
        event = self.backend.get_event(first_id)
        failure_message = 'URL for event "%s" not found: %s' % (event.id, event.url)
        self.assertTrue(event.url, failure_message)