[senscritique] Add new calendar module senscritique. The aim is to display films scheduled on TV as events

This commit is contained in:
Bezleputh 2014-02-18 19:48:04 +01:00
commit a7e78c1dc9
6 changed files with 511 additions and 0 deletions

View file

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import SenscritiqueBackend
# Names exported by ``from <package> import *``: only the backend class.
__all__ = ['SenscritiqueBackend']

View file

@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.value import Value, ValueBool
from weboob.capabilities.calendar import ICapCalendarEvent, CATEGORIES
from .browser import SenscritiqueBrowser
from .calendar import SensCritiquenCalendarEvent
# Names exported by ``from <module> import *``: only the backend class.
__all__ = ['SenscritiqueBackend']
def cmp_start_date(p1, p2):
    """Old-style comparison function ordering two objects by ``start_date``.

    Returns 0 when both start dates are equal, 1 when ``p1`` starts after
    ``p2`` and -1 otherwise.
    """
    first, second = p1.start_date, p2.start_date
    if first == second:
        return 0
    return 1 if first > second else -1
class SenscritiqueBackend(BaseBackend, ICapCalendarEvent):
    """Calendar backend listing the programs of the senscritique T.V. guide as events."""

    NAME = 'senscritique'
    DESCRIPTION = u'senscritique website'
    MAINTAINER = u'Bezleputh'
    EMAIL = 'carton_ben@yahoo.fr'
    LICENSE = 'AGPLv3+'
    VERSION = '0.i'
    ASSOCIATED_CATEGORIES = [CATEGORIES.TELE]
    BROWSER = SenscritiqueBrowser

    # Selectable T.V. packages, ordered by their (string) key. The key is
    # converted to an int in get_package_and_channels() and used to index
    # the ``general`` and ``cinema`` tables below.
    tv_settings_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
        '000000': u'-- Indifférent --',
        '9': u'TNT',
        '1': u'Canalsat',
        '2': u'Numericable',
        '10': u'Orange',
        '11': u'Free',
        '12': u'SFR',
        '15': u'Darty box via ADSL',
        '16': u'Bouygues',
    }.items())])

    # Channel ids of the "general" group, per package id.
    # NOTE(review): packages 10 and 11 list channel 46 twice where the other
    # packages start with 49, 46 -- possibly a typo for 49; confirm with the site.
    general = {
        9: [46, 2, 48, 56],
        1: [49, 46, 21, 2, 36, 59, 54, 48, 56, 50, 32, 1, 51, 24, 38, 34, 37, 6, 25, 11, 53, 26, 47],
        2: [49, 46, 21, 2, 36, 59, 54, 48, 56, 50, 32, 1, 51, 24, 38, 34, 37, 6, 25, 11, 53, 26, 47],
        10: [46, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
        11: [46, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
        12: [49, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
        15: [49, 46, 2, 36, 32, 24, 34, 37, 53, 47],
        16: [49, 46, 2, 36, 59, 54, 32, 24, 34, 37, 53, 47],
    }

    # Channel ids of the "cinema" group, per package id.
    cinema = {
        9: [10, 7],
        1: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
        2: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
        10: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 44, 3, 45, 42, 41, 43, 13, 12],
        11: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
        12: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 44, 3, 45, 42, 41, 43, 13, 12],
        15: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 44, 3, 45, 42, 41, 43, 13, 12],
        16: [10, 7, 9, 8, 52, 19, 18, 17, 16, 20, 15, 14, 4055, 44, 3, 45, 42, 41, 43, 13, 12],
    }

    CONFIG = BackendConfig(Value('tv_settings', label=u'T.V. package', choices=tv_settings_choices),
                           ValueBool('general', label='General', default=True),
                           ValueBool('cinema', label='Cinema', default=False),
                           )

    def get_package_and_channels(self):
        """Return ``(package, channels)`` computed from the backend configuration.

        ``package`` is the configured ``tv_settings`` value converted to int.
        ``channels`` is a new list holding the channel ids of that package for
        every enabled group; it stays empty when ``package`` is 0 (the
        ``'000000'`` choice).
        """
        package = int(self.config['tv_settings'].get())
        channels = []
        if package:
            if self.config['general'].get():
                channels += self.general[package]
            if self.config['cinema'].get():
                channels += self.cinema[package]
        return package, channels

    def search_events(self, query):
        """Return the browser's events between ``query.start_date`` and ``query.end_date``.

        Returns None when ``has_matching_categories(query)`` is false.
        """
        if self.has_matching_categories(query):
            with self.browser:
                package, channels = self.get_package_and_channels()
                return self.browser.list_events(query.start_date,
                                                query.end_date,
                                                package,
                                                channels)

    def list_events(self, date_from, date_to=None):
        """Return the list of events from ``date_from`` to ``date_to``, sorted by start date."""
        with self.browser:
            package, channels = self.get_package_and_channels()
            # The browser generator is fully consumed while the browser is
            # held. Sorting on a key (stable, like the former cmp-based sort)
            # avoids the ``cmp=`` argument that Python 3 no longer accepts.
            return sorted(self.browser.list_events(date_from, date_to, package, channels),
                          key=lambda event: event.start_date)

    def get_event(self, _id):
        """Return the event identified by ``_id`` as resolved by the browser."""
        with self.browser:
            return self.browser.get_event(_id)

    def fill_obj(self, event, fields):
        """Complete ``event`` through the browser; ``fields`` is not used."""
        with self.browser:
            return self.browser.get_event(event.id, event)

    # Maps object classes to the method used to fill them.
    OBJECTS = {SensCritiquenCalendarEvent: fill_obj}

View file

@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser
from weboob.tools.json import json as simplejson
from .calendar import SensCritiquenCalendarEvent
from .pages import ProgramPage, EventPage
import urllib
import urllib2
# Names exported by ``from <module> import *``: only the browser class.
__all__ = ['SenscritiqueBrowser']
class SenscritiqueBrowser(BaseBrowser):
    """Browser for the T.V. guide and film pages of www.senscritique.com."""

    PROTOCOL = 'http'
    DOMAIN = 'www.senscritique.com'
    ENCODING = 'utf-8'
    PAGES = {
        '%s://%s/sc/tv_guides' % (PROTOCOL, DOMAIN): ProgramPage,
        '%s://%s/film/(.*?)' % (PROTOCOL, DOMAIN): EventPage,
    }

    # Number of events requested per ajax page, and maximum number of pages
    # fetched before giving up.
    LIMIT = 25
    LIMIT_NB_PAGES = 10

    # Headers sent with the T.V. guide ajax request.
    HEADER_AJAX = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
                   "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
                   " GTB7.1 (.NET CLR 3.5.30729)",
                   "Accept": "gzip, deflate",
                   "X-Requested-With": "XMLHttpRequest",
                   "Referer": "http://www.senscritique.com/sc/tv_guides",
                   }

    # Base headers of the storyline request; a per-request ``Referer`` is
    # added in get_resume().
    HEADER_RESUME = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
                     "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
                     " GTB7.1 (.NET CLR 3.5.30729)",
                     "Accept": "application/json, text/javascript, */*; q=0.01",
                     "X-Requested-With": "XMLHttpRequest",
                     }

    # Base POST data of the T.V. guide ajax request; the page number is
    # added per request in get_ajax_content().
    DATA = {'order': 'chrono',
            'without_product_done': '0',
            'period': 'cette-semaine',
            'limit': '%d' % LIMIT,
            }

    URL = "http://www.senscritique.com/sc/tv_guides/gridContent.ajax"

    def home(self):
        """Go to the T.V. guide page."""
        self.location("http://www.senscritique.com/sc/tv_guides")
        assert self.is_on_page(ProgramPage)

    def list_events(self, date_from, date_to=None, package=None, channels=None):
        """Yield the events of the T.V. guide between ``date_from`` and ``date_to``.

        When both ``package`` and ``channels`` are given they are saved as the
        guide settings first. Pages are fetched until one holds fewer than
        ``LIMIT`` events or ``LIMIT_NB_PAGES`` pages have been read.
        """
        self.home()
        page = 1

        if package and channels:
            self.set_package_settings(package, channels)

        while True:
            self.page.document = self.get_ajax_content(page)
            # Count before yielding: the consumer may run other code between
            # two yielded events.
            nb_events = self.page.count_events()
            events = self.page.list_events(date_from, date_to)

            for event in events:
                yield event

            if nb_events < self.LIMIT or page >= self.LIMIT_NB_PAGES:
                break

            page += 1

    def set_package_settings(self, package, channels):
        """Post the T.V. package id and its channel ids to the saveSettings endpoint."""
        url = 'http://www.senscritique.com/sc/tv_guides/saveSettings.json'
        params = "network=%s" % package
        # %5B%5D is the url-encoded form of "[]".
        params += ''.join(["&channels%%5B%%5D=%d" % (channel) for channel in channels])
        self.openurl(url, params)

    def get_ajax_content(self, page=None):
        """Fetch one page of the T.V. guide grid and return the parsed document.

        :param page: page number to request; when None, ``DATA`` is posted
                     as it stands.
        """
        # Work on a copy: DATA is a class attribute shared by every browser
        # instance and must not carry the page number of a previous request.
        data = dict(self.DATA)
        if page is not None:
            data['page'] = '%d' % page
        req = urllib2.Request(self.URL, urllib.urlencode(data), headers=self.HEADER_AJAX)
        response = self.open(req)
        return self.get_document(response)

    def get_event(self, _id, event=None):
        """Return the event ``_id`` completed from its film page.

        When ``event`` is not given, the T.V. guide pages are searched for it
        first. Returns None when the event cannot be found.
        """
        if not event:
            self.home()
            page = 1

            while True:
                self.page.document = self.get_ajax_content(page)
                event = self.page.find_event(_id)
                nb_events = self.page.count_events()
                if event or nb_events < self.LIMIT or page >= self.LIMIT_NB_PAGES:
                    break

                page += 1

        if event:
            url = SensCritiquenCalendarEvent.id2url(_id)
            self.location(url)
            assert self.is_on_page(EventPage)
            return self.page.get_event(url, event)

        return None

    def get_resume(self, url, _id):
        """Return the storyline data of product ``_id``.

        ``url`` is sent as the ``Referer`` header. Returns None when the json
        answer does not report success.
        """
        # Copy the shared class-level headers instead of mutating them.
        headers = dict(self.HEADER_RESUME, Referer=url)
        req = urllib2.Request('http://www.senscritique.com/sc/products/storyline/%s.json' % _id,
                              headers=headers)
        response = self.open(req)
        result = simplejson.loads(response.read(), self.ENCODING)
        if result['json']['success']:
            return result['json']['data']
        return None

View file

@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.calendar import BaseCalendarEvent, TRANSP, STATUS, CATEGORIES
class SensCritiquenCalendarEvent(BaseCalendarEvent):
    """Calendar event of the senscritique module, preset to the TELE category."""

    def __init__(self, _id):
        """Create the event ``_id`` and preset the fields common to all events."""
        BaseCalendarEvent.__init__(self, _id)
        self.category = CATEGORIES.TELE
        self.status = STATUS.CONFIRMED
        self.transp = TRANSP.TRANSPARENT
        self.sequence = 1

    @classmethod
    def id2url(cls, _id):
        """Return the absolute url obtained by appending ``_id`` to the site root."""
        template = 'http://www.senscritique.com%s'
        return template % _id

View file

@ -0,0 +1,163 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.misc import html2text
from weboob.tools.browser import BasePage
from .calendar import SensCritiquenCalendarEvent
from datetime import date, datetime, time
# Both page classes are public: the browser module imports each of them.
__all__ = ['ProgramPage', 'EventPage']
class ProgramPage(BasePage):
    """T.V. guide grid: every ``<a>`` element of the document is one program."""

    # Channel names keyed by the ``class`` attribute of the logo ``<span>``,
    # used when the program carries no textual channel name.
    CHANNELS_PARAM = {
        'einst-3 elgr-data-logo': u'Action',
        'einst-8 elgr-data-logo': u'Canal+ Décalé',
        'einst-9 elgr-data-logo': u'Canal+ Family',
        'einst-12 elgr-data-logo': u'Ciné FX',
        'einst-13 elgr-data-logo': u'Polar',
        'einst-14 elgr-data-logo': u'Ciné+ Classic',
        'einst-15 elgr-data-logo': u'Ciné+ Club',
        'einst-16 elgr-data-logo': u'Ciné+ Emotion',
        'einst-17 elgr-data-logo': u'Ciné+ Famiz',
        'einst-18 elgr-data-logo': u'Ciné+ Frisson',
        'einst-19 elgr-data-logo': u'Ciné+ Premier',
        'einst-21 elgr-data-logo': u'Comédie+',
        'einst-24 elgr-data-logo': u'Disney Channel',
        'einst-25 elgr-data-logo': u'Disney Cinemagic',
        'einst-34 elgr-data-logo': u'Jimmy',
        'einst-37 elgr-data-logo': u'MCM',
        'einst-41 elgr-data-logo': u'OCS Géants',
        'einst-42 elgr-data-logo': u'OCS Choc',
        'einst-44 elgr-data-logo': u'OCS Max',
        'einst-45 elgr-data-logo': u'OCS City',
        'einst-49 elgr-data-logo': u'RTL 9',
        'einst-52 elgr-data-logo': u'TCM Cinéma',
        'einst-54 elgr-data-logo': u'Teva',
        'einst-59 elgr-data-logo': u'TV Breizh',
        'einst-4055 elgr-data-logo': u'Paramount Channel',
    }

    def find_event(self, _id):
        """Return the event whose link ``href`` equals ``_id``, or None if absent."""
        # Bind the id as an XPath variable rather than interpolating it into
        # the expression: a quote inside ``_id`` can no longer break the query.
        links = self.document.getroot().xpath("//a[@href=$href]", href=_id)
        if links:
            event_date = self.get_event_date(links[0])
            return self.create_event(links[0], event_date)
        return None

    def count_events(self):
        """Return the number of ``<a>`` elements in the document."""
        return len(self.document.getroot().xpath("//a"))

    def list_events(self, date_from, date_to=None):
        """Yield an event for each link whose date passes ``is_valid_event``."""
        for a in self.document.getroot().xpath("//a"):
            event_date = self.get_event_date(a)
            if self.is_valid_event(date_from, date_to, event_date):
                yield self.create_event(a, event_date)

    def create_event(self, a, event_date):
        """Build the event described by link ``a``, starting at ``event_date``.

        The summary is ``"<title> - <channel>"`` and the event ends at the
        last instant of the day it starts.
        """
        event = SensCritiquenCalendarEvent(a.attrib['href'])

        title = self.parser.select(a, "span[@class='elgr-product-title']", 1, method='xpath').text

        channel_info = self.parser.select(a, "div/div[@class='elgr-data-channel']", method='xpath')
        if channel_info:
            channel = channel_info[0].text.strip()
        else:
            # No textual channel name: map the class of the logo span instead
            # (None when the class is not listed in CHANNELS_PARAM).
            channel_info = self.parser.select(a,
                                              'div[@class="elgr-product-data"]/span',
                                              1,
                                              method='xpath').attrib['class']
            channel = self.CHANNELS_PARAM.get(channel_info)

        event.summary = u'%s - %s' % (title, channel)

        event.start_date = event_date
        event.end_date = datetime.combine(event_date.date(), time.max)
        return event

    def is_valid_event(self, date_from, date_to, event_date):
        """Tell whether ``date_from <= event_date`` and, if ``date_to`` is set, ``event_date < date_to``."""
        if event_date < date_from:
            return False
        return not date_to or event_date < date_to

    def get_event_date(self, a):
        """Return the start datetime read from the diffusion block of link ``a``."""
        div_date = self.parser.select(a, "div/div[@class='elgr-data-diffusion']", 1, method='xpath')
        _date = self.parse_start_date(div_date)

        # Drop the last 6 characters of the attribute before parsing the time
        # (presumably a "+HH:MM" utc offset -- TODO confirm).
        str_time = self.parser.select(div_date, "time", 1, method='xpath').attrib['datetime'][:-6]
        _time = datetime.strptime(str_time, '%H:%M:%S')

        return datetime.combine(_date, _time.time())

    def parse_start_date(self, div_date):
        """Return the day of the diffusion described by ``div_date``.

        When the block holds two ``d-date`` spans, the second one is the day
        number; a day number lower than today's is taken to belong to the
        next month. Otherwise today is returned.
        """
        spans_date = self.parser.select(div_date, "span[@class='d-date']", method='xpath')

        _date = date.today()
        if len(spans_date) == 2:
            day_number = int(spans_date[1].text)

            month = _date.month
            year = _date.year
            if day_number < _date.day:
                # Roll over to the next month; December wraps to January of
                # the next year instead of producing the invalid month 13.
                if month == 12:
                    month = 1
                    year += 1
                else:
                    month += 1

            _date = date(day=day_number, month=month, year=year)

        return _date
class EventPage(BasePage):
    """Film page, used to complete the url and description of an event."""

    def get_event(self, url, event):
        """Fill ``event.url`` and ``event.description`` from the page and return ``event``.

        The description is made of the title, the year, a bullet list of the
        product specs and details, and the resume of the film.
        """
        event.url = url

        header = self.document.getroot().xpath("//div[@class='pvi-hero-product']")[0]

        title = self.parser.select(header, "div[@class='d-rubric-inner']/h1", 1, method='xpath').text.strip()
        year = self.parser.select(header, "div[@class='d-rubric-inner']/small", 1, method='xpath').text.strip()

        specs = self.parser.select(header, "ul[@class='pvi-product-specs']/li", method='xpath')
        infos = u''.join([u'- %s\n' % self.parser.tocleanstring(li) for li in specs])

        section = self.document.getroot().xpath("//section[@class='pvi-productDetails']")[0]
        details = self.parser.select(section, "ul/li", method='xpath')
        infos += u''.join([u'- %s\n' % self.parser.tocleanstring(li) for li in details])

        full_resume = self.parser.select(section, "p[@data-rel='full-resume']", method='xpath')
        if full_resume:
            # The full resume is fetched separately, using the product id
            # carried by the button of the paragraph.
            _id = self.parser.select(full_resume[0], 'button', 1, method='xpath').attrib['data-sc-product-id']
            # get_resume() returns None when the service reports a failure;
            # fall back to an empty text rather than printing "None".
            resume = self.browser.get_resume(url, _id) or u''
        else:
            small_resume = self.parser.select(section, "p[@data-rel='small-resume']", method='xpath')
            if small_resume:
                resume = html2text(self.parser.tostring(small_resume[0]))
            else:
                resume = ""

        event.description = u'%s %s\n\n%s\n\n%s' % (title, year, infos, resume)
        return event

View file

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Bezleputh
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from datetime import datetime
class SenscritiqueTest(BackendTest):
    """Test of the senscritique backend: list events, then fetch the first one."""

    BACKEND = 'senscritique'

    def test_senscritique(self):
        """At least one event is listed from now on, and the first one has a url."""
        events = list(self.backend.list_events(datetime.now()))
        assert len(events)

        first_id = events[0].id
        event = self.backend.get_event(first_id)
        message = 'URL for event "%s" not found: %s' % (event.id, event.url)
        self.assertTrue(event.url, message)