[senscritique] site changed (we cannot choose channel packages anymore)

This commit is contained in:
Bezleputh 2015-09-02 12:00:58 +02:00
commit 7094d65d9c
3 changed files with 73 additions and 223 deletions

View file

@ -17,137 +17,52 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.base import UserError
from weboob.browser import PagesBrowser, URL
from .pages import FilmsPage, EventPage, JsonResumePage
from weboob.browser.profiles import Firefox
from .pages import AjaxPage, EventPage, JsonResumePage, SettingsPage
import re
from lxml.etree import XMLSyntaxError
__all__ = ['SenscritiqueBrowser']
class SenscritiqueBrowser(PagesBrowser):
def set_ajax_header(self):
    """Add the headers used for the TV-guide AJAX requests to the session.

    The session's header mapping is updated in place: headers listed here
    are overwritten, any other header already present is kept.
    """
    user_agent = ("Mozilla/5.0 (Windows; U; Windows "
                  "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
                  " GTB7.1 (.NET CLR 3.5.30729)")
    ajax_headers = {
        "User-Agent": user_agent,
        "Accept": "text/html, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "http://www.senscritique.com/sc/tv_guides",
        "Origin": "http://www.senscritique.com",
        "Accept-Language": "fr-fr;q=0.667",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    self.session.headers.update(ajax_headers)
# Site root; the URL patterns below are written relative to it
# (presumably resolved against it by PagesBrowser -- confirm).
BASEURL = 'http://www.senscritique.com'
# Each URL pairs a path pattern with the page class given as second argument.
films_page = URL('/everymovie/programme-tv/chrono', FilmsPage)
# `_id` captures everything that follows "/film/".
event_page = URL('/film/(?P<_id>.*)', EventPage)
# `_id` captures the part between "/storyline/" and the ".json" suffix.
json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage)
def set_json_header(self):
    """Add the headers used for JSON requests to the session.

    The session's header mapping is updated in place: the three headers
    below are overwritten, any other header already present is kept.
    """
    user_agent = ("Mozilla/5.0 (Windows; U; Windows "
                  "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
                  " GTB7.1 (.NET CLR 3.5.30729)")
    json_headers = {
        "User-Agent": user_agent,
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
    }
    self.session.headers.update(json_headers)
"NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
" GTB7.1 (.NET CLR 3.5.30729)",
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
})
ENCODING = 'utf-8'
# Cache for the channel list; get_channels() fills it on first use.
CHANNELS = None
# Site root used by the URL patterns declared on this class.
BASEURL = 'http://www.senscritique.com'
def list_events(self, date_from, date_to=None):
    """Open the films page and return its events for the given period.

    :param date_from: lower bound, forwarded to the page's ``iter_films``
    :param date_to: optional upper bound, forwarded as well
    :return: whatever ``iter_films`` returns for these bounds
    """
    films = self.films_page.go()
    return films.iter_films(date_from=date_from, date_to=date_to)
# TV-guide landing page; declared without a page class.
program_page = URL('/sc/tv_guides')
# Endpoint queried page by page with the DATA form below.
ajax_page = URL('/sc/tv_guides/gridContent.ajax', AjaxPage)
# `_id` captures everything that follows "/film/".
event_page = URL('/film/(?P<_id>.*)', EventPage)
# `_id` captures the part between "/storyline/" and the ".json" suffix.
json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage)
# Page from which get_channels() reads the channel list.
setting_page = URL('/sc/tv_guides/settings.ajax', SettingsPage)
LIMIT = 25 # number of results returned by each ajax call (fixed by the website).
LIMIT_NB_PAGES = 10 # arbitrary cap that avoids an infinite loop when the total number of films is a multiple of LIMIT (on the website this shows up as an infinite scroll)
# Form fields sent with every ajax_page request; the callers add a 'page' entry.
DATA = {'order': 'chrono',
'without_product_done': '0',
'period': 'cette-semaine',
'limit': '%d' % LIMIT,
}
def get_channels(self):
    """Return the list of channels, fetching the settings page only once.

    The result is stored in ``self.CHANNELS`` and reused by later calls.

    :return: list of the channel objects produced by the settings page
    """
    # Test against None rather than truthiness: an empty list is a valid
    # cached answer and must not trigger a new request on every call.
    if self.CHANNELS is None:
        self.CHANNELS = list(self.setting_page.go().get_channels())
    return self.CHANNELS
def get_selected_channels(self, package, general=False, cinema=False):
    """Yield the ids of the channels matching a package and thema choice.

    :param package: package number; ``0`` accepts every package, otherwise
        its text form must appear in the channel's ``_networks``
    :param general: keep channels whose ``_thema`` is ``'1'`` or ``'2'``
    :param cinema: keep channels whose ``_thema`` is ``'3'``
    :return: generator of channel ids
    """
    for channel in self.get_channels():
        in_package = package == 0 or u'%s' % package in channel._networks
        if not in_package:
            continue
        is_general = general and channel._thema in ('1', '2')
        if is_general or (cinema and channel._thema == '3'):
            yield channel.id
def set_package_settings(self, package, channels):
    """Post the chosen package and channels to the site's saveSettings URL.

    :param package: value sent as the ``network`` field
    :param channels: iterable of channel ids, each sent as one
        ``channels[]`` field (brackets already percent-encoded)
    """
    # The body is assembled by hand instead of from a dict because the
    # same key is repeated once per channel.
    channel_fields = ("&channels%%5B%%5D=%s" % (channel) for channel in channels)
    body = "network=%s" % package + ''.join(channel_fields)
    self.open('http://www.senscritique.com/sc/tv_guides/saveSettings.json', data=body)
def list_events(self, date_from, date_to=None, package=None, channels=None):
    """Yield the TV-guide events, walking the paginated AJAX listing.

    :param date_from: lower bound forwarded to each page's ``list_events``
    :param date_to: optional upper bound forwarded as well
    :param package: package number; the settings are only posted when both
        ``package`` and ``channels`` are truthy
    :param channels: channel ids posted together with ``package``
    :return: generator of the events found on the successive pages
    """
    self.set_profile(Firefox())
    self.program_page.go()
    page_nb = 1

    self.set_ajax_header()
    if package and channels:
        self.set_package_settings(package, channels)

    while True:
        # Build a fresh form for each request: DATA is a class attribute
        # shared by every instance and must not be modified in place.
        data = dict(self.DATA, page='%d' % page_nb)
        try:
            page = self.ajax_page.open(data=data)
            nb_events = page.count_events()
            events = page.list_events(date_from=date_from, date_to=date_to)

            for event in events:
                yield event

        except XMLSyntaxError:
            # A response that cannot be parsed ends the listing.
            break

        # A page holding fewer than LIMIT entries is the last one;
        # LIMIT_NB_PAGES bounds the walk in every other case.
        if nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES:
            break

        page_nb += 1
def get_event(self, _id, event=None, package=None, channels=None):
def get_event(self, _id, event=None):
if not event:
self.set_profile(Firefox())
self.program_page.go()
page_nb = 1
try:
event = self.films_page.go().iter_films(_id=_id).next()
except StopIteration:
raise UserError('This event (%s) does not exists' % _id)
self.set_ajax_header()
if package and channels:
self.set_package_settings(package, channels)
film_id = _id.split('#')[0]
event = self.event_page.go(_id=film_id).get_event(obj=event)
while True:
self.DATA['page'] = '%d' % page_nb
page = self.ajax_page.open(data=self.DATA)
try:
event = page.list_events(_id=_id).next()
except StopIteration:
event = None
resume = self.get_resume(film_id)
if resume:
event.description += resume
nb_events = page.count_events()
if event or nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES:
break
return event
page_nb += 1
if event:
_id = _id.split('#')[0]
self.set_profile(Firefox())
event = self.event_page.go(_id=_id).get_event(obj=event)
resume = self.get_resume(_id)
if resume:
event.description += self.get_resume(_id)
return event
def get_resume(self, _id):
def get_resume(self, film_id):
self.set_json_header()
re_id = re.compile('^/?.*/(.*)', re.DOTALL)
_id = re_id.search(_id).group(1)
return self.json_page.go(_id=_id).get_resume()
_id = film_id.split('/')[-1]
resume = self.json_page.go(_id=_id).get_resume()
self.set_profile(Firefox())
return resume

View file

@ -17,9 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.backend import Module, BackendConfig
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.value import Value, ValueBool
from weboob.tools.backend import Module
from weboob.capabilities.calendar import CapCalendarEvent, CATEGORIES
from .browser import SenscritiqueBrowser
@ -38,49 +36,21 @@ class SenscritiqueModule(Module, CapCalendarEvent):
ASSOCIATED_CATEGORIES = [CATEGORIES.TELE]
BROWSER = SenscritiqueBrowser
# Choices offered for the 'tv_settings' option: package id -> display label.
# The pairs are ordered by sorting on the key, which is a string
# (so '10' sorts before '2').
tv_settings_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
# '000000' is converted with int() in get_package_and_channels(), giving 0.
'000000': u'-- Indifférent --',
'9': u'TNT',
'1': u'Canalsat',
'2': u'Numericable',
'10': u'Orange',
'11': u'Free',
'12': u'SFR',
'15': u'Darty box via ADSL',
'16': u'Bouygues',
}.iteritems())])
# Backend options: the package choice plus two booleans that
# get_package_and_channels() passes on to the browser's channel selection.
CONFIG = BackendConfig(Value('tv_settings', label=u'T.V. package', choices=tv_settings_choices),
ValueBool('general', label='General', default=True),
ValueBool('cinema', label='Cinema', default=False),
)
def get_package_and_channels(self):
    """Read the backend configuration and resolve the matching channels.

    :return: ``(package, channels)`` where ``package`` is the integer value
        of the ``tv_settings`` option and ``channels`` is what the browser's
        ``get_selected_channels`` returns for that package and for the
        ``general`` / ``cinema`` options
    """
    settings = self.config
    package = int(settings['tv_settings'].get())
    want_general = settings['general'].get()
    want_cinema = settings['cinema'].get()
    channels = self.browser.get_selected_channels(package, want_general, want_cinema)
    return package, channels
def search_events(self, query):
if self.has_matching_categories(query):
package, channels = self.get_package_and_channels()
return self.browser.list_events(query.start_date,
query.end_date,
package,
channels)
return self.list_events(query.start_date,
query.end_date)
def list_events(self, date_from, date_to=None):
items = []
package, channels = self.get_package_and_channels()
for item in self.browser.list_events(date_from, date_to, package, channels):
for item in self.browser.list_events(date_from, date_to):
items.append(item)
items.sort(key=lambda o: o.start_date)
return items
def get_event(self, _id, event=None):
package, channels = self.get_package_and_channels()
return self.browser.get_event(_id, event, package=package, channels=channels)
return self.browser.get_event(_id, event)
def fill_obj(self, event, fields):
    """Complete ``event`` by looking it up again through ``get_event``.

    :param event: event to fill; its ``id`` is used for the lookup
    :param fields: requested fields (not used by this implementation)
    :return: the value returned by ``get_event``
    """
    event_id = event.id
    return self.get_event(event_id, event)

View file

@ -20,29 +20,31 @@
from .calendar import SensCritiquenCalendarEvent
from datetime import date, datetime, timedelta
from weboob.capabilities.base import empty, BaseObject
from weboob.capabilities.base import empty
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import Filter, CleanText, Regexp, Join, Format, BrowserURL, Env
from weboob.browser.filters.html import Link
class Channel(Filter):
class Description(Filter):
    """Filter assembling a description text from a film page."""

    def filter(self, el):
        """Combine four parts of the document that ``el[0]`` belongs to.

        The parts are the ``h1`` and ``small`` nodes of the hero block,
        the items of its ``pvi-product-specs`` list and the list items of
        the ``pvi-productDetails`` section.
        """
        header = "//div[@class='pvi-hero-product']"
        section = "//section[@class='pvi-productDetails']"

        title = CleanText("%s/div[@class='d-rubric-inner']/h1" % header)
        subtitle = CleanText("%s/div[@class='d-rubric-inner']/small" % header)
        specs = Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True)
        details = Join(u'- ', "%s/ul/li" % section, newline=True, addBefore='- ')

        template = Format(u'%s %s\n\n%s%s\n\n', title, subtitle, specs, details)
        return template(el[0])
def __call__(self, item):
channels = item.page.browser.get_channels()
return self.filter(self.select(self.selector, item, key=self._key, obj=self._obj), channels)
def filter(self, el, channels):
channel_info = el[0].xpath('div/div[@class="elgr-data-channel"]')
if channel_info:
return CleanText('.', children=False)(channel_info[0])
else:
channel_id = Regexp(CleanText('div[@class="elgr-product-data"]/span/@class'),
'einst-(.*) elgr-data-logo')(el[0])
for channel in channels:
if channel_id == channel.id:
return channel._name
class FormatDate(Filter):
    """Filter rendering the selected value as text through ``strftime``."""

    def __init__(self, pattern, selector):
        """Store the ``strftime`` pattern and hand ``selector`` to the base filter."""
        super(FormatDate, self).__init__(selector)
        self.pattern = pattern

    def filter(self, _date):
        """Return ``_date`` formatted with the stored pattern."""
        rendered = _date.strftime(self.pattern)
        return rendered
class Date(Filter):
@ -65,68 +67,10 @@ class Date(Filter):
return datetime.combine(_date, _time.time())
class FormatDate(Filter):
    """Filter rendering the selected value as text through ``strftime``."""

    def __init__(self, pattern, selector):
        """Store the ``strftime`` pattern and hand ``selector`` to the base filter."""
        super(FormatDate, self).__init__(selector)
        self.pattern = pattern

    def filter(self, date):
        """Return ``date`` formatted with the stored pattern."""
        rendered = date.strftime(self.pattern)
        return rendered
# Page handling one chunk of the TV-guide listing.
class AjaxPage(HTMLPage):
# Number of <a> elements in the document; list_events below builds one item
# per <a>, so this is the number of entries on the page.
def count_events(self):
return len(self.doc.xpath("//a"))
@method
class list_events(ListElement):
# One candidate event per <a> element.
item_xpath = '//a'
ignore_duplicate = True
class item(ItemElement):
klass = SensCritiquenCalendarEvent
# When a truthy '_id' is present in env, accept only the entry whose computed
# id (same format as obj_id below) equals it; otherwise accept every entry.
def condition(self):
if '_id' in self.env and self.env['_id']:
return Format(u'%s#%s#%s',
Regexp(Link('.'), '/film/(.*)'),
FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')),
CleanText(Channel('.'), replace=[(' ', '-')]))(self) == self.env['_id']
return True
# Keep the event when it starts strictly after 'date_from' and either no
# 'date_to' is set, or its end date is empty or not later than 'date_to'.
# Events failing that test are still kept whenever the '_id' key exists in
# env (presence only, its value is not checked); anything else is rejected.
def validate(self, obj):
if 'date_from' in self.env and self.env['date_from'] and obj.start_date > self.env['date_from']:
if not self.env['date_to']:
return True
else:
if empty(obj.end_date) or obj.end_date <= self.env['date_to']:
return True
if '_id' in self.env:
return True
return False
# Id layout: "<film path>#<start date as YYYYmmddHHMM>#<channel, spaces replaced by '-'>".
obj_id = Format(u'%s#%s#%s',
Regexp(Link('.'), '/film/(.*)'),
FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')),
CleanText(Channel('.'), replace=[(' ', '-')]))
obj_start_date = Date('div/div[@class="elgr-data-diffusion"]')
# Summary: the poster's alt text after its leading "Affiche", then the channel.
obj_summary = Format('%s - %s',
Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'),
Channel('.'))
class Description(Filter):
    """Filter assembling a description text from a film page."""

    def filter(self, el):
        """Combine four parts of the document that ``el[0]`` belongs to.

        The parts are the ``h1`` and ``small`` nodes of the hero block,
        the items of its ``pvi-product-specs`` list and the list items of
        the ``pvi-productDetails`` section.
        """
        header = "//div[@class='pvi-hero-product']"
        section = "//section[@class='pvi-productDetails']"

        title = CleanText("%s/div[@class='d-rubric-inner']/h1" % header)
        subtitle = CleanText("%s/div[@class='d-rubric-inner']/small" % header)
        specs = Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True)
        details = Join(u'- ', "%s/ul/li" % section, newline=True, addBefore=' - ')

        template = Format(u'%s %s\n\n%s%s\n\n', title, subtitle, specs, details)
        return template(el[0])
class JsonResumePage(JsonPage):
    """JSON page carrying the storyline text of a film."""

    def get_resume(self):
        """Return the ``data`` entry when ``success`` is truthy, else ``None``."""
        payload = self.doc['json']
        if not payload['success']:
            return None
        return payload['data']
class EventPage(HTMLPage):
@ -138,24 +82,45 @@ class EventPage(HTMLPage):
obj_description = Description('.')
class JsonResumePage(JsonPage):
    """JSON page carrying the storyline text of a film."""

    def get_resume(self):
        """Return the ``data`` entry when ``success`` is truthy, else ``None``."""
        payload = self.doc['json']
        if not payload['success']:
            return None
        return payload['data']
class SettingsPage(HTMLPage):
class FilmsPage(HTMLPage):
@method
class get_channels(ListElement):
item_xpath = '//li[@class="tse-channels-item hide"]'
class iter_films(ListElement):
item_xpath = '//li[@class="elgr-mosaic "]/a'
class item(ItemElement):
klass = BaseObject
klass = SensCritiquenCalendarEvent
obj_id = CleanText('./@data-sc-channel-id')
def condition(self):
if '_id' in self.env and self.env['_id']:
return Format(u'%s#%s#%s',
Regexp(Link('.'), '/film/(.*)'),
FormatDate("%Y%m%d%H%M",
Date('div/div[@class="elgr-data-diffusion"]')),
CleanText('./div/span[@class="d-offset"]',
replace=[(' ', '-')]))(self) == self.env['_id']
return True
def obj__networks(self):
return CleanText('./@data-sc-networks')(self).split(',')
def validate(self, obj):
if 'date_from' in self.env and self.env['date_from'] and obj.start_date > self.env['date_from']:
if not self.env['date_to']:
return True
else:
if empty(obj.end_date):
if obj.start_date < self.env['date_to']:
return True
elif obj.end_date <= self.env['date_to']:
return True
obj__thema = CleanText('./@data-sc-thema-id')
obj__name = CleanText('./label')
if '_id' in self.env:
return True
return False
obj_id = Format(u'%s#%s#%s',
Regexp(Link('.'), '/film/(.*)'),
FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')),
CleanText('./div/span[@class="d-offset"]', replace=[(' ', '-')]))
obj_start_date = Date('div/div[@class="elgr-data-diffusion"]')
obj_summary = Format('%s - %s',
Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'),
CleanText('./div/span[@class="d-offset"]'))