diff --git a/modules/senscritique/browser.py b/modules/senscritique/browser.py index 67d7f051..60362eba 100644 --- a/modules/senscritique/browser.py +++ b/modules/senscritique/browser.py @@ -17,137 +17,52 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . +from weboob.capabilities.base import UserError from weboob.browser import PagesBrowser, URL +from .pages import FilmsPage, EventPage, JsonResumePage from weboob.browser.profiles import Firefox -from .pages import AjaxPage, EventPage, JsonResumePage, SettingsPage - -import re -from lxml.etree import XMLSyntaxError __all__ = ['SenscritiqueBrowser'] class SenscritiqueBrowser(PagesBrowser): - def set_ajax_header(self): - self.session.headers.update({"User-Agent": "Mozilla/5.0 (Windows; U; Windows " - "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8" - " GTB7.1 (.NET CLR 3.5.30729)", - "Accept": "text/html, */*; q=0.01", - "X-Requested-With": "XMLHttpRequest", - "Referer": "http://www.senscritique.com/sc/tv_guides", - "Origin": "http://www.senscritique.com", - "Accept-Language": "fr-fr;q=0.667", - "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", - }) + BASEURL = 'http://www.senscritique.com' + + films_page = URL('/everymovie/programme-tv/chrono', FilmsPage) + event_page = URL('/film/(?P<_id>.*)', EventPage) + json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage) def set_json_header(self): self.session.headers.update({"User-Agent": "Mozilla/5.0 (Windows; U; Windows " - "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8" - " GTB7.1 (.NET CLR 3.5.30729)", - "Accept": "application/json, text/javascript, */*; q=0.01", - "X-Requested-With": "XMLHttpRequest", - }) + "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8" + " GTB7.1 (.NET CLR 3.5.30729)", + "Accept": "application/json, text/javascript, */*; q=0.01", + "X-Requested-With": "XMLHttpRequest", + }) - ENCODING = 'utf-8' - CHANNELS = None - BASEURL = 'http://www.senscritique.com' + def list_events(self, date_from, date_to=None): + return self.films_page.go().iter_films(date_from=date_from, date_to=date_to) - program_page = URL('/sc/tv_guides') - ajax_page = URL('/sc/tv_guides/gridContent.ajax', AjaxPage) - event_page = URL('/film/(?P<_id>.*)', EventPage) - json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage) - setting_page = URL('/sc/tv_guides/settings.ajax', SettingsPage) - - LIMIT = 25 # number of results returned for each ajax call (defined in the website). - - LIMIT_NB_PAGES = 10 # arbitrary limit to avoid infinitive loop that can occurs if total number of films is a multiple of LIMIT (in website it causes an infinite scroll) - - DATA = {'order': 'chrono', - 'without_product_done': '0', - 'period': 'cette-semaine', - 'limit': '%d' % LIMIT, - } - - def get_channels(self): - if not self.CHANNELS: - self.CHANNELS = list(self.setting_page.go().get_channels()) - return self.CHANNELS - - def get_selected_channels(self, package, general=False, cinema=False): - for channel in self.get_channels(): - if (package == 0 or u'%s' % package in channel._networks) and\ - ((general and channel._thema in ('1', '2')) or (cinema and channel._thema == '3')): - yield channel.id - - def set_package_settings(self, package, channels): - url = 'http://www.senscritique.com/sc/tv_guides/saveSettings.json' - # do not use a dict because there are several same keys - params = "network=%s" % package - params += ''.join(["&channels%%5B%%5D=%s" % (channel) for channel in channels]) - self.open(url, data=params) - - def list_events(self, date_from, date_to=None, package=None, channels=None): - self.set_profile(Firefox()) - self.program_page.go() - page_nb = 1 - - self.set_ajax_header() - if package and channels: - self.set_package_settings(package, channels) - - while True: - try: - self.DATA['page'] = '%d' % page_nb - page = self.ajax_page.open(data=self.DATA) - nb_events = page.count_events() - events = page.list_events(date_from=date_from, date_to=date_to) - - for event in events: - yield event - except XMLSyntaxError: - break - - if nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES: - break - - page_nb += 1 - - def get_event(self, _id, event=None, package=None, channels=None): + def get_event(self, _id, event=None): if not event: - self.set_profile(Firefox()) - self.program_page.go() - page_nb = 1 + try: + event = self.films_page.go().iter_films(_id=_id).next() + except StopIteration: + raise UserError('This event (%s) does not exists' % _id) - self.set_ajax_header() - if package and channels: - self.set_package_settings(package, channels) + film_id = _id.split('#')[0] + event = self.event_page.go(_id=film_id).get_event(obj=event) - while True: - self.DATA['page'] = '%d' % page_nb - page = self.ajax_page.open(data=self.DATA) - try: - event = page.list_events(_id=_id).next() - except StopIteration: - event = None + resume = self.get_resume(film_id) + if resume: + event.description += resume - nb_events = page.count_events() - if event or nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES: - break + return event - page_nb += 1 - - if event: - _id = _id.split('#')[0] - self.set_profile(Firefox()) - event = self.event_page.go(_id=_id).get_event(obj=event) - resume = self.get_resume(_id) - if resume: - event.description += self.get_resume(_id) - return event - - def get_resume(self, _id): + def get_resume(self, film_id): self.set_json_header() - re_id = re.compile('^/?.*/(.*)', re.DOTALL) - _id = re_id.search(_id).group(1) - return self.json_page.go(_id=_id).get_resume() + _id = film_id.split('/')[-1] + resume = self.json_page.go(_id=_id).get_resume() + self.set_profile(Firefox()) + return resume diff --git a/modules/senscritique/module.py b/modules/senscritique/module.py index 5446b4e1..cd83fe99 100644 --- a/modules/senscritique/module.py +++ b/modules/senscritique/module.py @@ -17,9 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.backend import Module, BackendConfig -from weboob.tools.ordereddict import OrderedDict -from weboob.tools.value import Value, ValueBool +from weboob.tools.backend import Module from weboob.capabilities.calendar import CapCalendarEvent, CATEGORIES from .browser import SenscritiqueBrowser @@ -38,49 +36,21 @@ class SenscritiqueModule(Module, CapCalendarEvent): ASSOCIATED_CATEGORIES = [CATEGORIES.TELE] BROWSER = SenscritiqueBrowser - tv_settings_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({ - '000000': u'-- Indifférent --', - '9': u'TNT', - '1': u'Canalsat', - '2': u'Numericable', - '10': u'Orange', - '11': u'Free', - '12': u'SFR', - '15': u'Darty box via ADSL', - '16': u'Bouygues', - }.iteritems())]) - - CONFIG = BackendConfig(Value('tv_settings', label=u'T.V. package', choices=tv_settings_choices), - ValueBool('general', label='General', default=True), - ValueBool('cinema', label='Cinema', default=False), - ) - - def get_package_and_channels(self): - package = int(self.config['tv_settings'].get()) - channels = self.browser.get_selected_channels(package, self.config['general'].get(), - self.config['cinema'].get()) - return package, channels - def search_events(self, query): if self.has_matching_categories(query): - package, channels = self.get_package_and_channels() - return self.browser.list_events(query.start_date, - query.end_date, - package, - channels) + return self.list_events(query.start_date, + query.end_date) def list_events(self, date_from, date_to=None): items = [] - package, channels = self.get_package_and_channels() - for item in self.browser.list_events(date_from, date_to, package, channels): + for item in self.browser.list_events(date_from, date_to): items.append(item) items.sort(key=lambda o: o.start_date) return items def get_event(self, _id, event=None): - package, channels = self.get_package_and_channels() - return self.browser.get_event(_id, event, package=package, channels=channels) + return self.browser.get_event(_id, event) def fill_obj(self, event, fields): return self.get_event(event.id, event) diff --git a/modules/senscritique/pages.py b/modules/senscritique/pages.py index d834e760..d0bcd993 100644 --- a/modules/senscritique/pages.py +++ b/modules/senscritique/pages.py @@ -20,29 +20,31 @@ from .calendar import SensCritiquenCalendarEvent from datetime import date, datetime, timedelta -from weboob.capabilities.base import empty, BaseObject +from weboob.capabilities.base import empty from weboob.browser.pages import HTMLPage, JsonPage from weboob.browser.elements import ItemElement, ListElement, method from weboob.browser.filters.standard import Filter, CleanText, Regexp, Join, Format, BrowserURL, Env from weboob.browser.filters.html import Link -class Channel(Filter): +class Description(Filter): + def filter(self, el): + header = "//div[@class='pvi-hero-product']" + section = "//section[@class='pvi-productDetails']" + return Format(u'%s %s\n\n%s%s\n\n', + CleanText("%s/div[@class='d-rubric-inner']/h1" % header), + CleanText("%s/div[@class='d-rubric-inner']/small" % header), + Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True), + Join(u'- ', "%s/ul/li" % section, newline=True, addBefore='- '))(el[0]) - def __call__(self, item): - channels = item.page.browser.get_channels() - return self.filter(self.select(self.selector, item, key=self._key, obj=self._obj), channels) - def filter(self, el, channels): - channel_info = el[0].xpath('div/div[@class="elgr-data-channel"]') - if channel_info: - return CleanText('.', children=False)(channel_info[0]) - else: - channel_id = Regexp(CleanText('div[@class="elgr-product-data"]/span/@class'), - 'einst-(.*) elgr-data-logo')(el[0]) - for channel in channels: - if channel_id == channel.id: - return channel._name +class FormatDate(Filter): + def __init__(self, pattern, selector): + super(FormatDate, self).__init__(selector) + self.pattern = pattern + + def filter(self, _date): + return _date.strftime(self.pattern) class Date(Filter): @@ -65,68 +67,10 @@ class Date(Filter): return datetime.combine(_date, _time.time()) -class FormatDate(Filter): - def __init__(self, pattern, selector): - super(FormatDate, self).__init__(selector) - self.pattern = pattern - - def filter(self, date): - return date.strftime(self.pattern) - - -class AjaxPage(HTMLPage): - - def count_events(self): - return len(self.doc.xpath("//a")) - - @method - class list_events(ListElement): - item_xpath = '//a' - ignore_duplicate = True - - class item(ItemElement): - klass = SensCritiquenCalendarEvent - - def condition(self): - if '_id' in self.env and self.env['_id']: - return Format(u'%s#%s#%s', - Regexp(Link('.'), '/film/(.*)'), - FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')), - CleanText(Channel('.'), replace=[(' ', '-')]))(self) == self.env['_id'] - return True - - def validate(self, obj): - if 'date_from' in self.env and self.env['date_from'] and obj.start_date > self.env['date_from']: - if not self.env['date_to']: - return True - else: - if empty(obj.end_date) or obj.end_date <= self.env['date_to']: - return True - - if '_id' in self.env: - return True - - return False - - obj_id = Format(u'%s#%s#%s', - Regexp(Link('.'), '/film/(.*)'), - FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')), - CleanText(Channel('.'), replace=[(' ', '-')])) - obj_start_date = Date('div/div[@class="elgr-data-diffusion"]') - obj_summary = Format('%s - %s', - Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'), - Channel('.')) - - -class Description(Filter): - def filter(self, el): - header = "//div[@class='pvi-hero-product']" - section = "//section[@class='pvi-productDetails']" - return Format(u'%s %s\n\n%s%s\n\n', - CleanText("%s/div[@class='d-rubric-inner']/h1" % header), - CleanText("%s/div[@class='d-rubric-inner']/small" % header), - Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True), - Join(u'- ', "%s/ul/li" % section, newline=True, addBefore=' - '))(el[0]) +class JsonResumePage(JsonPage): + def get_resume(self): + if self.doc['json']['success']: + return self.doc['json']['data'] class EventPage(HTMLPage): @@ -138,24 +82,45 @@ class EventPage(HTMLPage): obj_description = Description('.') -class JsonResumePage(JsonPage): - def get_resume(self): - if self.doc['json']['success']: - return self.doc['json']['data'] - - -class SettingsPage(HTMLPage): +class FilmsPage(HTMLPage): @method - class get_channels(ListElement): - item_xpath = '//li[@class="tse-channels-item hide"]' + class iter_films(ListElement): + item_xpath = '//li[@class="elgr-mosaic "]/a' class item(ItemElement): - klass = BaseObject + klass = SensCritiquenCalendarEvent - obj_id = CleanText('./@data-sc-channel-id') + def condition(self): + if '_id' in self.env and self.env['_id']: + return Format(u'%s#%s#%s', + Regexp(Link('.'), '/film/(.*)'), + FormatDate("%Y%m%d%H%M", + Date('div/div[@class="elgr-data-diffusion"]')), + CleanText('./div/span[@class="d-offset"]', + replace=[(' ', '-')]))(self) == self.env['_id'] + return True - def obj__networks(self): - return CleanText('./@data-sc-networks')(self).split(',') + def validate(self, obj): + if 'date_from' in self.env and self.env['date_from'] and obj.start_date > self.env['date_from']: + if not self.env['date_to']: + return True + else: + if empty(obj.end_date): + if obj.start_date < self.env['date_to']: + return True + elif obj.end_date <= self.env['date_to']: + return True - obj__thema = CleanText('./@data-sc-thema-id') - obj__name = CleanText('./label') + if '_id' in self.env: + return True + + return False + + obj_id = Format(u'%s#%s#%s', + Regexp(Link('.'), '/film/(.*)'), + FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')), + CleanText('./div/span[@class="d-offset"]', replace=[(' ', '-')])) + obj_start_date = Date('div/div[@class="elgr-data-diffusion"]') + obj_summary = Format('%s - %s', + Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'), + CleanText('./div/span[@class="d-offset"]'))