diff --git a/modules/senscritique/browser.py b/modules/senscritique/browser.py
index 67d7f051..60362eba 100644
--- a/modules/senscritique/browser.py
+++ b/modules/senscritique/browser.py
@@ -17,137 +17,52 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+from weboob.capabilities.base import UserError
from weboob.browser import PagesBrowser, URL
+from .pages import FilmsPage, EventPage, JsonResumePage
from weboob.browser.profiles import Firefox
-from .pages import AjaxPage, EventPage, JsonResumePage, SettingsPage
-
-import re
-from lxml.etree import XMLSyntaxError
__all__ = ['SenscritiqueBrowser']
class SenscritiqueBrowser(PagesBrowser):
- def set_ajax_header(self):
- self.session.headers.update({"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
- "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
- " GTB7.1 (.NET CLR 3.5.30729)",
- "Accept": "text/html, */*; q=0.01",
- "X-Requested-With": "XMLHttpRequest",
- "Referer": "http://www.senscritique.com/sc/tv_guides",
- "Origin": "http://www.senscritique.com",
- "Accept-Language": "fr-fr;q=0.667",
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
- })
+ BASEURL = 'http://www.senscritique.com'
+
+ films_page = URL('/everymovie/programme-tv/chrono', FilmsPage)
+ event_page = URL('/film/(?P<_id>.*)', EventPage)
+ json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage)
def set_json_header(self):
self.session.headers.update({"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
- "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
- " GTB7.1 (.NET CLR 3.5.30729)",
- "Accept": "application/json, text/javascript, */*; q=0.01",
- "X-Requested-With": "XMLHttpRequest",
- })
+ "NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
+ " GTB7.1 (.NET CLR 3.5.30729)",
+ "Accept": "application/json, text/javascript, */*; q=0.01",
+ "X-Requested-With": "XMLHttpRequest",
+ })
- ENCODING = 'utf-8'
- CHANNELS = None
- BASEURL = 'http://www.senscritique.com'
+ def list_events(self, date_from, date_to=None):
+ return self.films_page.go().iter_films(date_from=date_from, date_to=date_to)
- program_page = URL('/sc/tv_guides')
- ajax_page = URL('/sc/tv_guides/gridContent.ajax', AjaxPage)
- event_page = URL('/film/(?P<_id>.*)', EventPage)
- json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage)
- setting_page = URL('/sc/tv_guides/settings.ajax', SettingsPage)
-
- LIMIT = 25 # number of results returned for each ajax call (defined in the website).
-
- LIMIT_NB_PAGES = 10 # arbitrary limit to avoid infinitive loop that can occurs if total number of films is a multiple of LIMIT (in website it causes an infinite scroll)
-
- DATA = {'order': 'chrono',
- 'without_product_done': '0',
- 'period': 'cette-semaine',
- 'limit': '%d' % LIMIT,
- }
-
- def get_channels(self):
- if not self.CHANNELS:
- self.CHANNELS = list(self.setting_page.go().get_channels())
- return self.CHANNELS
-
- def get_selected_channels(self, package, general=False, cinema=False):
- for channel in self.get_channels():
- if (package == 0 or u'%s' % package in channel._networks) and\
- ((general and channel._thema in ('1', '2')) or (cinema and channel._thema == '3')):
- yield channel.id
-
- def set_package_settings(self, package, channels):
- url = 'http://www.senscritique.com/sc/tv_guides/saveSettings.json'
- # do not use a dict because there are several same keys
- params = "network=%s" % package
- params += ''.join(["&channels%%5B%%5D=%s" % (channel) for channel in channels])
- self.open(url, data=params)
-
- def list_events(self, date_from, date_to=None, package=None, channels=None):
- self.set_profile(Firefox())
- self.program_page.go()
- page_nb = 1
-
- self.set_ajax_header()
- if package and channels:
- self.set_package_settings(package, channels)
-
- while True:
- try:
- self.DATA['page'] = '%d' % page_nb
- page = self.ajax_page.open(data=self.DATA)
- nb_events = page.count_events()
- events = page.list_events(date_from=date_from, date_to=date_to)
-
- for event in events:
- yield event
- except XMLSyntaxError:
- break
-
- if nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES:
- break
-
- page_nb += 1
-
- def get_event(self, _id, event=None, package=None, channels=None):
+ def get_event(self, _id, event=None):
if not event:
- self.set_profile(Firefox())
- self.program_page.go()
- page_nb = 1
+ try:
+ event = self.films_page.go().iter_films(_id=_id).next()
+ except StopIteration:
+ raise UserError('This event (%s) does not exists' % _id)
- self.set_ajax_header()
- if package and channels:
- self.set_package_settings(package, channels)
+ film_id = _id.split('#')[0]
+ event = self.event_page.go(_id=film_id).get_event(obj=event)
- while True:
- self.DATA['page'] = '%d' % page_nb
- page = self.ajax_page.open(data=self.DATA)
- try:
- event = page.list_events(_id=_id).next()
- except StopIteration:
- event = None
+ resume = self.get_resume(film_id)
+ if resume:
+ event.description += resume
- nb_events = page.count_events()
- if event or nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES:
- break
+ return event
- page_nb += 1
-
- if event:
- _id = _id.split('#')[0]
- self.set_profile(Firefox())
- event = self.event_page.go(_id=_id).get_event(obj=event)
- resume = self.get_resume(_id)
- if resume:
- event.description += self.get_resume(_id)
- return event
-
- def get_resume(self, _id):
+ def get_resume(self, film_id):
self.set_json_header()
- re_id = re.compile('^/?.*/(.*)', re.DOTALL)
- _id = re_id.search(_id).group(1)
- return self.json_page.go(_id=_id).get_resume()
+ _id = film_id.split('/')[-1]
+ resume = self.json_page.go(_id=_id).get_resume()
+ self.set_profile(Firefox())
+ return resume
diff --git a/modules/senscritique/module.py b/modules/senscritique/module.py
index 5446b4e1..cd83fe99 100644
--- a/modules/senscritique/module.py
+++ b/modules/senscritique/module.py
@@ -17,9 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.backend import Module, BackendConfig
-from weboob.tools.ordereddict import OrderedDict
-from weboob.tools.value import Value, ValueBool
+from weboob.tools.backend import Module
from weboob.capabilities.calendar import CapCalendarEvent, CATEGORIES
from .browser import SenscritiqueBrowser
@@ -38,49 +36,21 @@ class SenscritiqueModule(Module, CapCalendarEvent):
ASSOCIATED_CATEGORIES = [CATEGORIES.TELE]
BROWSER = SenscritiqueBrowser
- tv_settings_choices = OrderedDict([(k, u'%s' % (v)) for k, v in sorted({
- '000000': u'-- Indifférent --',
- '9': u'TNT',
- '1': u'Canalsat',
- '2': u'Numericable',
- '10': u'Orange',
- '11': u'Free',
- '12': u'SFR',
- '15': u'Darty box via ADSL',
- '16': u'Bouygues',
- }.iteritems())])
-
- CONFIG = BackendConfig(Value('tv_settings', label=u'T.V. package', choices=tv_settings_choices),
- ValueBool('general', label='General', default=True),
- ValueBool('cinema', label='Cinema', default=False),
- )
-
- def get_package_and_channels(self):
- package = int(self.config['tv_settings'].get())
- channels = self.browser.get_selected_channels(package, self.config['general'].get(),
- self.config['cinema'].get())
- return package, channels
-
def search_events(self, query):
if self.has_matching_categories(query):
- package, channels = self.get_package_and_channels()
- return self.browser.list_events(query.start_date,
- query.end_date,
- package,
- channels)
+ return self.list_events(query.start_date,
+ query.end_date)
def list_events(self, date_from, date_to=None):
items = []
- package, channels = self.get_package_and_channels()
- for item in self.browser.list_events(date_from, date_to, package, channels):
+ for item in self.browser.list_events(date_from, date_to):
items.append(item)
items.sort(key=lambda o: o.start_date)
return items
def get_event(self, _id, event=None):
- package, channels = self.get_package_and_channels()
- return self.browser.get_event(_id, event, package=package, channels=channels)
+ return self.browser.get_event(_id, event)
def fill_obj(self, event, fields):
return self.get_event(event.id, event)
diff --git a/modules/senscritique/pages.py b/modules/senscritique/pages.py
index d834e760..d0bcd993 100644
--- a/modules/senscritique/pages.py
+++ b/modules/senscritique/pages.py
@@ -20,29 +20,31 @@
from .calendar import SensCritiquenCalendarEvent
from datetime import date, datetime, timedelta
-from weboob.capabilities.base import empty, BaseObject
+from weboob.capabilities.base import empty
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import Filter, CleanText, Regexp, Join, Format, BrowserURL, Env
from weboob.browser.filters.html import Link
-class Channel(Filter):
+class Description(Filter):
+ def filter(self, el):
+ header = "//div[@class='pvi-hero-product']"
+ section = "//section[@class='pvi-productDetails']"
+ return Format(u'%s %s\n\n%s%s\n\n',
+ CleanText("%s/div[@class='d-rubric-inner']/h1" % header),
+ CleanText("%s/div[@class='d-rubric-inner']/small" % header),
+ Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True),
+ Join(u'- ', "%s/ul/li" % section, newline=True, addBefore='- '))(el[0])
- def __call__(self, item):
- channels = item.page.browser.get_channels()
- return self.filter(self.select(self.selector, item, key=self._key, obj=self._obj), channels)
- def filter(self, el, channels):
- channel_info = el[0].xpath('div/div[@class="elgr-data-channel"]')
- if channel_info:
- return CleanText('.', children=False)(channel_info[0])
- else:
- channel_id = Regexp(CleanText('div[@class="elgr-product-data"]/span/@class'),
- 'einst-(.*) elgr-data-logo')(el[0])
- for channel in channels:
- if channel_id == channel.id:
- return channel._name
+class FormatDate(Filter):
+ def __init__(self, pattern, selector):
+ super(FormatDate, self).__init__(selector)
+ self.pattern = pattern
+
+ def filter(self, _date):
+ return _date.strftime(self.pattern)
class Date(Filter):
@@ -65,68 +67,10 @@ class Date(Filter):
return datetime.combine(_date, _time.time())
-class FormatDate(Filter):
- def __init__(self, pattern, selector):
- super(FormatDate, self).__init__(selector)
- self.pattern = pattern
-
- def filter(self, date):
- return date.strftime(self.pattern)
-
-
-class AjaxPage(HTMLPage):
-
- def count_events(self):
- return len(self.doc.xpath("//a"))
-
- @method
- class list_events(ListElement):
- item_xpath = '//a'
- ignore_duplicate = True
-
- class item(ItemElement):
- klass = SensCritiquenCalendarEvent
-
- def condition(self):
- if '_id' in self.env and self.env['_id']:
- return Format(u'%s#%s#%s',
- Regexp(Link('.'), '/film/(.*)'),
- FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')),
- CleanText(Channel('.'), replace=[(' ', '-')]))(self) == self.env['_id']
- return True
-
- def validate(self, obj):
- if 'date_from' in self.env and self.env['date_from'] and obj.start_date > self.env['date_from']:
- if not self.env['date_to']:
- return True
- else:
- if empty(obj.end_date) or obj.end_date <= self.env['date_to']:
- return True
-
- if '_id' in self.env:
- return True
-
- return False
-
- obj_id = Format(u'%s#%s#%s',
- Regexp(Link('.'), '/film/(.*)'),
- FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')),
- CleanText(Channel('.'), replace=[(' ', '-')]))
- obj_start_date = Date('div/div[@class="elgr-data-diffusion"]')
- obj_summary = Format('%s - %s',
- Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'),
- Channel('.'))
-
-
-class Description(Filter):
- def filter(self, el):
- header = "//div[@class='pvi-hero-product']"
- section = "//section[@class='pvi-productDetails']"
- return Format(u'%s %s\n\n%s%s\n\n',
- CleanText("%s/div[@class='d-rubric-inner']/h1" % header),
- CleanText("%s/div[@class='d-rubric-inner']/small" % header),
- Join(u'- ', "%s/ul[@class='pvi-product-specs']/li" % header, newline=True),
- Join(u'- ', "%s/ul/li" % section, newline=True, addBefore=' - '))(el[0])
+class JsonResumePage(JsonPage):
+ def get_resume(self):
+ if self.doc['json']['success']:
+ return self.doc['json']['data']
class EventPage(HTMLPage):
@@ -138,24 +82,45 @@ class EventPage(HTMLPage):
obj_description = Description('.')
-class JsonResumePage(JsonPage):
- def get_resume(self):
- if self.doc['json']['success']:
- return self.doc['json']['data']
-
-
-class SettingsPage(HTMLPage):
+class FilmsPage(HTMLPage):
@method
- class get_channels(ListElement):
- item_xpath = '//li[@class="tse-channels-item hide"]'
+ class iter_films(ListElement):
+ item_xpath = '//li[@class="elgr-mosaic "]/a'
class item(ItemElement):
- klass = BaseObject
+ klass = SensCritiquenCalendarEvent
- obj_id = CleanText('./@data-sc-channel-id')
+ def condition(self):
+ if '_id' in self.env and self.env['_id']:
+ return Format(u'%s#%s#%s',
+ Regexp(Link('.'), '/film/(.*)'),
+ FormatDate("%Y%m%d%H%M",
+ Date('div/div[@class="elgr-data-diffusion"]')),
+ CleanText('./div/span[@class="d-offset"]',
+ replace=[(' ', '-')]))(self) == self.env['_id']
+ return True
- def obj__networks(self):
- return CleanText('./@data-sc-networks')(self).split(',')
+ def validate(self, obj):
+ if 'date_from' in self.env and self.env['date_from'] and obj.start_date > self.env['date_from']:
+ if not self.env['date_to']:
+ return True
+ else:
+ if empty(obj.end_date):
+ if obj.start_date < self.env['date_to']:
+ return True
+ elif obj.end_date <= self.env['date_to']:
+ return True
- obj__thema = CleanText('./@data-sc-thema-id')
- obj__name = CleanText('./label')
+ if '_id' in self.env:
+ return True
+
+ return False
+
+ obj_id = Format(u'%s#%s#%s',
+ Regexp(Link('.'), '/film/(.*)'),
+ FormatDate("%Y%m%d%H%M", Date('div/div[@class="elgr-data-diffusion"]')),
+ CleanText('./div/span[@class="d-offset"]', replace=[(' ', '-')]))
+ obj_start_date = Date('div/div[@class="elgr-data-diffusion"]')
+ obj_summary = Format('%s - %s',
+ Regexp(CleanText('./div/img/@alt'), '^Affiche(.*)'),
+ CleanText('./div/span[@class="d-offset"]'))