[senscritique] adapt to browser2

This commit is contained in:
Bezleputh 2014-03-20 00:38:16 +01:00
commit 640504d79c
5 changed files with 197 additions and 159 deletions

View file

@ -17,46 +17,65 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser2 import PagesBrowser, URL, Profile, Firefox
from weboob.tools.json import json as simplejson
from .calendar import SensCritiquenCalendarEvent
from .pages import ProgramPage, EventPage
from .pages import AjaxPage, EventPage, JsonResumePage
import urllib
import urllib2
import re
__all__ = ['SenscritiqueBrowser']
class SenscritiqueBrowser(BaseBrowser):
PROTOCOL = 'http'
DOMAIN = 'www.senscritique.com'
class SensCritiqueAjaxProfile(Profile):
def setup_session(self, session):
session.headers.update({"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
"NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
" GTB7.1 (.NET CLR 3.5.30729)",
"Accept": "text/html, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
"Referer": "http://www.senscritique.com/sc/tv_guides",
"Origin": "http://www.senscritique.com",
"Accept-Language": "fr-fr;q=0.667",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
})
class SensCritiqueJsonProfile(Profile):
def setup_session(self, session):
session.headers.update({"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
"NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
" GTB7.1 (.NET CLR 3.5.30729)",
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
})
class SenscritiqueBrowser(PagesBrowser):
ENCODING = 'utf-8'
PAGES = {
'%s://%s/sc/tv_guides' % (PROTOCOL, DOMAIN): ProgramPage,
'%s://%s/film/(.*?)' % (PROTOCOL, DOMAIN): EventPage,
}
BASEURL = 'http://www.senscritique.com'
program_page = URL('/sc/tv_guides')
ajax_page = URL('/sc/tv_guides/gridContent.ajax', AjaxPage)
event_page = URL('/film/(?P<_id>.*)', EventPage)
json_page = URL('/sc/products/storyline/(?P<_id>.*).json', JsonResumePage)
LIMIT = 25 # number of results returned for each ajax call (defined in the website).
LIMIT_NB_PAGES = 10 # arbitrary limit to avoid infinitive loop that can occurs if total number of films is a multiple of LIMIT (in website it causes an infinite scroll)
HEADER_AJAX = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
"NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
" GTB7.1 (.NET CLR 3.5.30729)",
"Accept": "gzip, deflate",
"X-Requested-With": "XMLHttpRequest",
"Referer": "http://www.senscritique.com/sc/tv_guides",
}
LIMIT_NB_PAGES = 10 # arbitrary limit to avoid infinitive loop that can occurs if total number of films is a multiple of LIMIT (in website it causes an infinite scroll)
"""
HEADER_RESUME = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows "
"NT 5.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
" GTB7.1 (.NET CLR 3.5.30729)",
"Accept": "application/json, text/javascript, */*; q=0.01",
"X-Requested-With": "XMLHttpRequest",
}
"""
DATA = {'order': 'chrono',
'without_product_done': '0',
@ -64,66 +83,70 @@ class SenscritiqueBrowser(BaseBrowser):
'limit': '%d' % LIMIT,
}
URL = "http://www.senscritique.com/sc/tv_guides/gridContent.ajax"
def home(self):
self.location("http://www.senscritique.com/sc/tv_guides")
assert self.is_on_page(ProgramPage)
def list_events(self, date_from, date_to=None, package=None, channels=None):
self.home()
page = 1
if package and channels:
self.set_package_settings(package, channels)
while True:
self.DATA['page'] = '%d' % page
self.page.document = self.get_ajax_content()
nb_events = self.page.count_events()
events = self.page.list_events(date_from, date_to)
for event in events:
yield event
if nb_events < self.LIMIT or page >= self.LIMIT_NB_PAGES:
break
page += 1
def set_package_settings(self, package, channels):
url = 'http://www.senscritique.com/sc/tv_guides/saveSettings.json'
params = "network=%s" % package
params += ''.join(["&channels%%5B%%5D=%d" % (channel) for channel in channels])
self.openurl(url, params)
self.open(url, data=params)
def get_ajax_content(self):
req = urllib2.Request(self.URL, urllib.urlencode(self.DATA), headers=self.HEADER_AJAX)
response = self.open(req)
return self.get_document(response)
def list_events(self, date_from, date_to=None, package=None, channels=None):
self.program_page.stay_or_go()
page_nb = 1
if package and channels:
self.set_package_settings(package, channels)
self._setup_session(SensCritiqueAjaxProfile())
while True:
self.DATA['page'] = '%d' % page_nb
page = self.ajax_page.open(data=urllib.urlencode(self.DATA))
nb_events = page.count_events()
events = page.list_events(date_from=date_from, date_to=date_to)
for event in events:
yield event
if nb_events < self.LIMIT or page_nb >= self.LIMIT_NB_PAGES:
break
page_nb += 1
def get_event(self, _id, event=None):
if not event:
self.home()
page = 1
self.program_page.stay_or_go()
page_nb = 1
self._setup_session(SensCritiqueAjaxProfile())
while True:
self.DATA['page'] = '%d' % page
self.page.document = self.get_ajax_content()
event = self.page.find_event(_id)
nb_events = self.page.count_events()
self.DATA['page'] = '%d' % page_nb
page = self.ajax_page.open(data=urllib.urlencode(self.DATA))
event = page.list_events(_id=_id)
nb_events = page.count_events()
if event or nb_events < self.LIMIT or page >= self.LIMIT_NB_PAGES:
break
page += 1
if event:
url = SensCritiquenCalendarEvent.id2url(_id)
self.location(url)
assert self.is_on_page(EventPage)
return self.page.get_event(url, event)
if not isinstance(event, SensCritiquenCalendarEvent):
event = event.next()
def get_resume(self, url, _id):
event.resume = self.get_resume(_id)
self._setup_session(Firefox())
event = self.event_page.go(_id=_id).get_event(obj=event)
return event
def get_resume(self, _id):
self._setup_session(SensCritiqueJsonProfile())
re_id = re.compile('/(.*)/(.*?).json', re.DOTALL)
a_id = re_id.search(_id).group(1)
print a_id
return self.json_page.go(_id=a_id).get_resume()
# return "get resume"
"""
self.HEADER_RESUME['Referer'] = url
req = urllib2.Request('http://www.senscritique.com/sc/products/storyline/%s.json' % _id,
headers=self.HEADER_RESUME)
@ -131,3 +154,4 @@ class SenscritiqueBrowser(BaseBrowser):
result = simplejson.loads(response.read(), self.ENCODING)
if result['json']['success']:
return result['json']['data']
"""