weboob-devel/modules/gdcvault/browser.py
Laurent Bachelier a6ad7e83ff Use newer form of catching exceptions
autopep8 -i --select=W602
Also some other minor deprecated syntax changes, like "while 1".
I did not commit the less obvious changes.
2013-07-27 15:16:16 +02:00

156 lines
5.1 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
# Copyright(C) 2012 François Revol
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BrowserUnavailable,\
BrowserBanned
from weboob.tools.browser.decorators import id2url
#from .pages.index import IndexPage
from .pages import VideoPage, IndexPage, SearchPage
from .video import GDCVaultVideo
#HACK
from urllib2 import HTTPError
import re
from weboob.capabilities.base import NotAvailable
__all__ = ['GDCVaultBrowser']
class GDCVaultBrowser(BaseBrowser):
    """Browser for the GDC Vault website (gdcvault.com).

    Provides optional login, retrieval of a single video, keyword search
    and listing of the videos found on the ``/free`` page.
    """

    DOMAIN = 'gdcvault.com'
    ENCODING = 'utf-8'
    # Maps URL regular expressions to the page class handling them.
    # NOTE(review): the "json" in the SearchPage entry presumably selects
    # the parser used for that page -- confirm against BaseBrowser.
    PAGES = {r'http://[w\.]*gdcvault.com/play/(?P<id>[\d]+)/?.*': VideoPage,
             r'http://[w\.]*gdcvault.com/search\.php.*': (SearchPage, "json"),
             r'http://[w\.]*gdcvault.com/.*': IndexPage,
             }

    def is_logged(self):
        """Tell whether the browser should be considered logged in.

        When no password is configured the browser is used anonymously and
        this always returns True. Otherwise the current page must contain an
        ``h3`` element with id ``welcome_user_name`` whose ``class``
        attribute is not ``"hidden"``.
        """
        if self.password is None:
            return True

        if not self.page:
            return False

        obj = self.parser.select(self.page.document.getroot(),
                                 'h3[id=welcome_user_name]', 1)
        if obj is None:
            return False

        return obj.attrib.get('class', '') != "hidden"

    def login(self):
        """Log in with the configured username and password.

        Does nothing when no password is configured.

        :raises BrowserBanned: if the login API call returns no data
        :raises BrowserIncorrectPassword: if the home page does not show the
            user as logged in after the login call
        """
        if self.password is None:
            return

        params = {'remember_me': 0,
                  'email': self.username,
                  'password': self.password,
                  }

        data = self.readurl('http://gdcvault.com/api/login.php',
                            urllib.urlencode(params))
        # some data returned as JSON, not sure yet if it's useful

        if data is None:
            # Log out before reporting the failure.
            self.openurl('/logout', '')
            raise BrowserBanned('Too many open sessions?')

        # no_login=True is passed so that loading the home page happens
        # without another login attempt.
        self.location('/', no_login=True)

        if not self.is_logged():
            raise BrowserIncorrectPassword()

    def close_session(self):
        """Log out from the site, if a password is set and we are logged in."""
        if self.password is None or not self.is_logged():
            return

        self.openurl('/logout', '')

    @id2url(GDCVaultVideo.id2url)
    def get_video(self, url, video=None):
        """Fetch the video located at ``url``.

        The URL is first opened with redirect handling disabled, so that a
        302 answer can be inspected: a redirection to ``/`` or ``/login``
        means an account is required, any other redirection target is used
        as the video URL directly.

        :param url: URL of the video page
        :param video: optional existing GDCVaultVideo object to fill
        :returns: the video object
        :raises BrowserUnavailable: if the video requires an account, or if
            a redirection occurs and no ID can be extracted from ``url``
        """
        requires_account = False
        redir_url = None

        # FIXME: this is quite ugly
        # but is required to handle cases like 1013422@gdcvault
        self.set_handle_redirect(False)
        try:
            self.open_novisit(url)
        except HTTPError as e:
            if e.getcode() == 302 and hasattr(e, 'hdrs'):
                if e.hdrs['Location'] in ['/', '/login']:
                    requires_account = True
                else:
                    # 1015865 redirects to a file with an eacute in the name
                    redir_url = unicode(e.hdrs['Location'], encoding='utf-8')
        finally:
            # Always restore redirect handling, even when an unexpected
            # exception escapes, so the browser is not left in a state
            # where later requests no longer follow redirects.
            self.set_handle_redirect(True)

        if requires_account:
            raise BrowserUnavailable('Requires account')

        if redir_url:
            if video is None:
                m = re.match(r'http://[w\.]*gdcvault.com/play/(?P<id>[\d]+)/?.*', url)
                if not m:
                    raise BrowserUnavailable('Cannot find ID on page with redirection')
                video = GDCVaultVideo(int(m.group('id')))

            video.url = redir_url
            video.set_empty_fields(NotAvailable)
            # best effort for now
            return video

        self.location(url)
        # redirects to /login means the video is not public
        if not self.is_on_page(VideoPage):
            raise BrowserUnavailable('Requires account')

        return self.page.get_video(video)

    def search_videos(self, pattern, sortby):
        """Search the free videos matching ``pattern``.

        :param pattern: unicode search keyword, sent UTF-8 encoded
        :param sortby: accepted for interface compatibility; not used by
            this method
        :returns: whatever ``SearchPage.iter_videos()`` yields
        """
        post_data = urllib.urlencode({"firstfocus": "",
                                      "category": "free",
                                      "keyword": pattern.encode('utf-8'),
                                      "conference_id": "",
                                      })

        # probably not required
        self.addheaders = [('Referer', 'http://gdcvault.com/'),
                           ("Content-Type", 'application/x-www-form-urlencoded')]

        # is_logged assumes html page
        self.location('http://gdcvault.com/search.php',
                      data=post_data, no_login=True)

        assert self.is_on_page(SearchPage)
        return self.page.iter_videos()

    def latest_videos(self):
        """Return the videos listed on the ``/free`` page."""
        self.location('/free')

        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()