weboob-devel/dlfp/browser.py
2010-02-10 00:40:24 +01:00

186 lines
6 KiB
Python

# -*- coding: utf-8 -*-
"""
Copyright(C) 2010 Romain Bignon
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
from mechanize import Browser, response_seek_wrapper, BrowserStateError
import urllib2
import html5lib
from html5lib import treebuilders
import re
import time
from logging import warning, error
from copy import copy
from dlfp.pages.login import IndexPage, LoginPage
from dlfp.exceptions import DLFPIncorrectPassword, DLFPUnavailable, DLFPRetry
from dlfp.firefox_cookies import FirefoxCookieJar
class NoHistory:
    """
    History object that remembers nothing.

    It is handed to ``Browser.__init__`` as the ``history`` argument so
    that visited pages are not accumulated. Every method accepts the
    arguments of the corresponding history call and does nothing.
    """

    def __init__(self):
        """Nothing to set up: no state is kept."""
        pass

    def add(self, request, response):
        """Ignore the request/response pair instead of storing it."""
        pass

    def back(self, n, _response):
        """Do nothing; there is no stored entry to go back to."""
        pass

    def clear(self):
        """Do nothing; there is nothing to clear."""
        pass

    def close(self):
        """Do nothing; there is nothing to release."""
        pass
class DLFP(Browser):
pages = {'https://linuxfr.org/': IndexPage,
'https://linuxfr.org/pub/': IndexPage,
'https://linuxfr.org/my/': IndexPage,
'https://linuxfr.org/login.html': LoginPage,
}
def __init__(self, username, password=None, firefox_cookies=None):
Browser.__init__(self, history=NoHistory())
self.addheaders = [
['User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3']
]
# Share cookies with firefox
if firefox_cookies:
self.__cookie = FirefoxCookieJar(firefox_cookies)
self.__cookie.load()
self.set_cookiejar(self.__cookie)
else:
self.__cookie = None
self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
self.__page = None
self.__last_update = 0.0
self.username = username
self.password = password
if self.password:
try:
self.home()
except DLFPUnavailable:
pass
def page(self):
return self.__page
def home(self):
return self.location('https://linuxfr.org')
def pageaccess(func):
def inner(self, *args, **kwargs):
if not self.__page or not self.__page.isLogged() and self.password:
self.home()
return func(self, *args, **kwargs)
return inner
@pageaccess
def keepalive(self):
self.home()
def login(self):
self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password))
def openurl(self, *args, **kwargs):
try:
return Browser.open(self, *args, **kwargs)
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e)
raise DLFPUnavailable()
except BrowserStateError:
self.home()
return Browser.open(self, *args, **kwargs)
def submit(self, *args, **kwargs):
try:
self.__changeLocation(Browser.submit(self, *args, **kwargs))
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e)
self.__page = None
raise DLFPUnavailable()
except (BrowserStateError,DLFPRetry):
self.home()
raise DLFPUnavailable()
def isOnPage(self, pageCls):
return isinstance(self.__page, pageCls)
def follow_link(self, *args, **kwargs):
try:
self.__changeLocation(Browser.follow_link(self, *args, **kwargs))
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e)
self.__page = None
raise DLFPUnavailable()
except (BrowserStateError,DLFPRetry):
self.home()
raise DLFPUnavailable()
def location(self, *args, **kwargs):
keep_args = copy(args)
keep_kwargs = kwargs.copy()
try:
self.__changeLocation(Browser.open(self, *args, **kwargs))
except DLFPRetry:
if not self.__page or not args or self.__page.url != args[0]:
self.location(keep_args, keep_kwargs)
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
error(e)
self.__page = None
raise DLFPUnavailable()
except BrowserStateError:
self.home()
self.location(*keep_args, **keep_kwargs)
def __changeLocation(self, result):
# Find page from url
pageCls = None
for key, value in self.pages.items():
regexp = re.compile('^%s$' % key)
m = regexp.match(result.geturl())
if m:
pageCls = value
break
# Not found
if not pageCls:
self.__page = None
r = result.read()
if isinstance(r, unicode):
r = r.encode('iso-8859-15', 'replace')
print r
warning('Ho my fucking god, there isn\'t any page named %s' % result.geturl())
return
print '[%s] Gone on %s' % (self.username, result.geturl())
self.__last_update = time.time()
document = self.__parser.parse(result, encoding='iso-8859-1')
self.__page = pageCls(self, document, result.geturl())
self.__page.loaded()
# Special pages
if isinstance(self.__page, LoginPage):
if self.__page.hasError():
raise DLFPIncorrectPassword()
raise DLFPRetry()
if not self.__page.isLogged() and self.password:
print '!! Relogin !!'
self.login()
return
if self.__cookie:
self.__cookie.save()