renamed dlfp2mail to weboob
This commit is contained in:
parent
7906433c20
commit
4065f7efcd
13 changed files with 127 additions and 113 deletions
0
weboob/tools/__init__.py
Normal file
0
weboob/tools/__init__.py
Normal file
216
weboob/tools/browser.py
Normal file
216
weboob/tools/browser.py
Normal file
|
|
@ -0,0 +1,216 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
Copyright(C) 2010 Romain Bignon
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, version 3 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
"""
|
||||
|
||||
import mechanize.Browser
|
||||
from mechanize import response_seek_wrapper, BrowserStateError
|
||||
import urllib2
|
||||
import html5lib
|
||||
from html5lib import treebuilders
|
||||
import re
|
||||
import time
|
||||
from logging import warning, error
|
||||
from copy import copy
|
||||
|
||||
from weboob.tools.firefox_cookies import FirefoxCookieJar
|
||||
|
||||
class BrowserIncorrectPassword(Exception):
    """Error type for rejected credentials.

    NOTE(review): nothing in this module raises it; presumably it is raised
    by site-specific ``login()`` implementations -- confirm against callers.
    """
|
||||
|
||||
class BrowserUnavailable(Exception):
    """Error raised when a page could not be fetched from the website.

    The browser raises it after logging the underlying HTTP/URL error, or
    after a failed navigation forced a return to the home page.
    """
|
||||
|
||||
class BrowserRetry(Exception):
    """Error signalling that the current navigation should be attempted again.

    NOTE(review): this module only catches it; presumably page classes raise
    it from ``loaded()`` -- confirm against callers.
    """
|
||||
|
||||
class NoHistory:
    """History object whose every operation is a no-op.

    It is handed to ``mechanize.Browser`` as its ``history`` argument so that
    no request/response pair is ever recorded.
    """

    def __init__(self):
        """Create the (stateless) history."""
        return None

    def add(self, request, response):
        """Ignore the request/response pair instead of storing it."""
        return None

    def back(self, n, _response):
        """Do nothing: there is no recorded entry to go back to."""
        return None

    def clear(self):
        """Do nothing: nothing is ever stored."""
        return None

    def close(self):
        """Do nothing: no resource is held."""
        return None
|
||||
|
||||
class BasePage:
    """Base class of the objects representing one loaded page of a website.

    Attributes:
        browser: the browser object that loaded the page.
        document: the parsed document of the page.
        url: the URL the page was loaded from ('' when not given).
    """

    def __init__(self, browser, document, url=''):
        """Store the owning browser, the parsed document and the page URL."""
        self.url = url
        self.document = document
        self.browser = browser

    def loaded(self):
        """Hook called by the browser once the page object has been built.

        The base implementation does nothing; subclasses may override it.
        """
        return None
|
||||
|
||||
class Browser(mechanize.Browser):
    """Base browser for one website, built on top of ``mechanize.Browser``.

    Subclasses describe the website through the class attributes below and
    implement ``home()``, ``login()`` and ``isLogged()``.  After each
    successful navigation, ``self.page`` holds an instance of the page class
    whose URL pattern (a key of ``PAGES``) matches the reached URL, or None
    when no pattern matches.
    """

    # ------ Class attributes --------------------------------------

    # Host name of the website; also used to select the Firefox cookies.
    DOMAIN = None
    # URL scheme used to turn a relative path ('/foo') into an absolute URL.
    # The original class read self.PROTOCOL without ever defining it.
    PROTOCOL = 'http'
    # Maps URL regular expressions (anchored with ^...$) to page classes.
    PAGES = {}
    USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3'

    # ------ Abstract methods --------------------------------------

    def home(self):
        """Go to the home page of the website (must be overridden)."""
        raise NotImplementedError()

    def login(self):
        """Log in to the website (must be overridden)."""
        raise NotImplementedError()

    def isLogged(self):
        """Return True if we are logged on the website (must be overridden)."""
        raise NotImplementedError()

    # ------ Browser methods ---------------------------------------

    def __init__(self, username, password=None, firefox_cookies=None):
        """Set up the browser and, when a password is given, load the home page.

        Args:
            username: account name, stored on the instance.
            password: account password; when falsy, no page is loaded here.
            firefox_cookies: path of a Firefox cookies database to share
                cookies with, or None to keep cookies private to this browser.
        """
        mechanize.Browser.__init__(self, history=NoHistory())
        self.addheaders = [
            ['User-agent', self.USER_AGENT]
        ]

        # Share cookies with firefox
        if firefox_cookies:
            self.__cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
            self.__cookie.load()
            self.set_cookiejar(self.__cookie)
        else:
            self.__cookie = None

        self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
        self.page = None
        self.last_update = 0.0
        self.username = username
        self.password = password
        if self.password:
            try:
                self.home()
            except BrowserUnavailable:
                # Best effort: the website may be reached again later.
                pass

    def pageaccess(func):
        """Decorator: make sure a page is loaded before running ``func``.

        ``home()`` is called first when there is no current page, or when a
        password is set and the current page reports we are not logged.
        """
        def inner(self, *args, **kwargs):
            # NOTE(review): this calls isLogged() on the *page*, whereas
            # __changeLocation() calls it on the browser and BasePage defines
            # no such method -- confirm which object is meant.
            if not self.page or not self.page.isLogged() and self.password:
                self.home()

            return func(self, *args, **kwargs)
        return inner

    @pageaccess
    def keepalive(self):
        """Reload the home page."""
        self.home()

    def change_location(func):
        """Decorator: expand a relative first argument to an absolute URL.

        When the first positional argument is a string starting with '/' and
        the browser has no current request on ``DOMAIN``, it is rewritten as
        ``PROTOCOL://DOMAIN/path`` before ``func`` is called.
        """
        def inner(self, *args, **kwargs):
            # Only strings are inspected: an empty URL or a non-string request
            # object must be passed through untouched instead of raising.
            is_relative = (bool(args) and hasattr(args[0], 'startswith')
                           and args[0].startswith('/'))
            if is_relative and (not self.request or self.request.host != self.DOMAIN):
                args = ('%s://%s%s' % (self.PROTOCOL, self.DOMAIN, args[0]),) + args[1:]
                print(args)

            return func(self, *args, **kwargs)
        return inner

    @change_location
    def openurl(self, *args, **kwargs):
        """Open a URL and return the response without changing ``self.page``.

        Raises:
            BrowserUnavailable: when the request fails with an HTTP/URL error.
        """
        try:
            return mechanize.Browser.open(self, *args, **kwargs)
        except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError) as e:
            error(e)
            raise BrowserUnavailable()
        except BrowserStateError:
            # Go back to a known state, then try once more.
            self.home()
            return mechanize.Browser.open(self, *args, **kwargs)

    def submit(self, *args, **kwargs):
        """Submit through mechanize and update ``self.page`` from the response.

        Raises:
            BrowserUnavailable: on an HTTP/URL error (``self.page`` is reset
                to None), or after a state/retry error sent us back home.
        """
        try:
            self.__changeLocation(mechanize.Browser.submit(self, *args, **kwargs))
        except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError) as e:
            error(e)
            self.page = None
            raise BrowserUnavailable()
        except (BrowserStateError, BrowserRetry):
            self.home()
            raise BrowserUnavailable()

    def isOnPage(self, pageCls):
        """Return True if the current page is an instance of ``pageCls``."""
        return isinstance(self.page, pageCls)

    def follow_link(self, *args, **kwargs):
        """Follow a link through mechanize and update ``self.page``.

        Raises:
            BrowserUnavailable: on an HTTP/URL error (``self.page`` is reset
                to None), or after a state/retry error sent us back home.
        """
        try:
            self.__changeLocation(mechanize.Browser.follow_link(self, *args, **kwargs))
        except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError) as e:
            error(e)
            self.page = None
            raise BrowserUnavailable()
        except (BrowserStateError, BrowserRetry):
            self.home()
            raise BrowserUnavailable()

    @change_location
    def location(self, *args, **kwargs):
        """Open a URL and update ``self.page`` from the response.

        Raises:
            BrowserUnavailable: on an HTTP/URL error (``self.page`` is reset
                to None).
        """
        keep_args = copy(args)
        keep_kwargs = kwargs.copy()

        try:
            self.__changeLocation(mechanize.Browser.open(self, *args, **kwargs))
        except BrowserRetry:
            if not self.page or not args or self.page.url != args[0]:
                # The saved arguments must be unpacked: passing the tuple and
                # the dict themselves would send them as (url, data).
                self.location(*keep_args, **keep_kwargs)
        except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError) as e:
            error(e)
            self.page = None
            raise BrowserUnavailable()
        except BrowserStateError:
            self.home()
            self.location(*keep_args, **keep_kwargs)

    def __changeLocation(self, result):
        """Build ``self.page`` from a response and log in again if needed.

        Args:
            result: response object exposing ``geturl()`` and ``read()``.
        """
        url = result.geturl()

        # Find page from url
        pageCls = None
        for key, value in self.PAGES.items():
            if re.match('^%s$' % key, url):
                pageCls = value
                break

        # Not found
        if not pageCls:
            self.page = None
            r = result.read()
            if isinstance(r, unicode):
                r = r.encode('iso-8859-15', 'replace')
            print(r)
            warning('Ho my fucking god, there isn\'t any page named %s' % url)
            return

        print('[%s] Gone on %s' % (self.username, url))
        self.last_update = time.time()

        document = self.__parser.parse(result, encoding='iso-8859-1')
        self.page = pageCls(self, document, url)
        self.page.loaded()

        if not self.isLogged() and self.password:
            print('!! Relogin !!')
            self.login()
            return

        # Persist the session cookies only once we know we are logged.
        if self.__cookie:
            self.__cookie.save()
|
||||
|
||||
118
weboob/tools/firefox_cookies.py
Normal file
118
weboob/tools/firefox_cookies.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
Copyright(C) 2010 Romain Bignon
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, version 3 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
"""
|
||||
|
||||
try:
|
||||
import sqlite3 as sqlite
|
||||
except ImportError, e:
|
||||
from pysqlite2 import dbapi2 as sqlite
|
||||
|
||||
from mechanize import CookieJar, Cookie
|
||||
|
||||
#import sys, logging
|
||||
#logger = logging.getLogger("mechanize")
|
||||
#logger.addHandler(logging.StreamHandler(sys.stdout))
|
||||
#logger.setLevel(logging.DEBUG)
|
||||
|
||||
class FirefoxCookieJar(CookieJar):
    """Cookie jar reading and writing the ``moz_cookies`` table of Firefox.

    Only the cookies whose host contains ``domain`` are loaded and saved.
    """

    def __init__(self, domain, sqlite_file=None, policy=None):
        """Create the jar.

        Args:
            domain: text that the host of a cookie must contain.
            sqlite_file: path of the Firefox cookies sqlite database.
            policy: cookie policy forwarded to ``CookieJar``.
        """
        CookieJar.__init__(self, policy)

        self.domain = domain
        self.sqlite_file = sqlite_file

    def __connect(self):
        """Open the database; return the connection, or None on failure."""
        try:
            db = sqlite.connect(database=self.sqlite_file, timeout=10.0)
        except sqlite.OperationalError as err:
            print('Unable to open %s database: %s' % (self.sqlite_file, err))
            return None

        return db

    def __host_pattern(self):
        """Return the LIKE pattern matching every host containing the domain."""
        return '%%%s%%' % self.domain

    def load(self):
        """Fill the jar with the matching cookies stored in the database.

        Does nothing when the database cannot be opened.
        """
        db = self.__connect()
        if db is None:
            return

        try:
            # The pattern is bound as a parameter rather than pasted into the
            # statement, so a quote in the domain cannot alter the query.
            cookies = db.execute("""SELECT host, path, name, value, expiry, lastAccessed, isSecure
                                    FROM moz_cookies
                                    WHERE host LIKE ?""", (self.__host_pattern(),))

            for entry in cookies:
                domain = entry[0]
                initial_dot = domain.startswith(".")
                domain_specified = initial_dot
                path = entry[1]
                name = entry[2]
                value = entry[3]
                expires = entry[4]
                secure = entry[6]

                discard = False

                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                self.set_cookie(c)
        finally:
            # The original never closed the connection.
            db.close()

    def save(self):
        """Replace the matching cookies of the database by those of the jar.

        Does nothing when the database cannot be opened.
        """
        db = self.__connect()
        if db is None:
            return

        try:
            db.execute("DELETE FROM moz_cookies WHERE host LIKE ?",
                       (self.__host_pattern(),))
            for cookie in self:
                secure = 1 if cookie.secure else 0
                if cookie.expires is not None:
                    expires = cookie.expires
                else:
                    expires = 0

                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas cookielib regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value

                # XXX ugly hack to keep this cookie
                if name == 'PHPSESSID':
                    expires = 1854242393

                db.execute("""INSERT INTO moz_cookies (host, path, name, value, expiry, isSecure)
                              VALUES (?, ?, ?, ?, ?, ?)""",
                           (cookie.domain, cookie.path, name, value, int(expires), int(secure)))
            db.commit()
        finally:
            # Closing without commit discards a partially applied update.
            db.close()
|
||||
Loading…
Add table
Add a link
Reference in a new issue