renamed dlfp2mail to weboob
This commit is contained in:
parent
7906433c20
commit
4065f7efcd
13 changed files with 127 additions and 113 deletions
|
|
@ -1,31 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
Copyright(C) 2010 Romain Bignon
|
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, version 3 of the License.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
class DLFPUnavailable(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DLFPBanned(DLFPUnavailable):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DLFPIncorrectPassword(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class DLFPRetry(Exception):
|
|
||||||
pass
|
|
||||||
|
|
@ -18,21 +18,23 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from mechanize import FormNotFoundError
|
from weboob.tools.browser import Browser
|
||||||
|
from weboob.backends.dlfp.pages.index import IndexPage, LoginPage
|
||||||
|
|
||||||
class BasePage:
|
class DLFP(Browser):
|
||||||
def __init__(self, dlfp, document, url=''):
|
|
||||||
self.dlfp = dlfp
|
|
||||||
self.document = document
|
|
||||||
self.url = url
|
|
||||||
|
|
||||||
def loaded(self):
|
DOMAIN = 'linuxfr.org'
|
||||||
pass
|
PAGES = {'https://linuxfr.org/': IndexPage,
|
||||||
|
'https://linuxfr.org/pub/': IndexPage,
|
||||||
|
'https://linuxfr.org/my/': IndexPage,
|
||||||
|
'https://linuxfr.org/login.html': LoginPage,
|
||||||
|
}
|
||||||
|
|
||||||
|
def home(self):
|
||||||
|
return self.location('https://linuxfr.org')
|
||||||
|
|
||||||
|
def login(self):
|
||||||
|
self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password))
|
||||||
|
|
||||||
def isLogged(self):
|
def isLogged(self):
|
||||||
forms = self.document.getElementsByTagName('form')
|
return (self.page and self.page.isLogged())
|
||||||
for form in forms:
|
|
||||||
if form.getAttribute('id') == 'formulaire':
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
@ -24,14 +24,15 @@ import re
|
||||||
class Article:
|
class Article:
|
||||||
RSS = None
|
RSS = None
|
||||||
|
|
||||||
def __init__(self, _id, title, author, datetime):
|
def __init__(self, _id, url, title, author, datetime):
|
||||||
self._id = _id
|
self._id = _id
|
||||||
|
self.url = url
|
||||||
self.title = title
|
self.title = title
|
||||||
self.author = author
|
self.author = author
|
||||||
self.datetime = datetime
|
self.datetime = datetime
|
||||||
|
|
||||||
class Newspaper(Article):
|
class Newspaper(Article):
|
||||||
RSS = 'http://linuxfr.org/backend/news/rss20.rss'
|
RSS = 'https://linuxfr.org/backend/news/rss20.rss'
|
||||||
|
|
||||||
class Telegram(Article):
|
class Telegram(Article):
|
||||||
RSS = 'https://linuxfr.org/backend/journaux/rss20.rss'
|
RSS = 'https://linuxfr.org/backend/journaux/rss20.rss'
|
||||||
|
|
@ -58,5 +59,5 @@ class ArticlesList:
|
||||||
warning('Unable to parse ID from link \'%s\'' % item['link'])
|
warning('Unable to parse ID from link \'%s\'' % item['link'])
|
||||||
continue
|
continue
|
||||||
_id = m.group(1)
|
_id = m.group(1)
|
||||||
article = klass(_id, item['title'], item['author'], item['date_parsed'])
|
article = klass(_id, item['link'], item['title'], item['author'], item['date_parsed'])
|
||||||
print _id, item['author'], item['title']
|
print _id, item['author'], item['title']
|
||||||
0
weboob/backends/dlfp/pages/__init__.py
Normal file
0
weboob/backends/dlfp/pages/__init__.py
Normal file
|
|
@ -18,12 +18,26 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from dlfp.pages.base import BasePage
|
from weboob.tools.browser import BrowserIncorrectPassword, BasePage
|
||||||
|
|
||||||
class IndexPage(BasePage):
|
class DLFPPage(BasePage):
|
||||||
|
def isLogged(self):
|
||||||
|
forms = self.document.getElementsByTagName('form')
|
||||||
|
for form in forms:
|
||||||
|
if form.getAttribute('id') == 'formulaire':
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
class IndexPage(DLFPPage):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class LoginPage(BasePage):
|
class LoginPage(DLFPPage):
|
||||||
|
|
||||||
|
def loaded(self):
|
||||||
|
if self.hasError():
|
||||||
|
raise BrowserIncorrectPassword()
|
||||||
|
|
||||||
def hasError(self):
|
def hasError(self):
|
||||||
plist = self.document.getElementsByTagName('p')
|
plist = self.document.getElementsByTagName('p')
|
||||||
for p in plist:
|
for p in plist:
|
||||||
0
weboob/frontends/__init__.py
Normal file
0
weboob/frontends/__init__.py
Normal file
0
weboob/tools/__init__.py
Normal file
0
weboob/tools/__init__.py
Normal file
|
|
@ -18,7 +18,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from mechanize import Browser, response_seek_wrapper, BrowserStateError
|
import mechanize.Browser
|
||||||
|
from mechanize import response_seek_wrapper, BrowserStateError
|
||||||
import urllib2
|
import urllib2
|
||||||
import html5lib
|
import html5lib
|
||||||
from html5lib import treebuilders
|
from html5lib import treebuilders
|
||||||
|
|
@ -27,9 +28,16 @@ import time
|
||||||
from logging import warning, error
|
from logging import warning, error
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
|
||||||
from dlfp.pages.login import IndexPage, LoginPage
|
from weboob.tools.firefox_cookies import FirefoxCookieJar
|
||||||
from dlfp.exceptions import DLFPIncorrectPassword, DLFPUnavailable, DLFPRetry
|
|
||||||
from dlfp.firefox_cookies import FirefoxCookieJar
|
class BrowserIncorrectPassword(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class BrowserUnavailable(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class BrowserRetry(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
class NoHistory:
|
class NoHistory:
|
||||||
def __init__(self): pass
|
def __init__(self): pass
|
||||||
|
|
@ -38,48 +46,67 @@ class NoHistory:
|
||||||
def clear(self): pass
|
def clear(self): pass
|
||||||
def close(self): pass
|
def close(self): pass
|
||||||
|
|
||||||
class DLFP(Browser):
|
class BasePage:
|
||||||
|
def __init__(self, browser, document, url=''):
|
||||||
|
self.browser = browser
|
||||||
|
self.document = document
|
||||||
|
self.url = url
|
||||||
|
|
||||||
pages = {'https://linuxfr.org/': IndexPage,
|
def loaded(self):
|
||||||
'https://linuxfr.org/pub/': IndexPage,
|
pass
|
||||||
'https://linuxfr.org/my/': IndexPage,
|
|
||||||
'https://linuxfr.org/login.html': LoginPage,
|
class Browser(mechanize.Browser):
|
||||||
}
|
|
||||||
|
# ------ Class attributes --------------------------------------
|
||||||
|
|
||||||
|
DOMAIN = None
|
||||||
|
PAGES = {}
|
||||||
|
USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3'
|
||||||
|
|
||||||
|
# ------ Abstract methods --------------------------------------
|
||||||
|
|
||||||
|
# Go to home
|
||||||
|
def home(self):
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
# Login to the website
|
||||||
|
def login(self):
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
# Return True if we are logged on website
|
||||||
|
def isLogged(self):
|
||||||
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
# ------ Browser methods ---------------------------------------
|
||||||
|
|
||||||
def __init__(self, username, password=None, firefox_cookies=None):
|
def __init__(self, username, password=None, firefox_cookies=None):
|
||||||
Browser.__init__(self, history=NoHistory())
|
mechanize.Browser.__init__(self, history=NoHistory())
|
||||||
self.addheaders = [
|
self.addheaders = [
|
||||||
['User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3']
|
['User-agent', self.USER_AGENT]
|
||||||
]
|
]
|
||||||
|
|
||||||
# Share cookies with firefox
|
# Share cookies with firefox
|
||||||
if firefox_cookies:
|
if firefox_cookies:
|
||||||
self.__cookie = FirefoxCookieJar(firefox_cookies)
|
self.__cookie = FirefoxCookieJar(self.DOMAIN, firefox_cookies)
|
||||||
self.__cookie.load()
|
self.__cookie.load()
|
||||||
self.set_cookiejar(self.__cookie)
|
self.set_cookiejar(self.__cookie)
|
||||||
else:
|
else:
|
||||||
self.__cookie = None
|
self.__cookie = None
|
||||||
|
|
||||||
self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
|
self.__parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
|
||||||
self.__page = None
|
self.page = None
|
||||||
self.__last_update = 0.0
|
self.last_update = 0.0
|
||||||
self.username = username
|
self.username = username
|
||||||
self.password = password
|
self.password = password
|
||||||
if self.password:
|
if self.password:
|
||||||
try:
|
try:
|
||||||
self.home()
|
self.home()
|
||||||
except DLFPUnavailable:
|
except BrowserUnavailable:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def page(self):
|
|
||||||
return self.__page
|
|
||||||
|
|
||||||
def home(self):
|
|
||||||
return self.location('https://linuxfr.org')
|
|
||||||
|
|
||||||
def pageaccess(func):
|
def pageaccess(func):
|
||||||
def inner(self, *args, **kwargs):
|
def inner(self, *args, **kwargs):
|
||||||
if not self.__page or not self.__page.isLogged() and self.password:
|
if not self.page or not self.page.isLogged() and self.password:
|
||||||
self.home()
|
self.home()
|
||||||
|
|
||||||
return func(self, *args, **kwargs)
|
return func(self, *args, **kwargs)
|
||||||
|
|
@ -89,57 +116,65 @@ class DLFP(Browser):
|
||||||
def keepalive(self):
|
def keepalive(self):
|
||||||
self.home()
|
self.home()
|
||||||
|
|
||||||
def login(self):
|
def change_location(func):
|
||||||
self.location('/login.html', 'login=%s&passwd=%s&isauto=1' % (self.username, self.password))
|
def inner(self, *args, **kwargs):
|
||||||
|
if args and args[0][0] == '/' and (not self.request or self.request.host != self.DOMAIN):
|
||||||
|
args = ('%s://%s%s' % (self.PROTOCOL, self.DOMAIN, args[0]),) + args[1:]
|
||||||
|
print args
|
||||||
|
|
||||||
|
return func(self, *args, **kwargs)
|
||||||
|
return inner
|
||||||
|
|
||||||
|
@change_location
|
||||||
def openurl(self, *args, **kwargs):
|
def openurl(self, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
return Browser.open(self, *args, **kwargs)
|
return mechanize.Browser.open(self, *args, **kwargs)
|
||||||
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
||||||
error(e)
|
error(e)
|
||||||
raise DLFPUnavailable()
|
raise BrowserUnavailable()
|
||||||
except BrowserStateError:
|
except BrowserStateError:
|
||||||
self.home()
|
self.home()
|
||||||
return Browser.open(self, *args, **kwargs)
|
return mechanize.Browser.open(self, *args, **kwargs)
|
||||||
|
|
||||||
def submit(self, *args, **kwargs):
|
def submit(self, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
self.__changeLocation(Browser.submit(self, *args, **kwargs))
|
self.__changeLocation(mechanize.Browser.submit(self, *args, **kwargs))
|
||||||
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
||||||
error(e)
|
error(e)
|
||||||
self.__page = None
|
self.page = None
|
||||||
raise DLFPUnavailable()
|
raise BrowserUnavailable()
|
||||||
except (BrowserStateError,DLFPRetry):
|
except (BrowserStateError,BrowserRetry):
|
||||||
self.home()
|
self.home()
|
||||||
raise DLFPUnavailable()
|
raise BrowserUnavailable()
|
||||||
|
|
||||||
def isOnPage(self, pageCls):
|
def isOnPage(self, pageCls):
|
||||||
return isinstance(self.__page, pageCls)
|
return isinstance(self.page, pageCls)
|
||||||
|
|
||||||
def follow_link(self, *args, **kwargs):
|
def follow_link(self, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
self.__changeLocation(Browser.follow_link(self, *args, **kwargs))
|
self.__changeLocation(mechanize.Browser.follow_link(self, *args, **kwargs))
|
||||||
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
||||||
error(e)
|
error(e)
|
||||||
self.__page = None
|
self.page = None
|
||||||
raise DLFPUnavailable()
|
raise BrowserUnavailable()
|
||||||
except (BrowserStateError,DLFPRetry):
|
except (BrowserStateError,BrowserRetry):
|
||||||
self.home()
|
self.home()
|
||||||
raise DLFPUnavailable()
|
raise BrowserUnavailable()
|
||||||
|
|
||||||
|
@change_location
|
||||||
def location(self, *args, **kwargs):
|
def location(self, *args, **kwargs):
|
||||||
keep_args = copy(args)
|
keep_args = copy(args)
|
||||||
keep_kwargs = kwargs.copy()
|
keep_kwargs = kwargs.copy()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.__changeLocation(Browser.open(self, *args, **kwargs))
|
self.__changeLocation(mechanize.Browser.open(self, *args, **kwargs))
|
||||||
except DLFPRetry:
|
except BrowserRetry:
|
||||||
if not self.__page or not args or self.__page.url != args[0]:
|
if not self.page or not args or self.page.url != args[0]:
|
||||||
self.location(keep_args, keep_kwargs)
|
self.location(keep_args, keep_kwargs)
|
||||||
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
except (response_seek_wrapper, urllib2.HTTPError, urllib2.URLError), e:
|
||||||
error(e)
|
error(e)
|
||||||
self.__page = None
|
self.page = None
|
||||||
raise DLFPUnavailable()
|
raise BrowserUnavailable()
|
||||||
except BrowserStateError:
|
except BrowserStateError:
|
||||||
self.home()
|
self.home()
|
||||||
self.location(*keep_args, **keep_kwargs)
|
self.location(*keep_args, **keep_kwargs)
|
||||||
|
|
@ -147,7 +182,7 @@ class DLFP(Browser):
|
||||||
def __changeLocation(self, result):
|
def __changeLocation(self, result):
|
||||||
# Find page from url
|
# Find page from url
|
||||||
pageCls = None
|
pageCls = None
|
||||||
for key, value in self.pages.items():
|
for key, value in self.PAGES.items():
|
||||||
regexp = re.compile('^%s$' % key)
|
regexp = re.compile('^%s$' % key)
|
||||||
m = regexp.match(result.geturl())
|
m = regexp.match(result.geturl())
|
||||||
if m:
|
if m:
|
||||||
|
|
@ -156,7 +191,7 @@ class DLFP(Browser):
|
||||||
|
|
||||||
# Not found
|
# Not found
|
||||||
if not pageCls:
|
if not pageCls:
|
||||||
self.__page = None
|
self.page = None
|
||||||
r = result.read()
|
r = result.read()
|
||||||
if isinstance(r, unicode):
|
if isinstance(r, unicode):
|
||||||
r = r.encode('iso-8859-15', 'replace')
|
r = r.encode('iso-8859-15', 'replace')
|
||||||
|
|
@ -165,18 +200,13 @@ class DLFP(Browser):
|
||||||
return
|
return
|
||||||
|
|
||||||
print '[%s] Gone on %s' % (self.username, result.geturl())
|
print '[%s] Gone on %s' % (self.username, result.geturl())
|
||||||
self.__last_update = time.time()
|
self.last_update = time.time()
|
||||||
|
|
||||||
document = self.__parser.parse(result, encoding='iso-8859-1')
|
document = self.__parser.parse(result, encoding='iso-8859-1')
|
||||||
self.__page = pageCls(self, document, result.geturl())
|
self.page = pageCls(self, document, result.geturl())
|
||||||
self.__page.loaded()
|
self.page.loaded()
|
||||||
|
|
||||||
# Special pages
|
if not self.isLogged() and self.password:
|
||||||
if isinstance(self.__page, LoginPage):
|
|
||||||
if self.__page.hasError():
|
|
||||||
raise DLFPIncorrectPassword()
|
|
||||||
raise DLFPRetry()
|
|
||||||
if not self.__page.isLogged() and self.password:
|
|
||||||
print '!! Relogin !!'
|
print '!! Relogin !!'
|
||||||
self.login()
|
self.login()
|
||||||
return
|
return
|
||||||
|
|
@ -31,11 +31,10 @@ from mechanize import CookieJar, Cookie
|
||||||
#logger.setLevel(logging.DEBUG)
|
#logger.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
class FirefoxCookieJar(CookieJar):
|
class FirefoxCookieJar(CookieJar):
|
||||||
|
def __init__(self, domain, sqlite_file=None, policy=None):
|
||||||
def __init__(self, sqlite_file=None, policy=None):
|
|
||||||
|
|
||||||
CookieJar.__init__(self, policy)
|
CookieJar.__init__(self, policy)
|
||||||
|
|
||||||
|
self.domain = domain
|
||||||
self.sqlite_file = sqlite_file
|
self.sqlite_file = sqlite_file
|
||||||
|
|
||||||
def __connect(self):
|
def __connect(self):
|
||||||
|
|
@ -47,7 +46,6 @@ class FirefoxCookieJar(CookieJar):
|
||||||
|
|
||||||
return db
|
return db
|
||||||
|
|
||||||
|
|
||||||
def load(self):
|
def load(self):
|
||||||
|
|
||||||
db = self.__connect()
|
db = self.__connect()
|
||||||
|
|
@ -55,7 +53,7 @@ class FirefoxCookieJar(CookieJar):
|
||||||
|
|
||||||
cookies = db.execute("""SELECT host, path, name, value, expiry, lastAccessed, isSecure
|
cookies = db.execute("""SELECT host, path, name, value, expiry, lastAccessed, isSecure
|
||||||
FROM moz_cookies
|
FROM moz_cookies
|
||||||
WHERE host LIKE '%linuxfr%'""")
|
WHERE host LIKE '%%%s%%'""" % self.domain)
|
||||||
|
|
||||||
for entry in cookies:
|
for entry in cookies:
|
||||||
|
|
||||||
|
|
@ -91,7 +89,7 @@ class FirefoxCookieJar(CookieJar):
|
||||||
db = self.__connect()
|
db = self.__connect()
|
||||||
if not db: return
|
if not db: return
|
||||||
|
|
||||||
db.execute("DELETE FROM moz_cookies WHERE host LIKE '%linuxfr%'")
|
db.execute("DELETE FROM moz_cookies WHERE host LIKE '%%%s%%'" % self.domain)
|
||||||
for cookie in self:
|
for cookie in self:
|
||||||
if cookie.secure: secure = 1
|
if cookie.secure: secure = 1
|
||||||
else: secure = 0
|
else: secure = 0
|
||||||
Loading…
Add table
Add a link
Reference in a new issue