Improved Wells Fargo module stability.

Signed-off-by: Oleg Plakhotniuk <olegus8@gmail.com>
This commit is contained in:
Oleg Plakhotniuk 2014-10-10 15:30:13 -05:00 committed by Laurent Bachelier
commit cbd084283d
3 changed files with 109 additions and 119 deletions

View file

@ -24,8 +24,9 @@ from weboob.capabilities.bank import AccountNotFound
from weboob.browser import LoginBrowser, URL, need_login
from weboob.exceptions import BrowserIncorrectPassword
from .pages import LoginPage, LoginRedirectPage, LoggedInPage, SummaryPage, \
DynamicPage
from .pages import LoginPage, LoginProceedPage, LoginRedirectPage, \
SummaryPage, ActivityCashPage, ActivityCardPage, \
StatementsPage, StatementPage, LoggedInPage
__all__ = ['WellsFargo']
@ -34,45 +35,58 @@ __all__ = ['WellsFargo']
class WellsFargo(LoginBrowser):
BASEURL = 'https://online.wellsfargo.com'
login = URL('/$', LoginPage)
loginRedirect = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON$',
LoginRedirectPage)
loggedIn = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON_PORTAL_PAUSE$',
'/das/cgi-bin/session.cgi\?screenid=SIGNON&LOB=CONS$',
'/login\?ERROR_CODE=.*LOB=CONS&$',
LoggedInPage)
login_proceed = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON.*$',
'/login\?ERROR_CODE=.*LOB=CONS&$',
LoginProceedPage)
login_redirect = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON.*$',
'/login\?ERROR_CODE=.*LOB=CONS&$',
LoginRedirectPage)
summary = URL('/das/channel/accountSummary$', SummaryPage)
dynamic = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
'/das/channel/accountActivityDDA\?action=doSetPage&page=.*$',
DynamicPage)
_pause = 1
activity_cash = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
ActivityCashPage)
activity_card = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
ActivityCardPage)
statements = URL(
'/das/cgi-bin/session.cgi\?sessargs=.+$',
'/das/channel/accountActivityDDA\?action=doSetPage&page=.*$',
StatementsPage)
statement = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
StatementPage)
unknown = URL('/.*$', LoggedInPage) # E.g. random advertisement pages.
def do_login(self):
self.session.cookies.clear()
self.login.go()
self.page.login(self.username, self.password)
if not self.loginRedirect.is_here():
if not self.page.logged:
raise BrowserIncorrectPassword()
# Sometimes Wells Fargo server returns "Session time out" error
# right after login if we don't make a pause here.
sleep(self._pause)
self._pause = min(30, self._pause*2)
self.page.redirect()
self._pause = 1
def location(self, *args, **kwargs):
    """
    Load a location, then follow any Wells Fargo interstitial page.

    The request itself is delegated to the parent browser. If the page
    that comes back is one of the intermediate login pages, the result
    of following it is returned; otherwise the parent's result is
    returned unchanged.
    """
    response = super(WellsFargo, self).location(*args, **kwargs)

    # The "proceed" page is tested before the refresh-based redirect page.
    if self.login_proceed.is_here():
        return self.page.proceed()
    if self.login_redirect.is_here():
        return self.page.redirect()

    return response
def get_account(self, id_):
self.to_activity()
if id_ not in self.page.subpage.accounts_ids():
if id_ not in self.page.accounts_ids():
raise AccountNotFound()
else:
self.to_activity(id_)
return self.page.subpage.get_account()
return self.page.get_account()
def get_accounts(self):
def iter_accounts(self):
self.to_activity()
for id_ in self.page.subpage.accounts_ids():
for id_ in self.page.accounts_ids():
self.to_activity(id_)
yield self.page.subpage.get_account()
yield self.page.get_account()
@need_login
def to_summary(self):
@ -80,10 +94,7 @@ class WellsFargo(LoginBrowser):
assert self.summary.is_here()
def is_activity(self):
try:
return self.page.subpage.is_activity()
except AttributeError:
return False
return self.activity_cash.is_here() or self.activity_card.is_here()
@need_login
def to_activity(self, id_=None):
@ -91,42 +102,30 @@ class WellsFargo(LoginBrowser):
self.to_summary()
self.page.to_activity()
assert self.is_activity()
if id_ and self.page.subpage.account_id() != id_:
self.page.subpage.to_account(id_)
if id_ and self.page.account_id() != id_:
self.page.to_account(id_)
assert self.is_activity()
assert self.page.subpage.account_id() == id_
def is_statements(self):
try:
return self.page.subpage.is_statements()
except AttributeError:
return False
assert self.page.account_id() == id_
@need_login
def to_statements(self, id_=None, year=None):
if not self.is_statements():
if not self.statements.is_here():
self.to_summary()
self.page.to_statements()
assert self.is_statements()
if id_ and self.page.subpage.account_id() != id_:
self.page.subpage.to_account(id_)
assert self.is_statements()
assert self.page.subpage.account_id() == id_
if year and self.page.subpage.year() != year:
self.page.subpage.to_year(year)
assert self.is_statements()
assert self.page.subpage.year() == year
def is_statement(self):
try:
return self.page.subpage.is_statement()
except AttributeError:
return False
assert self.statements.is_here()
if id_ and self.page.account_id() != id_:
self.page.to_account(id_)
assert self.statements.is_here()
assert self.page.account_id() == id_
if year and self.page.year() != year:
self.page.to_year(year)
assert self.statements.is_here()
assert self.page.year() == year
@need_login
def to_statement(self, uri):
self.location(uri)
assert self.is_statement()
assert self.statement.is_here()
def iter_history(self, account):
self.to_activity(account.id)
@ -138,19 +137,18 @@ class WellsFargo(LoginBrowser):
# transactions grouped by statement period will not be available
# for up to seven days."
# (www.wellsfargo.com, 2014-07-20)
if self.page.subpage.since_last_statement():
assert self.page.subpage.account_id() == account.id
if self.page.since_last_statement():
assert self.page.account_id() == account.id
while True:
for trans in self.page.subpage.iter_transactions():
for trans in self.page.iter_transactions():
yield trans
if not self.page.subpage.next_():
if not self.page.next_():
break
self.to_statements(account.id)
for year in self.page.subpage.years():
for year in self.page.years():
self.to_statements(account.id, year)
for stmt in self.page.subpage.statements():
for stmt in self.page.statements():
self.to_statement(stmt)
for trans in self.page.subpage.iter_transactions():
for trans in self.page.iter_transactions():
yield trans

View file

@ -44,7 +44,7 @@ class WellsFargoModule(Module, CapBank):
self.config['password'].get())
def iter_accounts(self):
return self.browser.get_accounts()
return self.browser.iter_accounts()
def get_account(self, id_):
return self.browser.get_account(id_)

View file

@ -20,10 +20,12 @@
from weboob.capabilities.bank import Account, Transaction
from weboob.browser.pages import Page, HTMLPage, LoggedPage, RawPage
from urllib import unquote
from requests.cookies import morsel_to_cookie
from .parsers import StatementParser, clean_amount, clean_label
import itertools
import re
import datetime
import Cookie
class LoginPage(HTMLPage):
@ -34,12 +36,28 @@ class LoginPage(HTMLPage):
form.submit()
class LoginProceedPage(LoggedPage, HTMLPage):
    """
    Intermediate login page whose script assigns a cookie before continuing.

    The page is recognised by a script mentioning "setAndCheckCookie".
    proceed() copies the cookie assignment out of the script text into the
    browser session and then submits the page's form.
    """

    is_here = '//script[contains(text(),"setAndCheckCookie")]'

    def proceed(self):
        """
        Store the script-assigned cookie in the session and submit the form.

        Returns whatever the form submission returns.

        Raises ValueError when no script on the page assigns
        document.cookie, or when the assigned string holds no cookie.
        """
        # Scan every script instead of only the first one: is_here matches
        # a script anywhere on the page, so an unrelated script may come
        # first. When the first script matches, the result is unchanged.
        cookie_str = None
        for script in self.doc.xpath('//script/text()'):
            found = re.match(r'.*document\.cookie = "([^"]+)".*',
                             script, re.DOTALL)
            if found:
                cookie_str = found.group(1)
                break
        if cookie_str is None:
            raise ValueError(
                'No document.cookie assignment found in page scripts')

        # SimpleCookie rather than the Cookie.Cookie alias: the alias is the
        # deprecated SmartCookie, which tries to unpickle cookie values and
        # must not be fed server-supplied text.
        morsels = list(Cookie.SimpleCookie(cookie_str).values())
        if not morsels:
            raise ValueError(
                'Could not parse cookie assigned by page script: %r'
                % cookie_str)
        self.browser.session.cookies.set_cookie(morsel_to_cookie(morsels[0]))

        form = self.get_form()
        return form.submit()
class LoginRedirectPage(LoggedPage, HTMLPage):
is_here = 'contains(//meta[@http-equiv="Refresh"]/@content,' \
'"SIGNON_PORTAL_PAUSE")'
def redirect(self):
refresh = self.doc.xpath(
'//meta[@http-equiv="Refresh"]/@content')[0]
url = re.match(r'^.*URL=(.*)$', refresh).group(1)
self.browser.location(url)
return self.browser.location(url)
class LoggedInPage(HTMLPage):
@ -50,6 +68,8 @@ class LoggedInPage(HTMLPage):
class SummaryPage(LoggedInPage):
is_here = u'//title[contains(text(),"Account Summary")]'
def to_activity(self):
href = self.doc.xpath(u'//a[text()="Account Activity"]/@href')[0]
self.browser.location(href)
@ -60,45 +80,7 @@ class SummaryPage(LoggedInPage):
self.browser.location(href)
class DynamicPage(Page):
    """
    Wrapper for Wells Fargo pages that share a single URI pattern.

    Some of these pages are HTML and some are PDF, so the concrete page
    class is picked from the response itself and exposed as ``subpage``.
    """

    def __init__(self, browser, response, *args, **kwargs):
        super(DynamicPage, self).__init__(browser, response, *args, **kwargs)

        # Ugly hack to figure out the page type: look at the first four
        # bytes of the body and parse as raw data or as HTML accordingly.
        is_pdf = response.content[:4] == '%PDF'
        doc_class = RawPage if is_pdf else HTMLPage
        self.doc = doc_class(browser, response, *args, **kwargs).doc

        chosen = self._pick_subpage_class(is_pdf)
        assert chosen
        self.subpage = chosen(browser, response, *args, **kwargs)

    def _pick_subpage_class(self, is_pdf):
        # Return the subpage class for this response, or None if unknown.
        if is_pdf:
            return StatementSubPage

        title = self._title()
        if u'Account Activity' in title:
            account = self._account_name()
            if u'CHECKING' in account or u'SAVINGS' in account:
                return ActivityCashSubPage
            if u'CARD' in account:
                return ActivityCardSubPage
            # Activity page for an unrecognised account kind.
            return None
        if u'Statements & Documents' in title:
            return StatementsSubPage
        return None

    @property
    def logged(self):
        # Logged-in state is whatever the wrapped subpage reports.
        return self.subpage.logged

    def _title(self):
        # Text of the first <title> element.
        titles = self.doc.xpath(u'//title/text()')
        return titles[0]

    def _account_name(self):
        # Label of the option currently selected in the account selector.
        selected = self.doc.xpath(
            u'//select[@name="selectedAccountUID"]'
            u'/option[@selected="selected"]/text()')
        return selected[0]
class AccountSubPage(LoggedInPage):
class AccountPage(LoggedInPage):
def account_id(self, name=None):
if name:
return name[-4:] # Last 4 digits of "BLAH XXXXXXX1234"
@ -106,9 +88,10 @@ class AccountSubPage(LoggedInPage):
return self.account_id(self.account_name())
class ActivitySubPage(AccountSubPage):
def is_activity(self):
return True
class ActivityPage(AccountPage):
def is_here(self):
return bool(self.doc.xpath(
u'contains(//title/text(),"Account Activity")'))
def accounts_names(self):
return self.doc.xpath(
@ -128,9 +111,10 @@ class ActivitySubPage(AccountSubPage):
u'/option[@selected="selected"]/@value')[0]
def account_name(self):
return self.doc.xpath(
u'//select[@name="selectedAccountUID"]'
u'/option[@selected="selected"]/text()')[0]
for name in self.doc.xpath(u'//select[@name="selectedAccountUID"]'
u'/option[@selected="selected"]/text()'):
return name
return u''
def account_type(self, name=None):
raise NotImplementedError()
@ -168,7 +152,12 @@ class ActivitySubPage(AccountSubPage):
raise NotImplementedError()
class ActivityCashSubPage(ActivitySubPage):
class ActivityCashPage(ActivityPage):
def is_here(self):
return super(ActivityCashPage, self).is_here() and \
(u'CHECKING' in self.account_name() or
u'SAVINGS' in self.account_name())
def account_type(self, name=None):
name = name or self.account_name()
if u'CHECKING' in name:
@ -231,7 +220,11 @@ class ActivityCashSubPage(ActivitySubPage):
return False
class ActivityCardSubPage(ActivitySubPage):
class ActivityCardPage(ActivityPage):
def is_here(self):
return super(ActivityCardPage, self).is_here() and \
u'CARD' in self.account_name()
def account_type(self, name=None):
return Account.TYPE_CARD
@ -240,7 +233,7 @@ class ActivityCardSubPage(ActivitySubPage):
u'//td[@headers="outstandingBalance"]/text()')[0]
def get_account(self):
account = ActivitySubPage.get_account(self)
account = ActivityPage.get_account(self)
# Credit card is essentially a liability.
# Negative amount means there's a payment due.
@ -298,9 +291,8 @@ class ActivityCardSubPage(ActivitySubPage):
return False
class StatementsSubPage(AccountSubPage):
def is_statements(self):
return True
class StatementsPage(AccountPage):
is_here = u'contains(//title/text(),"Statements")'
def account_name(self):
return self.doc.xpath(
@ -344,13 +336,13 @@ class StatementsSubPage(AccountSubPage):
yield unquote(inner_uri)
class StatementSubPage(LoggedPage, RawPage):
class StatementPage(LoggedPage, RawPage):
def __init__(self, *args, **kwArgs):
RawPage.__init__(self, *args, **kwArgs)
self._parser = StatementParser(self.doc)
def is_statement(self):
return True
def is_here(self):
return self.doc[:4] == '%PDF'
def iter_transactions(self):
# Maintain a nice consistent newer-to-older order of transactions.