Improved Wells Fargo module stability.

Signed-off-by: Oleg Plakhotniuk <olegus8@gmail.com>
This commit is contained in:
Oleg Plakhotniuk 2014-10-10 15:30:13 -05:00 committed by Laurent Bachelier
commit cbd084283d
3 changed files with 109 additions and 119 deletions

View file

@ -24,8 +24,9 @@ from weboob.capabilities.bank import AccountNotFound
from weboob.browser import LoginBrowser, URL, need_login from weboob.browser import LoginBrowser, URL, need_login
from weboob.exceptions import BrowserIncorrectPassword from weboob.exceptions import BrowserIncorrectPassword
from .pages import LoginPage, LoginRedirectPage, LoggedInPage, SummaryPage, \ from .pages import LoginPage, LoginProceedPage, LoginRedirectPage, \
DynamicPage SummaryPage, ActivityCashPage, ActivityCardPage, \
StatementsPage, StatementPage, LoggedInPage
__all__ = ['WellsFargo'] __all__ = ['WellsFargo']
@ -34,45 +35,58 @@ __all__ = ['WellsFargo']
class WellsFargo(LoginBrowser): class WellsFargo(LoginBrowser):
BASEURL = 'https://online.wellsfargo.com' BASEURL = 'https://online.wellsfargo.com'
login = URL('/$', LoginPage) login = URL('/$', LoginPage)
loginRedirect = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON$', login_proceed = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON.*$',
LoginRedirectPage)
loggedIn = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON_PORTAL_PAUSE$',
'/das/cgi-bin/session.cgi\?screenid=SIGNON&LOB=CONS$',
'/login\?ERROR_CODE=.*LOB=CONS&$', '/login\?ERROR_CODE=.*LOB=CONS&$',
LoggedInPage) LoginProceedPage)
login_redirect = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON.*$',
'/login\?ERROR_CODE=.*LOB=CONS&$',
LoginRedirectPage)
summary = URL('/das/channel/accountSummary$', SummaryPage) summary = URL('/das/channel/accountSummary$', SummaryPage)
dynamic = URL('/das/cgi-bin/session.cgi\?sessargs=.+$', activity_cash = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
ActivityCashPage)
activity_card = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
ActivityCardPage)
statements = URL(
'/das/cgi-bin/session.cgi\?sessargs=.+$',
'/das/channel/accountActivityDDA\?action=doSetPage&page=.*$', '/das/channel/accountActivityDDA\?action=doSetPage&page=.*$',
DynamicPage) StatementsPage)
statement = URL('/das/cgi-bin/session.cgi\?sessargs=.+$',
_pause = 1 StatementPage)
unknown = URL('/.*$', LoggedInPage) # E.g. random advertisement pages.
def do_login(self): def do_login(self):
self.session.cookies.clear()
self.login.go() self.login.go()
self.page.login(self.username, self.password) self.page.login(self.username, self.password)
if not self.loginRedirect.is_here(): if not self.page.logged:
raise BrowserIncorrectPassword() raise BrowserIncorrectPassword()
# Sometimes Wells Fargo server returns "Session time out" error def location(self, *args, **kwargs):
# right after login if we don't make a pause here. """
sleep(self._pause) Wells Fargo inserts redirecting pages from time to time,
self._pause = min(30, self._pause*2) so we should follow them whenever we see them.
self.page.redirect() """
self._pause = 1 r = super(WellsFargo, self).location(*args, **kwargs)
if self.login_proceed.is_here():
return self.page.proceed()
elif self.login_redirect.is_here():
return self.page.redirect()
else:
return r
def get_account(self, id_): def get_account(self, id_):
self.to_activity() self.to_activity()
if id_ not in self.page.subpage.accounts_ids(): if id_ not in self.page.accounts_ids():
raise AccountNotFound() raise AccountNotFound()
else: else:
self.to_activity(id_) self.to_activity(id_)
return self.page.subpage.get_account() return self.page.get_account()
def get_accounts(self): def iter_accounts(self):
self.to_activity() self.to_activity()
for id_ in self.page.subpage.accounts_ids(): for id_ in self.page.accounts_ids():
self.to_activity(id_) self.to_activity(id_)
yield self.page.subpage.get_account() yield self.page.get_account()
@need_login @need_login
def to_summary(self): def to_summary(self):
@ -80,10 +94,7 @@ class WellsFargo(LoginBrowser):
assert self.summary.is_here() assert self.summary.is_here()
def is_activity(self): def is_activity(self):
try: return self.activity_cash.is_here() or self.activity_card.is_here()
return self.page.subpage.is_activity()
except AttributeError:
return False
@need_login @need_login
def to_activity(self, id_=None): def to_activity(self, id_=None):
@ -91,42 +102,30 @@ class WellsFargo(LoginBrowser):
self.to_summary() self.to_summary()
self.page.to_activity() self.page.to_activity()
assert self.is_activity() assert self.is_activity()
if id_ and self.page.subpage.account_id() != id_: if id_ and self.page.account_id() != id_:
self.page.subpage.to_account(id_) self.page.to_account(id_)
assert self.is_activity() assert self.is_activity()
assert self.page.subpage.account_id() == id_ assert self.page.account_id() == id_
def is_statements(self):
try:
return self.page.subpage.is_statements()
except AttributeError:
return False
@need_login @need_login
def to_statements(self, id_=None, year=None): def to_statements(self, id_=None, year=None):
if not self.is_statements(): if not self.statements.is_here():
self.to_summary() self.to_summary()
self.page.to_statements() self.page.to_statements()
assert self.is_statements() assert self.statements.is_here()
if id_ and self.page.subpage.account_id() != id_: if id_ and self.page.account_id() != id_:
self.page.subpage.to_account(id_) self.page.to_account(id_)
assert self.is_statements() assert self.statements.is_here()
assert self.page.subpage.account_id() == id_ assert self.page.account_id() == id_
if year and self.page.subpage.year() != year: if year and self.page.year() != year:
self.page.subpage.to_year(year) self.page.to_year(year)
assert self.is_statements() assert self.statements.is_here()
assert self.page.subpage.year() == year assert self.page.year() == year
def is_statement(self):
try:
return self.page.subpage.is_statement()
except AttributeError:
return False
@need_login @need_login
def to_statement(self, uri): def to_statement(self, uri):
self.location(uri) self.location(uri)
assert self.is_statement() assert self.statement.is_here()
def iter_history(self, account): def iter_history(self, account):
self.to_activity(account.id) self.to_activity(account.id)
@ -138,19 +137,18 @@ class WellsFargo(LoginBrowser):
# transactions grouped by statement period will not be available # transactions grouped by statement period will not be available
# for up to seven days." # for up to seven days."
# (www.wellsfargo.com, 2014-07-20) # (www.wellsfargo.com, 2014-07-20)
if self.page.subpage.since_last_statement(): if self.page.since_last_statement():
assert self.page.subpage.account_id() == account.id assert self.page.account_id() == account.id
while True: while True:
for trans in self.page.subpage.iter_transactions(): for trans in self.page.iter_transactions():
yield trans yield trans
if not self.page.subpage.next_(): if not self.page.next_():
break break
self.to_statements(account.id) self.to_statements(account.id)
for year in self.page.subpage.years(): for year in self.page.years():
self.to_statements(account.id, year) self.to_statements(account.id, year)
for stmt in self.page.subpage.statements(): for stmt in self.page.statements():
self.to_statement(stmt) self.to_statement(stmt)
for trans in self.page.subpage.iter_transactions(): for trans in self.page.iter_transactions():
yield trans yield trans

View file

@ -44,7 +44,7 @@ class WellsFargoModule(Module, CapBank):
self.config['password'].get()) self.config['password'].get())
def iter_accounts(self): def iter_accounts(self):
return self.browser.get_accounts() return self.browser.iter_accounts()
def get_account(self, id_): def get_account(self, id_):
return self.browser.get_account(id_) return self.browser.get_account(id_)

View file

@ -20,10 +20,12 @@
from weboob.capabilities.bank import Account, Transaction from weboob.capabilities.bank import Account, Transaction
from weboob.browser.pages import Page, HTMLPage, LoggedPage, RawPage from weboob.browser.pages import Page, HTMLPage, LoggedPage, RawPage
from urllib import unquote from urllib import unquote
from requests.cookies import morsel_to_cookie
from .parsers import StatementParser, clean_amount, clean_label from .parsers import StatementParser, clean_amount, clean_label
import itertools import itertools
import re import re
import datetime import datetime
import Cookie
class LoginPage(HTMLPage): class LoginPage(HTMLPage):
@ -34,12 +36,28 @@ class LoginPage(HTMLPage):
form.submit() form.submit()
class LoginProceedPage(LoggedPage, HTMLPage):
    """
    Intermediate login page whose inline script assigns ``document.cookie``.

    The cookie assignment is read out of the script text and applied to the
    browser session by hand before the page's form is submitted.
    """

    is_here = '//script[contains(text(),"setAndCheckCookie")]'

    def proceed(self):
        """
        Install the cookie found in the page's first script, then submit
        the page's form.

        Returns whatever ``form.submit()`` returns.
        """
        script = self.doc.xpath('//script/text()')[0]
        # The greedy leading ".*" combined with DOTALL selects the last
        # ``document.cookie = "..."`` assignment present in the script.
        cookie_str = re.match(r'.*document\.cookie = "([^"]+)".*',
                              script, re.DOTALL).group(1)
        # Use SimpleCookie rather than Cookie.Cookie: the latter is an alias
        # of SmartCookie, which tries to unpickle cookie values and must not
        # be fed data coming from a remote page.
        morsel = list(Cookie.SimpleCookie(cookie_str).values())[0]
        self.browser.session.cookies.set_cookie(morsel_to_cookie(morsel))
        form = self.get_form()
        return form.submit()
class LoginRedirectPage(LoggedPage, HTMLPage): class LoginRedirectPage(LoggedPage, HTMLPage):
is_here = 'contains(//meta[@http-equiv="Refresh"]/@content,' \
'"SIGNON_PORTAL_PAUSE")'
def redirect(self): def redirect(self):
refresh = self.doc.xpath( refresh = self.doc.xpath(
'//meta[@http-equiv="Refresh"]/@content')[0] '//meta[@http-equiv="Refresh"]/@content')[0]
url = re.match(r'^.*URL=(.*)$', refresh).group(1) url = re.match(r'^.*URL=(.*)$', refresh).group(1)
self.browser.location(url) return self.browser.location(url)
class LoggedInPage(HTMLPage): class LoggedInPage(HTMLPage):
@ -50,6 +68,8 @@ class LoggedInPage(HTMLPage):
class SummaryPage(LoggedInPage): class SummaryPage(LoggedInPage):
is_here = u'//title[contains(text(),"Account Summary")]'
def to_activity(self): def to_activity(self):
href = self.doc.xpath(u'//a[text()="Account Activity"]/@href')[0] href = self.doc.xpath(u'//a[text()="Account Activity"]/@href')[0]
self.browser.location(href) self.browser.location(href)
@ -60,45 +80,7 @@ class SummaryPage(LoggedInPage):
self.browser.location(href) self.browser.location(href)
class DynamicPage(Page): class AccountPage(LoggedInPage):
"""
Most of Wells Fargo pages have the same URI pattern.
Some of these pages are HTML, some are PDF.
"""
def __init__(self, browser, response, *args, **kwargs):
super(DynamicPage, self).__init__(browser, response, *args, **kwargs)
# Ugly hack to figure out the page type
klass = RawPage if response.content[:4] == '%PDF' else HTMLPage
self.doc = klass(browser, response, *args, **kwargs).doc
subclass = None
# Ugly hack to figure out the page type
if response.content[:4] == '%PDF':
subclass = StatementSubPage
elif u'Account Activity' in self._title():
name = self._account_name()
if u'CHECKING' in name or u'SAVINGS' in name:
subclass = ActivityCashSubPage
elif u'CARD' in name:
subclass = ActivityCardSubPage
elif u'Statements & Documents' in self._title():
subclass = StatementsSubPage
assert subclass
self.subpage = subclass(browser, response, *args, **kwargs)
@property
def logged(self):
return self.subpage.logged
def _title(self):
return self.doc.xpath(u'//title/text()')[0]
def _account_name(self):
return self.doc.xpath(
u'//select[@name="selectedAccountUID"]'
u'/option[@selected="selected"]/text()')[0]
class AccountSubPage(LoggedInPage):
def account_id(self, name=None): def account_id(self, name=None):
if name: if name:
return name[-4:] # Last 4 digits of "BLAH XXXXXXX1234" return name[-4:] # Last 4 digits of "BLAH XXXXXXX1234"
@ -106,9 +88,10 @@ class AccountSubPage(LoggedInPage):
return self.account_id(self.account_name()) return self.account_id(self.account_name())
class ActivitySubPage(AccountSubPage): class ActivityPage(AccountPage):
def is_activity(self): def is_here(self):
return True return bool(self.doc.xpath(
u'contains(//title/text(),"Account Activity")'))
def accounts_names(self): def accounts_names(self):
return self.doc.xpath( return self.doc.xpath(
@ -128,9 +111,10 @@ class ActivitySubPage(AccountSubPage):
u'/option[@selected="selected"]/@value')[0] u'/option[@selected="selected"]/@value')[0]
def account_name(self): def account_name(self):
return self.doc.xpath( for name in self.doc.xpath(u'//select[@name="selectedAccountUID"]'
u'//select[@name="selectedAccountUID"]' u'/option[@selected="selected"]/text()'):
u'/option[@selected="selected"]/text()')[0] return name
return u''
def account_type(self, name=None): def account_type(self, name=None):
raise NotImplementedError() raise NotImplementedError()
@ -168,7 +152,12 @@ class ActivitySubPage(AccountSubPage):
raise NotImplementedError() raise NotImplementedError()
class ActivityCashSubPage(ActivitySubPage): class ActivityCashPage(ActivityPage):
def is_here(self):
return super(ActivityCashPage, self).is_here() and \
(u'CHECKING' in self.account_name() or
u'SAVINGS' in self.account_name())
def account_type(self, name=None): def account_type(self, name=None):
name = name or self.account_name() name = name or self.account_name()
if u'CHECKING' in name: if u'CHECKING' in name:
@ -231,7 +220,11 @@ class ActivityCashSubPage(ActivitySubPage):
return False return False
class ActivityCardSubPage(ActivitySubPage): class ActivityCardPage(ActivityPage):
def is_here(self):
return super(ActivityCardPage, self).is_here() and \
u'CARD' in self.account_name()
def account_type(self, name=None): def account_type(self, name=None):
return Account.TYPE_CARD return Account.TYPE_CARD
@ -240,7 +233,7 @@ class ActivityCardSubPage(ActivitySubPage):
u'//td[@headers="outstandingBalance"]/text()')[0] u'//td[@headers="outstandingBalance"]/text()')[0]
def get_account(self): def get_account(self):
account = ActivitySubPage.get_account(self) account = ActivityPage.get_account(self)
# Credit card is essentially a liability. # Credit card is essentially a liability.
# Negative amount means there's a payment due. # Negative amount means there's a payment due.
@ -298,9 +291,8 @@ class ActivityCardSubPage(ActivitySubPage):
return False return False
class StatementsSubPage(AccountSubPage): class StatementsPage(AccountPage):
def is_statements(self): is_here = u'contains(//title/text(),"Statements")'
return True
def account_name(self): def account_name(self):
return self.doc.xpath( return self.doc.xpath(
@ -344,13 +336,13 @@ class StatementsSubPage(AccountSubPage):
yield unquote(inner_uri) yield unquote(inner_uri)
class StatementSubPage(LoggedPage, RawPage): class StatementPage(LoggedPage, RawPage):
def __init__(self, *args, **kwArgs): def __init__(self, *args, **kwArgs):
RawPage.__init__(self, *args, **kwArgs) RawPage.__init__(self, *args, **kwArgs)
self._parser = StatementParser(self.doc) self._parser = StatementParser(self.doc)
def is_statement(self): def is_here(self):
return True return self.doc[:4] == '%PDF'
def iter_transactions(self): def iter_transactions(self):
# Maintain a nice consistent newer-to-older order of transactions. # Maintain a nice consistent newer-to-older order of transactions.