From cbd084283db1d7d8e4fcbf36c6e0f2eb5838d1ae Mon Sep 17 00:00:00 2001 From: Oleg Plakhotniuk Date: Fri, 10 Oct 2014 15:30:13 -0500 Subject: [PATCH] Improved Wells Fargo module stability. Signed-off-by: Oleg Plakhotniuk --- modules/wellsfargo/browser.py | 124 +++++++++++++++++----------------- modules/wellsfargo/module.py | 2 +- modules/wellsfargo/pages.py | 102 +++++++++++++--------------- 3 files changed, 109 insertions(+), 119 deletions(-) diff --git a/modules/wellsfargo/browser.py b/modules/wellsfargo/browser.py index 95a9012a..ae256154 100644 --- a/modules/wellsfargo/browser.py +++ b/modules/wellsfargo/browser.py @@ -24,8 +24,9 @@ from weboob.capabilities.bank import AccountNotFound from weboob.browser import LoginBrowser, URL, need_login from weboob.exceptions import BrowserIncorrectPassword -from .pages import LoginPage, LoginRedirectPage, LoggedInPage, SummaryPage, \ - DynamicPage +from .pages import LoginPage, LoginProceedPage, LoginRedirectPage, \ + SummaryPage, ActivityCashPage, ActivityCardPage, \ + StatementsPage, StatementPage, LoggedInPage __all__ = ['WellsFargo'] @@ -34,45 +35,58 @@ __all__ = ['WellsFargo'] class WellsFargo(LoginBrowser): BASEURL = 'https://online.wellsfargo.com' login = URL('/$', LoginPage) - loginRedirect = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON$', - LoginRedirectPage) - loggedIn = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON_PORTAL_PAUSE$', - '/das/cgi-bin/session.cgi\?screenid=SIGNON&LOB=CONS$', - '/login\?ERROR_CODE=.*LOB=CONS&$', - LoggedInPage) + login_proceed = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON.*$', + '/login\?ERROR_CODE=.*LOB=CONS&$', + LoginProceedPage) + login_redirect = URL('/das/cgi-bin/session.cgi\?screenid=SIGNON.*$', + '/login\?ERROR_CODE=.*LOB=CONS&$', + LoginRedirectPage) summary = URL('/das/channel/accountSummary$', SummaryPage) - dynamic = URL('/das/cgi-bin/session.cgi\?sessargs=.+$', - '/das/channel/accountActivityDDA\?action=doSetPage&page=.*$', - DynamicPage) - - _pause = 1 + activity_cash = URL('/das/cgi-bin/session.cgi\?sessargs=.+$', + ActivityCashPage) + activity_card = URL('/das/cgi-bin/session.cgi\?sessargs=.+$', + ActivityCardPage) + statements = URL( + '/das/cgi-bin/session.cgi\?sessargs=.+$', + '/das/channel/accountActivityDDA\?action=doSetPage&page=.*$', + StatementsPage) + statement = URL('/das/cgi-bin/session.cgi\?sessargs=.+$', + StatementPage) + unknown = URL('/.*$', LoggedInPage) # E.g. random advertisement pages. def do_login(self): + self.session.cookies.clear() self.login.go() self.page.login(self.username, self.password) - if not self.loginRedirect.is_here(): + if not self.page.logged: raise BrowserIncorrectPassword() - # Sometimes Wells Fargo server returns "Session time out" error - # right after login if we don't make a pause here. - sleep(self._pause) - self._pause = min(30, self._pause*2) - self.page.redirect() - self._pause = 1 + def location(self, *args, **kwargs): + """ + Wells Fargo inserts redirecting pages from time to time, + so we should follow them whenever we see them. + """ + r = super(WellsFargo, self).location(*args, **kwargs) + if self.login_proceed.is_here(): + return self.page.proceed() + elif self.login_redirect.is_here(): + return self.page.redirect() + else: + return r def get_account(self, id_): self.to_activity() - if id_ not in self.page.subpage.accounts_ids(): + if id_ not in self.page.accounts_ids(): raise AccountNotFound() else: self.to_activity(id_) - return self.page.subpage.get_account() + return self.page.get_account() - def get_accounts(self): + def iter_accounts(self): self.to_activity() - for id_ in self.page.subpage.accounts_ids(): + for id_ in self.page.accounts_ids(): self.to_activity(id_) - yield self.page.subpage.get_account() + yield self.page.get_account() @need_login def to_summary(self): @@ -80,10 +94,7 @@ class WellsFargo(LoginBrowser): assert self.summary.is_here() def is_activity(self): - try: - return self.page.subpage.is_activity() - except AttributeError: - return False + return self.activity_cash.is_here() or self.activity_card.is_here() @need_login def to_activity(self, id_=None): @@ -91,42 +102,30 @@ class WellsFargo(LoginBrowser): self.to_summary() self.page.to_activity() assert self.is_activity() - if id_ and self.page.subpage.account_id() != id_: - self.page.subpage.to_account(id_) + if id_ and self.page.account_id() != id_: + self.page.to_account(id_) assert self.is_activity() - assert self.page.subpage.account_id() == id_ - - def is_statements(self): - try: - return self.page.subpage.is_statements() - except AttributeError: - return False + assert self.page.account_id() == id_ @need_login def to_statements(self, id_=None, year=None): - if not self.is_statements(): + if not self.statements.is_here(): self.to_summary() self.page.to_statements() - assert self.is_statements() - if id_ and self.page.subpage.account_id() != id_: - self.page.subpage.to_account(id_) - assert self.is_statements() - assert self.page.subpage.account_id() == id_ - if year and self.page.subpage.year() != year: - self.page.subpage.to_year(year) - assert self.is_statements() - assert self.page.subpage.year() == year - - def is_statement(self): - try: - return self.page.subpage.is_statement() - except AttributeError: - return False + assert self.statements.is_here() + if id_ and self.page.account_id() != id_: + self.page.to_account(id_) + assert self.statements.is_here() + assert self.page.account_id() == id_ + if year and self.page.year() != year: + self.page.to_year(year) + assert self.statements.is_here() + assert self.page.year() == year @need_login def to_statement(self, uri): self.location(uri) - assert self.is_statement() + assert self.statement.is_here() def iter_history(self, account): self.to_activity(account.id) @@ -138,19 +137,18 @@ class WellsFargo(LoginBrowser): # transactions grouped by statement period will not be available # for up to seven days." # (www.wellsfargo.com, 2014-07-20) - if self.page.subpage.since_last_statement(): - assert self.page.subpage.account_id() == account.id + if self.page.since_last_statement(): + assert self.page.account_id() == account.id while True: - for trans in self.page.subpage.iter_transactions(): + for trans in self.page.iter_transactions(): yield trans - if not self.page.subpage.next_(): + if not self.page.next_(): break self.to_statements(account.id) - for year in self.page.subpage.years(): + for year in self.page.years(): self.to_statements(account.id, year) - for stmt in self.page.subpage.statements(): + for stmt in self.page.statements(): self.to_statement(stmt) - for trans in self.page.subpage.iter_transactions(): + for trans in self.page.iter_transactions(): yield trans - diff --git a/modules/wellsfargo/module.py b/modules/wellsfargo/module.py index 4ff0b196..66a5e8cc 100644 --- a/modules/wellsfargo/module.py +++ b/modules/wellsfargo/module.py @@ -44,7 +44,7 @@ class WellsFargoModule(Module, CapBank): self.config['password'].get()) def iter_accounts(self): - return self.browser.get_accounts() + return self.browser.iter_accounts() def get_account(self, id_): return self.browser.get_account(id_) diff --git a/modules/wellsfargo/pages.py b/modules/wellsfargo/pages.py index b0cffeb7..2c75bdae 100644 --- a/modules/wellsfargo/pages.py +++ b/modules/wellsfargo/pages.py @@ -20,10 +20,12 @@ from weboob.capabilities.bank import Account, Transaction from weboob.browser.pages import Page, HTMLPage, LoggedPage, RawPage from urllib import unquote +from requests.cookies import morsel_to_cookie from .parsers import StatementParser, clean_amount, clean_label import itertools import re import datetime +import Cookie class LoginPage(HTMLPage): @@ -34,12 +36,28 @@ class LoginPage(HTMLPage): form.submit() +class LoginProceedPage(LoggedPage, HTMLPage): + is_here = '//script[contains(text(),"setAndCheckCookie")]' + + def proceed(self): + script = self.doc.xpath('//script/text()')[0] + cookieStr = re.match('.*document\.cookie = "([^"]+)".*', + script, re.DOTALL).group(1) + morsel = Cookie.Cookie(cookieStr).values()[0] + self.browser.session.cookies.set_cookie(morsel_to_cookie(morsel)) + form = self.get_form() + return form.submit() + + class LoginRedirectPage(LoggedPage, HTMLPage): + is_here = 'contains(//meta[@http-equiv="Refresh"]/@content,' \ + '"SIGNON_PORTAL_PAUSE")' + def redirect(self): refresh = self.doc.xpath( '//meta[@http-equiv="Refresh"]/@content')[0] url = re.match(r'^.*URL=(.*)$', refresh).group(1) - self.browser.location(url) + return self.browser.location(url) class LoggedInPage(HTMLPage): @@ -50,6 +68,8 @@ class LoggedInPage(HTMLPage): class SummaryPage(LoggedInPage): + is_here = u'//title[contains(text(),"Account Summary")]' + def to_activity(self): href = self.doc.xpath(u'//a[text()="Account Activity"]/@href')[0] self.browser.location(href) @@ -60,45 +80,7 @@ class SummaryPage(LoggedInPage): self.browser.location(href) -class DynamicPage(Page): - """ - Most of Wells Fargo pages have the same URI pattern. - Some of these pages are HTML, some are PDF. - """ - def __init__(self, browser, response, *args, **kwargs): - super(DynamicPage, self).__init__(browser, response, *args, **kwargs) - # Ugly hack to figure out the page type - klass = RawPage if response.content[:4] == '%PDF' else HTMLPage - self.doc = klass(browser, response, *args, **kwargs).doc - subclass = None - # Ugly hack to figure out the page type - if response.content[:4] == '%PDF': - subclass = StatementSubPage - elif u'Account Activity' in self._title(): - name = self._account_name() - if u'CHECKING' in name or u'SAVINGS' in name: - subclass = ActivityCashSubPage - elif u'CARD' in name: - subclass = ActivityCardSubPage - elif u'Statements & Documents' in self._title(): - subclass = StatementsSubPage - assert subclass - self.subpage = subclass(browser, response, *args, **kwargs) - - @property - def logged(self): - return self.subpage.logged - - def _title(self): - return self.doc.xpath(u'//title/text()')[0] - - def _account_name(self): - return self.doc.xpath( - u'//select[@name="selectedAccountUID"]' - u'/option[@selected="selected"]/text()')[0] - - -class AccountSubPage(LoggedInPage): +class AccountPage(LoggedInPage): def account_id(self, name=None): if name: return name[-4:] # Last 4 digits of "BLAH XXXXXXX1234" @@ -106,9 +88,10 @@ class AccountSubPage(LoggedInPage): return self.account_id(self.account_name()) -class ActivitySubPage(AccountSubPage): - def is_activity(self): - return True +class ActivityPage(AccountPage): + def is_here(self): + return bool(self.doc.xpath( + u'contains(//title/text(),"Account Activity")')) def accounts_names(self): return self.doc.xpath( @@ -128,9 +111,10 @@ class ActivitySubPage(AccountSubPage): u'/option[@selected="selected"]/@value')[0] def account_name(self): - return self.doc.xpath( - u'//select[@name="selectedAccountUID"]' - u'/option[@selected="selected"]/text()')[0] + for name in self.doc.xpath(u'//select[@name="selectedAccountUID"]' + u'/option[@selected="selected"]/text()'): + return name + return u'' def account_type(self, name=None): raise NotImplementedError() @@ -168,7 +152,12 @@ class ActivitySubPage(AccountSubPage): raise NotImplementedError() -class ActivityCashSubPage(ActivitySubPage): +class ActivityCashPage(ActivityPage): + def is_here(self): + return super(ActivityCashPage, self).is_here() and \ + (u'CHECKING' in self.account_name() or + u'SAVINGS' in self.account_name()) + def account_type(self, name=None): name = name or self.account_name() if u'CHECKING' in name: @@ -231,7 +220,11 @@ class ActivityCashSubPage(ActivitySubPage): return False -class ActivityCardSubPage(ActivitySubPage): +class ActivityCardPage(ActivityPage): + def is_here(self): + return super(ActivityCardPage, self).is_here() and \ + u'CARD' in self.account_name() + def account_type(self, name=None): return Account.TYPE_CARD @@ -240,7 +233,7 @@ class ActivityCardSubPage(ActivitySubPage): u'//td[@headers="outstandingBalance"]/text()')[0] def get_account(self): - account = ActivitySubPage.get_account(self) + account = ActivityPage.get_account(self) # Credit card is essentially a liability. # Negative amount means there's a payment due. @@ -298,9 +291,8 @@ class ActivityCardSubPage(ActivitySubPage): return False -class StatementsSubPage(AccountSubPage): - def is_statements(self): - return True +class StatementsPage(AccountPage): + is_here = u'contains(//title/text(),"Statements")' def account_name(self): return self.doc.xpath( @@ -344,13 +336,13 @@ class StatementsSubPage(AccountSubPage): yield unquote(inner_uri) -class StatementSubPage(LoggedPage, RawPage): +class StatementPage(LoggedPage, RawPage): def __init__(self, *args, **kwArgs): RawPage.__init__(self, *args, **kwArgs) self._parser = StatementParser(self.doc) - def is_statement(self): - return True + def is_here(self): + return self.doc[:4] == '%PDF' def iter_transactions(self): # Maintain a nice consistent newer-to-older order of transactions.