# -*- coding: utf-8 -*-

# Copyright(C) 2014      Oleg Plakhotniuk
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see http://www.gnu.org/licenses/.


from weboob.capabilities.bank import Account, AccountNotFound, Transaction
from weboob.exceptions import BrowserIncorrectPassword, BrowserUnavailable
from weboob.tools.capabilities.bank.transactions import \
    AmericanTransaction as AmTr

from .parser import StatementParser, clean_label

from time import sleep
from tempfile import mkdtemp
from shutil import rmtree
from itertools import chain
from functools import wraps
import datetime
import re
import os
import subprocess

__all__ = ['Citibank']


def retrying(func):
    """
    Decorator: call ``func`` again for as long as it raises OnceAgain.

    Up to MAX_RETRIES attempts are made. Between attempts the decorator
    sleeps for 1, 2, 4, ... seconds, capped at MAX_DELAY seconds.
    Any exception other than OnceAgain propagates immediately.

    Raises BrowserUnavailable when every attempt raised OnceAgain.
    """
    MAX_RETRIES = 10
    MAX_DELAY = 10

    @wraps(func)
    def inner(*args, **kwargs):
        for attempt in range(MAX_RETRIES):
            try:
                return func(*args, **kwargs)
            except OnceAgain:
                # Sleeping after the last attempt would only delay the
                # error below, so back off only when a retry will follow.
                if attempt < MAX_RETRIES - 1:
                    sleep(min(1 << attempt, MAX_DELAY))
        raise BrowserUnavailable('Unexpected site behavior. '
                                 'Perhaps this module needs some fixing...')
    return inner


class OnceAgain(Exception):
    """Raised by a @retrying function to request another attempt."""
    pass


class Citibank(object):
    """
    Citibank website uses lots of Javascript, some of which seems to be
    dynamically generated and intentionally obfuscated.
    Our answer to THAT is... heavy artillery firing Selenium rounds!

    External dependencies:
    Firefox (https://www.mozilla.org/firefox).
    MuPDF (http://www.mupdf.com).
    Python bindings for Selenium (https://pypi.python.org/pypi/selenium).
    Xvfb (http://www.x.org/releases/X11R7.6/doc/man/man1/Xvfb.1.xhtml).

    Tested on Arch Linux snapshot of 2014-08-25 (official and user packages).
    Don't forget to do "export DISPLAY=:0".

    Only a single credit card account is currently supported.
    Contributions are welcome!
    """

    def __init__(self, username, password, logger, **kwargs):
        """
        Store the credentials and the logger. No browser is started here;
        each public operation calls start() and finish() itself.

        Extra keyword arguments are accepted and ignored.
        """
        self._logger = logger
        self._username = username
        self._password = password

    def get_account(self, id_):
        """
        Return the first account yielded by iter_accounts().

        Raises AccountNotFound if its id differs from ``id_``.
        """
        account = next(self.iter_accounts())
        if account.id != id_:
            raise AccountNotFound()
        return account

    def iter_accounts(self):
        """
        Log in, scrape the account summary and yield a single Account.

        The browser session is closed before the account is yielded, and
        also when scraping fails.
        """
        self.start()
        try:
            self._account_link().click()
            self.wait_ajax()

            def text_of(xpath):
                # Text of the first element matching the XPath expression.
                return self._browser.find_elements_by_xpath(xpath)[0].text

            label = self.wait(
                'label#accountSingle .cS-accountMenuAccount')[0].text
            bal = text_of(
                u'//span[contains(text(),'
                '"Current Balance:")]/../../div[@class="cT-valueItem"]')
            cardlimit = text_of(
                u'//span[contains(text(),"Total Revolving Credit Line:")]'
                '/../../div[@class="cT-valueItem"]')
            paydue = text_of(
                u'//span[contains(text(),'
                '"Minimum Payment Due") and @role="gridcell"]')
            paydate = re.match(u'Minimum Payment Due.On (..-..-....):',
                               paydue, re.DOTALL | re.UNICODE).group(1)
            paymin = text_of(
                u'//span[contains(text(),"Minimum Payment Due")]'
                '/../../div[contains(@class,"cT-valueItem")]')

            account = Account()
            # The id is the last four characters of the displayed label.
            account.id = label[-4:]
            account.label = label
            account.currency = Account.get_currency(bal)
            # The displayed balance is negated before being stored.
            account.balance = -AmTr.decimal_amount(bal)
            account.cardlimit = AmTr.decimal_amount(cardlimit)
            account.paydate = datetime.datetime.strptime(paydate, '%m-%d-%Y')
            account.paymin = AmTr.decimal_amount(paymin)
            account.type = Account.TYPE_CARD
        finally:
            # Release the browser and temporary directories even if the
            # page did not look the way we expected.
            self.finish()
        yield account

    def iter_history(self, account):
        """
        Yield the transactions from iter_history_recent() followed by
        those from iter_history_statements().
        """
        for trans in chain(self.iter_history_recent(account),
                           self.iter_history_statements(account)):
            yield trans

    def start(self):
        """
        Launch Firefox with a fresh download directory and log in.

        Sets ``self._downloads`` and ``self._browser``. On any failure the
        resources created so far are released before the exception
        propagates, so callers only need finish() after a successful
        start().

        Raises BrowserIncorrectPassword if the home page cannot be reached
        after submitting the sign-on form.
        """
        # To avoid ImportError during e.g. building modules list.
        from selenium import webdriver

        HOME_URL = 'https://online.citibank.com/US/JPS/portal/Home.do'
        WIDTH = 1920
        HEIGHT = 10000  # So that everything fits...

        self._downloads = mkdtemp()
        self._logger.debug('Saving downloaded files to %s' % self._downloads)

        launched = False
        try:
            prof = webdriver.FirefoxProfile()
            # Save PDFs straight to our directory, without dialogs or the
            # built-in viewer.
            prof.set_preference('browser.download.folderList', 2)
            prof.set_preference('browser.download.dir', self._downloads)
            prof.set_preference('browser.helperApps.neverAsk.saveToDisk',
                                'application/pdf')
            prof.set_preference('pdfjs.disabled', True)
            self._browser = webdriver.Firefox(prof)
            launched = True
        finally:
            if not launched:
                rmtree(self._downloads)
                del self._downloads

        logged_in = False
        try:
            self._browser.set_window_size(WIDTH, HEIGHT)
            self._browser.get('https://online.citibank.com')
            self.wait('input[name="usernameMasked"]')[0].send_keys(
                self._username)
            self.wait('input[name="password"]')[0].send_keys(self._password)
            self.wait('form[name="SignonForm"]')[0].submit()
            self._browser.get(HOME_URL)
            if self._browser.current_url != HOME_URL:
                raise BrowserIncorrectPassword()
            logged_in = True
        finally:
            if not logged_in:
                # Do not leave a browser and temp directories behind.
                self.finish()

    def finish(self):
        """
        Close the browser and delete the download and profile directories
        created by start().
        """
        prof_dir = self._browser.firefox_profile.profile_dir
        try:
            self._browser.close()
        finally:
            # Remove temporary data even when closing the browser failed.
            del self._browser
            rmtree(self._downloads)
            del self._downloads
            rmtree(prof_dir)

    def iter_history_recent(self, account):
        """
        Yield transactions scraped from the account activity page.

        Raises AccountNotFound if ``account.id`` does not match the
        account shown by the site. The browser session is closed when the
        generator finishes, fails, or is closed by the consumer.
        """
        self.start()
        try:
            if account.id != self._account_id():
                raise AccountNotFound()
            self._account_link().click()
            self.wait_ajax()
            for span in self.find('span.cM-maximizeButton'):
                span.click()
            for tr in self.find('tr.payments,tr.purchase'):
                yield self._parse_recent_row(tr)
        finally:
            self.finish()

    def _parse_recent_row(self, tr):
        """Build a Transaction from one row of the activity table."""
        def trdata(n):
            # First text line of the n-th column of the row.
            return tr.find_element_by_css_selector(
                'td.cT-bodyTableColumn%i span.cT-line1' % n).text

        # Details live in a sibling row whose id is derived from this
        # row's id.
        treid = tr.get_attribute('id').replace('rowID', 'rowIDExt')
        tredata = {}
        for tre in self.find('tr#%s' % treid):
            labels = [x.text for x in tre.find_elements_by_css_selector(
                'div.cT-labelItem')]
            values = [x.text for x in tre.find_elements_by_css_selector(
                'div.cT-valueItem')]
            tredata = dict(zip(labels, values))

        ref = tredata.get(u'Reference Number:', u'')
        tdate = trdata(1)
        # Fall back to the transaction date when no posted date is shown.
        pdate = tredata.get(u'Posted Date :', tdate)
        desc = clean_label(trdata(2))
        amount = trdata(4)

        tdate = datetime.datetime.strptime(tdate, '%m-%d-%Y')
        pdate = datetime.datetime.strptime(pdate, '%m-%d-%Y')
        # Amounts in parentheses are stored as positive values, all
        # others are negated.
        if amount.startswith(u'(') and amount.endswith(u')'):
            amount = AmTr.decimal_amount(amount[1:-1])
        else:
            amount = -AmTr.decimal_amount(amount)

        trans = Transaction(ref)
        trans.date = tdate
        trans.rdate = pdate
        trans.type = Transaction.TYPE_UNKNOWN
        trans.raw = desc
        trans.label = desc
        trans.amount = amount
        return trans

    def iter_history_statements(self, account):
        """
        Download every finalized PDF statement and yield its transactions,
        newest first within each statement.

        Raises AccountNotFound if ``account.id`` does not match the
        account shown by the site. The browser session is closed when the
        generator finishes, fails, or is closed by the consumer.
        """
        # To avoid ImportError during e.g. building modules list.
        from selenium.webdriver.common.keys import Keys
        from selenium.common.exceptions import \
            MoveTargetOutOfBoundsException, ElementNotVisibleException

        self.start()
        try:
            if account.id != self._account_id():
                raise AccountNotFound()
            self.wait('a#cmlink_ViewPastStmtLink')[0].click()
            opts = self.wait('option#currentStatementDateOptions')
            for i, opt in enumerate(opts):
                # We're interested only in finalized statements.
                if u'Unbilled' in opt.get_attribute('value'):
                    continue
                self.wait('div#currentStatementsDate-button')[0].click()
                ul = self.wait('ul#currentStatementsDate-menu')[0]
                while True:
                    try:
                        self.wait(
                            'li#currentStatementDateOptions span')[i].click()
                        break
                    except (MoveTargetOutOfBoundsException,
                            ElementNotVisibleException):
                        # Scroll the menu until the entry can be clicked.
                        ul.send_keys(Keys.ARROW_DOWN)
                self.wait('a#downloadCurrentStatements')[0].click()
                pdfname = self.wait_file('.pdf')
                pdfpath = os.path.join(self._downloads, pdfname)
                with open(pdfpath, 'rb') as f:
                    parser = StatementParser(f.read())
                # Remove the file so that the next wait_file() call does
                # not pick it up again.
                os.remove(pdfpath)
                # Transactions in a statement can go in different order.
                ts = sorted(parser.read_transactions(),
                            key=lambda t: t.date, reverse=True)
                for t in ts:
                    yield t
        finally:
            self.finish()

    def find(self, selector):
        """Return all elements matching the CSS selector (may be empty)."""
        self._logger.debug('Finding selector """%s""" on page %s' % (
            selector, self._browser.current_url))
        return self._browser.find_elements_by_css_selector(selector)

    @retrying
    def wait(self, selector):
        """
        Return the non-empty list of elements matching the CSS selector,
        retrying until at least one is found.
        """
        els = self.find(selector)
        if not els:
            raise OnceAgain()
        return els

    @retrying
    def wait_ajax(self):
        """
        Retry until the page contains "Please wait" elements and none of
        them is displayed.
        """
        from selenium.common.exceptions import StaleElementReferenceException

        self._logger.debug('Waiting for async requests to finish on page %s'
                           % self._browser.current_url)
        els = self._browser.find_elements_by_xpath(
            u'//*[contains(text(),"Please wait")]')
        try:
            if not els or any(x.is_displayed() for x in els):
                raise OnceAgain()
        except StaleElementReferenceException:
            # The page changed under us; look the elements up again.
            raise OnceAgain()

    @retrying
    def wait_file(self, suffix):
        """
        Return the name of a completely downloaded file in the download
        directory whose name ends with ``suffix``, retrying until one
        shows up.

        Uses the external ``fuser`` command to detect files still in use.
        """
        self._logger.debug('Waiting for file "*%s" to finish downloading.' %
                           suffix)
        for name in os.listdir(self._downloads):
            if not name.endswith(suffix):
                continue
            path = os.path.join(self._downloads, name)
            # Wait until file is not empty.
            if not os.stat(path).st_size:
                continue
            # Wait until no processes are accessing the file.
            if subprocess.call(['fuser', '-s', path]) == 0:
                continue
            return name
        raise OnceAgain()

    def _account_link(self):
        """Return the account name link element, waiting for it to appear."""
        return self.wait('a#cmlink_AccountNameLink')[0]

    def _account_id(self):
        """Return the trailing digits after the last "-" in the link text."""
        return re.match('.*-([0-9]+)$', self._account_link().text).group(1)