Retrieve all transactions from the history. Merchant and regular account support. All transactions add up to balance.

Signed-off-by: Oleg Plakhotniuk <olegus8@gmail.com>

closes #1406
This commit is contained in:
Oleg Plakhotniuk 2014-06-22 22:31:35 +02:00 committed by Laurent Bachelier
commit 9a2f0d55aa
4 changed files with 136 additions and 42 deletions

View file

@ -56,5 +56,5 @@ class PaypalBackend(BaseBackend, ICapBank):
def iter_history(self, account): def iter_history(self, account):
with self.browser: with self.browser:
for history in self.browser.get_history(account): for history in self.browser.get_download_history(account):
yield history yield history

View file

@ -20,6 +20,7 @@
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage, HistoryPage from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage, HistoryPage
import datetime
__all__ = ['Paypal'] __all__ = ['Paypal']
@ -43,6 +44,8 @@ class Paypal(BaseBrowser):
DEFAULT_TIMEOUT = 30 # CSV export is slow DEFAULT_TIMEOUT = 30 # CSV export is slow
BEGINNING = datetime.date(1998,6,1) # The day PayPal was founded
def home(self): def home(self):
self.location('https://' + self.DOMAIN + '/en/cgi-bin/webscr?cmd=_login-run') self.location('https://' + self.DOMAIN + '/en/cgi-bin/webscr?cmd=_login-run')
@ -75,28 +78,65 @@ class Paypal(BaseBrowser):
return self.page.get_account(_id) return self.page.get_account(_id)
def get_history(self, account): def get_history(self, account):
self.history() self.history(start=self.BEGINNING, end=datetime.date.today())
parse = True parse = True
while parse: while parse:
for trans in self.page.iter_transactions(account): for trans in self.page.iter_transactions(account):
yield trans yield trans
parse = self.page.next() parse = self.page.next()
def history(self): def history(self, start, end):
self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0') self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0')
self.page.filter() self.page.filter(start, end)
assert self.is_on_page(HistoryPage) assert self.is_on_page(HistoryPage)
def download_history(self): def get_download_history(self, account):
for csv in self.download_history():
for trans in self.page.iter_transactions(account):
yield trans
def period_has_trans(self, start, end):
    """
    Tell whether the history holds anything for the given period.

    Loads the history page filtered to ``start``..``end`` and returns
    the first transaction produced by the page parser when there is
    one. Otherwise the result of ``self.page.next()`` is returned
    (presumably whether a further result page exists -- confirm
    against HistoryPage).
    """
    self.history(start, end)
    first = next(self.page.parse(), False)
    if first:
        return first
    # Nothing usable was parsed on this page; defer to the pager.
    return self.page.next()
def bisect_oldest_date(self, start, end, steps=5):
    """
    Find an approximate beginning of the transaction history in at
    most ``steps`` bisection iterations.

    Each iteration splits the ``start``..``end`` interval at its
    midpoint and asks ``period_has_trans`` whether the older half
    contains anything: if so the older half is kept, otherwise the
    newer half. Every iteration costs one ``period_has_trans`` call.

    :param start: lower bound of the search (a ``datetime.date``)
    :param end: upper bound of the search (a ``datetime.date``)
    :param steps: maximum number of halvings; zero, ``None`` or a
                  negative value returns ``start`` unchanged
    :returns: the lower bound of the narrowed interval
    """
    remaining = steps or 0
    while remaining > 0:
        # Floor division keeps the midpoint on a whole day.
        middle = start + (end - start) // 2
        if middle == start:
            # The interval can no longer be halved, so further requests
            # could not move the lower bound.
            break
        if self.period_has_trans(start, middle):
            end = middle
        else:
            start = middle
        remaining -= 1
    return start
def download_history(self, step=90):
""" """
Download CSV history. Download CSV history.
However, it is not normalized, and sometimes the download is refused However, it is not normalized, and sometimes the download is refused
and sent later by mail. and sent later by mail.
""" """
self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1') # PayPal limitations as of 2014-06-16
assert self.is_on_page(DownloadHistoryPage) assert step <= 365*2
self.page.download()
return self.page.document # To minimize the number of CSV requests, let's first find an
# approximate starting point of transaction history.
end = datetime.date.today()
beginning = self.bisect_oldest_date(self.BEGINNING, end)
while end > beginning:
start = end - datetime.timedelta(step)
self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
assert self.is_on_page(DownloadHistoryPage)
self.page.download(start, end)
assert self.is_on_page(SubmitPage)
yield self.page.document
end = start - datetime.timedelta(1)
def transfer(self, from_id, to_id, amount, reason=None): def transfer(self, from_id, to_id, amount, reason=None):
raise NotImplementedError() raise NotImplementedError()

View file

@ -125,18 +125,16 @@ class AccountPage(BasePage):
class DownloadHistoryPage(BasePage): class DownloadHistoryPage(BasePage):
def download(self, days=90): def download(self, start, end):
today = datetime.date.today()
start = today - datetime.timedelta(days)
self.browser.select_form(name='form1') self.browser.select_form(name='form1')
self.browser['to_c'] = str(today.year) self.browser['to_c'] = str(end.year)
self.browser['to_a'] = str(today.month) self.browser['to_a'] = str(end.month)
self.browser['to_b'] = str(today.day) self.browser['to_b'] = str(end.day)
self.browser['from_c'] = str(start.year) self.browser['from_c'] = str(start.year)
self.browser['from_a'] = str(start.month) self.browser['from_a'] = str(start.month)
self.browser['from_b'] = str(start.day) self.browser['from_b'] = str(start.day)
self.browser['custom_file_type'] = ['comma_balaffecting'] self.browser['custom_file_type'] = ['comma_allactivity']
self.browser['latest_completed_file_type'] = [''] self.browser['latest_completed_file_type'] = ['']
self.browser.submit() self.browser.submit()
@ -147,43 +145,71 @@ class SubmitPage(BasePage):
Any result of form submission Any result of form submission
""" """
def iter_transactions(self, account): def iter_transactions(self, account):
DATE = 0
TIME = 1
NAME = 3
TYPE = 4
CURRENCY = 6
GROSS = 7
FEE = 8
NET = 9
FROM = 10
TO = 11
TRANS_ID = 12
ITEM = 15
SITE = 24
csv = self.document csv = self.document
if len(csv.header) == 43:
# Merchant multi-currency account
DATE = 0
TIME = 1
NAME = 3
TYPE = 4
CURRENCY = 6
GROSS = 7
FEE = 8
NET = 9
FROM = 10
TO = 11
TRANS_ID = 12
ITEM = 15
SITE = 24
elif len(csv.header) == 11:
# Regular multi-currency account
DATE = 0
TIME = 1
NAME = 3
TYPE = 4
CURRENCY = 6
GROSS = -1
FEE = -1
NET = 7
FROM = -1
TO = -1
TRANS_ID = -1
ITEM = -1
SITE = -1
else:
raise ValueError('CSV fields count of %i is not supported' % len(csv.header))
for row in csv.rows: for row in csv.rows:
# we filter accounts by currency # we filter accounts by currency
if account.get_currency(row[CURRENCY]) != account.currency: if account.get_currency(row[CURRENCY]) != account.currency:
continue continue
trans = Transaction(row[TRANS_ID]) # analog to dict.get()
get = lambda i, v=None: row[i] if 0 <= i < len(row) else v
trans = Transaction(get(TRANS_ID, u''))
# silly American locale # silly American locale
if re.search(r'\d\.\d\d$', row[NET]): if re.search(r'\d\.\d\d$', row[NET]):
date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%m/%d/%Y %I:%M:%S %p") date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%m/%d/%Y %H:%M:%S")
else: else:
date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%d/%m/%Y %H:%M:%S") date = datetime.datetime.strptime(row[DATE] + ' ' + row[TIME], "%d/%m/%Y %H:%M:%S")
trans.date = date trans.date = date
trans.rdate = date trans.rdate = date
line = row[NAME] line = row[NAME]
if row[ITEM]: if get(ITEM):
line += u' ' + row[ITEM] line += u' ' + row[ITEM]
if row[SITE]: if get(SITE):
line += u"(" + row[SITE] + u")" line += u"(" + row[SITE] + u")"
trans.raw = line trans.raw = line
trans.label = row[NAME] trans.label = row[NAME]
if row[TYPE].startswith(u'Update to eCheck') or \
row[TYPE].startswith(u'Order'):
continue
if row[TYPE].endswith(u'Credit Card') or row[TYPE].endswith(u'carte bancaire'): if row[TYPE].endswith(u'Credit Card') or row[TYPE].endswith(u'carte bancaire'):
trans.type = Transaction.TYPE_CARD trans.type = Transaction.TYPE_CARD
elif row[TYPE].endswith(u'Payment Sent') or row[TYPE].startswith(u'Paiement'): elif row[TYPE].endswith(u'Payment Sent') or row[TYPE].startswith(u'Paiement'):
@ -195,11 +221,11 @@ class SubmitPage(BasePage):
# Net is what happens after the fee (0 for most users), so what is the most "real" # Net is what happens after the fee (0 for most users), so what is the most "real"
trans.amount = clean_amount(row[NET]) trans.amount = clean_amount(row[NET])
trans._gross = clean_amount(row[GROSS]) trans._gross = clean_amount(get(GROSS, row[NET]))
trans._fees = clean_amount(row[FEE]) trans._fees = clean_amount(get(FEE, u'0.00'))
trans._to = row[TO] or None trans._to = get(TO)
trans._from = row[FROM] or None trans._from = get(FROM)
yield trans yield trans
@ -250,14 +276,12 @@ class HistoryPage(BasePage):
time_format = "%H:%M:%S" time_format = "%H:%M:%S"
return date_format, time_format, months return date_format, time_format, months
def filter(self): def filter(self, start, end):
date_format = self.guess_format()[0] date_format = self.guess_format()[0]
today = datetime.date.today()
start = datetime.date(1998,6,1) # The day PayPal was founded
self.browser.select_form(name='history') self.browser.select_form(name='history')
self.browser['dateoption'] = ['dateselect'] self.browser['dateoption'] = ['dateselect']
self.browser['from_date'] = start.strftime(date_format) self.browser['from_date'] = start.strftime(date_format)
self.browser['to_date'] = today.strftime(date_format) self.browser['to_date'] = end.strftime(date_format)
self.browser.submit(name='show') self.browser.submit(name='show')
self.browser.select_form(name='history') self.browser.select_form(name='history')
self.browser.submit(name='filter_2') self.browser.submit(name='filter_2')
@ -301,7 +325,8 @@ class HistoryPage(BasePage):
info = to_unicode(row.xpath('.//td[@class="paymentTypeInfo"]')[0].text_content().strip()) info = to_unicode(row.xpath('.//td[@class="paymentTypeInfo"]')[0].text_content().strip())
trans.raw = info + u' ' + trans.label trans.raw = info + u' ' + trans.label
if u'Authorization' in info or u'Autorisation' in info: if u'Authorization' in info or u'Autorisation' in info or \
u'Order' in info:
continue continue
if u'Credit Card' in trans.label or u'Carte bancaire' in trans.label: if u'Credit Card' in trans.label or u'Carte bancaire' in trans.label:

29
modules/paypal/test.py Normal file
View file

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Oleg Plakhotniuk
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
class PaypalTest(BackendTest):
    """Live test of the PayPal backend."""

    BACKEND = 'paypal'

    def test_balance(self):
        """Every account's history amounts must add up to its balance."""
        for account in self.backend.iter_accounts():
            history = self.backend.iter_history(account)
            balance = sum(trans.amount for trans in history)
            self.assertEqual(balance, account.balance)