PayPal transaction history fetching with adaptive steps.

Signed-off-by: Oleg Plakhotniuk <olegus8@gmail.com>

closes #1406
This commit is contained in:
Oleg Plakhotniuk 2014-06-23 15:25:43 -05:00 committed by Laurent Bachelier
commit 983ef7b925

View file

@ -77,66 +77,72 @@ class Paypal(BaseBrowser):
return self.page.get_account(_id) return self.page.get_account(_id)
def get_history(self, account): def get_history(self, account, step_min=90, step_max=365*10):
self.history(start=self.BEGINNING, end=datetime.date.today()) def fetch_fn(start, end):
parse = True def transactions():
while parse: parse = True
for trans in self.page.iter_transactions(account): while parse:
yield trans for trans in self.page.iter_transactions(account):
parse = self.page.next() yield trans
parse = self.page.next()
self.history(start=start, end=end)
if next(self.page.parse(), False):
return transactions()
return self.smart_fetch(beginning=self.BEGINNING,
end=datetime.date.today(),
step_min=step_min,
step_max=step_max,
fetch_fn=fetch_fn)
def history(self, start, end): def history(self, start, end):
self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0') self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0')
self.page.filter(start, end) self.page.filter(start, end)
assert self.is_on_page(HistoryPage) assert self.is_on_page(HistoryPage)
def get_download_history(self, account): def get_download_history(self, account, step_min=90, step_max=365*2):
for csv in self.download_history(): def fetch_fn(start, end):
for trans in self.page.iter_transactions(account): if self.download_history(start, end).rows:
yield trans return self.page.iter_transactions(account)
assert step_max <= 365*2 # PayPal limitations as of 2014-06-16
return self.smart_fetch(beginning=self.BEGINNING,
end=datetime.date.today(),
step_min=step_min,
step_max=step_max,
fetch_fn=fetch_fn)
def period_has_trans(self, start, end): def smart_fetch(self, beginning, end, step_min, step_max, fetch_fn):
""" """
Checks if there're any transactions in a given period. Fetches transactions in small chunks to avoid request timeouts.
Time period of each requested chunk is adjusted dynamically.
""" """
self.history(start, end) FACTOR = 2
return next(self.page.parse(), False) or self.page.next() step = step_min
while end > beginning:
start = end - datetime.timedelta(step)
chunk = fetch_fn(start, end)
end = start - datetime.timedelta(1)
if chunk:
# If there are transactions in the current period,
# decrease the period.
step = max(step_min, step/FACTOR)
for trans in chunk:
yield trans
else:
# If there are no transactions in the current period,
# increase the period.
step = min(step_max, step*FACTOR)
def bisect_oldest_date(self, start, end, steps=5): def download_history(self, start, end):
"""
Finds an approximate beginning of transactions history in a
given number of iterations.
"""
if not steps:
return start
middle = start + (end-start)/2
if self.period_has_trans(start, middle):
return self.bisect_oldest_date(start, middle, steps-1)
else:
return self.bisect_oldest_date(middle, end, steps-1)
def download_history(self, step=90):
""" """
Download CSV history. Download CSV history.
However, it is not normalized, and sometimes the download is refused However, it is not normalized, and sometimes the download is refused
and sent later by mail. and sent later by mail.
""" """
# PayPal limitations as of 2014-06-16 self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
assert step <= 365*2 assert self.is_on_page(DownloadHistoryPage)
self.page.download(start, end)
# To minimize the number of CSV requests, let's first find an assert self.is_on_page(SubmitPage)
# approximate starting point of transaction history. return self.page.document
end = datetime.date.today()
beginning = self.bisect_oldest_date(self.BEGINNING, end)
while end > beginning:
start = end - datetime.timedelta(step)
self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
assert self.is_on_page(DownloadHistoryPage)
self.page.download(start, end)
assert self.is_on_page(SubmitPage)
yield self.page.document
end = start - datetime.timedelta(1)
def transfer(self, from_id, to_id, amount, reason=None): def transfer(self, from_id, to_id, amount, reason=None):
raise NotImplementedError() raise NotImplementedError()