CrAgr: optimized operations retrieval for CA Centre by adding a start_offset parameter to AccountsList.get_history
This commit is contained in:
parent
51f8efc41d
commit
43d562a338
2 changed files with 35 additions and 19 deletions
|
|
@ -131,26 +131,29 @@ class Cragr(BaseBrowser):
|
|||
return None
|
||||
|
||||
def get_history(self, account):
    """
    Yields, one by one, the operations of the given account.

    The browser first goes on the account page (account.link_id), then
    follows either the "expand history" link or the "next page" link
    until the current page does not provide any.

    Each operation is created by self.page.get_history(), which receives
    the number of operations yielded so far as start index.
    """
    history_url = account.link_id
    operations_count = 0

    # 1st, go on the account page
    self.logger.debug('going on: %s' % history_url)
    self.location('https://%s%s' % (self.DOMAIN, history_url))

    # Some regions have a "Show more" (well, actually "Voir les 25
    # suivants") link we have to use to get all the operations.
    # However, it does not show only the 25 next results, it *adds* them
    # to the current view. Therefore, we have to parse each new page using
    # an offset, in order to ignore all already-fetched operations.
    # This especially occurs on CA Centre.
    use_expand_url = bool(self.page.expand_history_page_url())
    while history_url:
        # we skip "operations_count" operations on each page if we are in
        # the case described above
        operations_offset = operations_count if use_expand_url else 0
        for page_operation in self.page.get_history(operations_count, operations_offset):
            operations_count += 1
            yield page_operation

        if use_expand_url:
            history_url = self.page.expand_history_page_url()
        else:
            history_url = self.page.next_page_url()

        if not history_url:
            # Last page reached: stop here instead of requesting a bogus
            # 'https://<domain>None' URL before the loop condition is
            # evaluated again.
            break

        self.logger.debug('going on: %s' % history_url)
        self.location('https://%s%s' % (self.DOMAIN, history_url))
|
||||
|
||||
def dict_find_value(self, dictionary, value):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -173,10 +173,12 @@ class AccountsList(CragrBasePage):
|
|||
data = re.sub(' +', ' ', data.replace("\n", ' ').strip())
|
||||
return data
|
||||
|
||||
def get_history(self, start_index = 0):
|
||||
def get_history(self, start_index = 0, start_offset = 0):
|
||||
"""
|
||||
Returns the history of a specific account. Note that this function
|
||||
expects the current page page to be the one dedicated to this history.
|
||||
expects the current page to be the one dedicated to this history.
|
||||
start_index is the id used for the first created operation.
|
||||
start_offset allows ignoring the `n' first Operations on the page.
|
||||
"""
|
||||
# tested on CA Lorraine, Paris, Toulouse
|
||||
# avoid parsing the page as an account-dedicated page if it is not one
|
||||
|
|
@ -185,6 +187,7 @@ class AccountsList(CragrBasePage):
|
|||
|
||||
index = start_index
|
||||
operation = False
|
||||
skipped = 0
|
||||
|
||||
body_elmt_list = self.document.xpath('/html/body/*')
|
||||
|
||||
|
|
@ -229,6 +232,9 @@ class AccountsList(CragrBasePage):
|
|||
if table_layout:
|
||||
lines = self.document.xpath('id("operationsContent")//table[@class="tb"]/tr')
|
||||
for line in lines:
|
||||
if skipped < start_offset:
|
||||
skipped += 1
|
||||
continue
|
||||
operation = Operation(index)
|
||||
index += 1
|
||||
operation.date = self.extract_text(line[0])
|
||||
|
|
@ -237,6 +243,10 @@ class AccountsList(CragrBasePage):
|
|||
yield operation
|
||||
elif (not alternate_layout):
|
||||
for body_elmt in interesting_divs:
|
||||
if skipped < start_offset:
|
||||
if self.is_right_aligned_div(body_elmt):
|
||||
skipped += 1
|
||||
continue
|
||||
if (self.is_right_aligned_div(body_elmt)):
|
||||
# this is the second line of an operation entry, displaying the amount
|
||||
operation.amount = clean_amount(self.extract_text(body_elmt))
|
||||
|
|
@ -255,6 +265,9 @@ class AccountsList(CragrBasePage):
|
|||
operation.label = u'Unknown'
|
||||
else:
|
||||
for i in range(0, len(interesting_divs)/3):
|
||||
if skipped < start_offset:
|
||||
skipped += 1
|
||||
continue
|
||||
operation = Operation(index)
|
||||
index += 1
|
||||
# amount
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue