CrAgr: trivia: reordered methods within the AccountsList class.
This commit is contained in:
parent
ca4240cb19
commit
63dbc13322
1 changed files with 101 additions and 101 deletions
|
|
@ -35,6 +35,37 @@ class AccountsList(CragrBasePage):
|
||||||
based on its URL.
|
based on its URL.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def is_accounts_list(self):
    """
    Tell whether the current page looks like the page listing the accounts.

    The page is recognised by the presence, anywhere in the body, of at
    least one link whose text contains "comptes titres".
    """
    # the "mes comptes titres" link is the marker we rely on
    matching_links = self.document.xpath('/html/body//a[contains(text(), "comptes titres")]')
    if matching_links:
        return True
    return False
|
||||||
|
|
||||||
|
def is_account_page(self):
    """
    Tell whether the current page looks like the history page of one
    specific account.

    Every span found directly inside a div of class "dv" is inspected; the
    page qualifies as soon as one of them, once stripped and freed of its
    newlines, reads like "Compte ... n ... <digits>" (case-insensitive).
    """
    # tested on CA Lorraine, Paris, Toulouse
    account_title = re.compile('.*Compte.*n.*[0-9]+', flags=re.IGNORECASE)
    candidate_spans = self.document.xpath('/html/body//div[@class="dv"]/span')
    return any(
        account_title.match(span.text_content().strip().replace("\n", ''))
        for span in candidate_spans
    )
|
||||||
|
|
||||||
|
def is_transfer_page(self):
    """
    Tell whether the current page looks like the page used to order
    transfers between accounts.

    Such a page holds, inside a form, both a select field named
    "numCompteEmetteur" and a select field named "numCompteBeneficiaire".
    """
    emitter_selects = self.document.xpath('/html/body//form//select[@name="numCompteEmetteur"]')
    beneficiary_selects = self.document.xpath('/html/body//form//select[@name="numCompteBeneficiaire"]')
    if not emitter_selects:
        return False
    return bool(beneficiary_selects)
|
||||||
|
|
||||||
def get_list(self):
|
def get_list(self):
|
||||||
"""
|
"""
|
||||||
Returns the list of available bank accounts
|
Returns the list of available bank accounts
|
||||||
|
|
@ -81,36 +112,70 @@ class AccountsList(CragrBasePage):
|
||||||
l.append(account)
|
l.append(account)
|
||||||
return l
|
return l
|
||||||
|
|
||||||
def is_accounts_list(self):
    """
    Tell whether the current page looks like the page listing the accounts.

    The page is recognised by the presence, anywhere in the body, of at
    least one link whose text contains "comptes titres".
    """
    # the "mes comptes titres" link is the marker we rely on
    matching_links = self.document.xpath('/html/body//a[contains(text(), "comptes titres")]')
    return bool(matching_links)

def is_account_page(self):
    """
    Tell whether the current page looks like the history page of one
    specific account.

    Every span found directly inside a div of class "dv" is inspected; the
    page qualifies as soon as one of them, once stripped and freed of its
    newlines, reads like "Compte ... n ... <digits>" (case-insensitive).
    """
    # tested on CA Lorraine, Paris, Toulouse
    account_title = re.compile('.*Compte.*n.*[0-9]+', flags=re.IGNORECASE)
    candidate_spans = self.document.xpath('/html/body//div[@class="dv"]/span')
    return any(
        account_title.match(span.text_content().strip().replace("\n", ''))
        for span in candidate_spans
    )

def is_transfer_page(self):
    """
    Tell whether the current page looks like the page used to order
    transfers between accounts.

    Such a page holds, inside a form, both a select field named
    "numCompteEmetteur" and a select field named "numCompteBeneficiaire".
    """
    emitter_selects = self.document.xpath('/html/body//form//select[@name="numCompteEmetteur"]')
    beneficiary_selects = self.document.xpath('/html/body//form//select[@name="numCompteBeneficiaire"]')
    return bool(emitter_selects) and bool(beneficiary_selects)

def get_history(self, start_index=0, start_offset=0):
    """
    Yields the history of a specific account as Transaction objects.

    Note that this function expects the current page to be the one
    dedicated to this history; nothing is yielded when is_account_page()
    does not recognise the page.

    start_index is the id used for the first created operation; the
    following ones get consecutive ids.
    start_offset allows ignoring the `n' first Transactions on the page.
    """
    # tested on CA Lorraine, Paris, Toulouse
    # avoid parsing the page as an account-dedicated page if it is not the case
    if not self.is_account_page():
        return

    # Step 1: extract text tokens
    tokens = []
    token_extractor = TokenExtractor()
    for div in self.document.getiterator('div'):
        # Compare the class exactly.  The former test, `in ('dv')`, was a
        # substring test against the plain string 'dv' (parentheses alone do
        # not make a tuple), so divs whose class is '', 'd' or 'v' --
        # including every div without a class attribute -- were analysed too.
        if div.attrib.get('class', '') != 'dv':
            continue
        self.logger.debug("Analyzing div %s", div)
        for token in token_extractor.extract_tokens(div):
            self.logger.debug('Extracted text token: "%s"', token)
            tokens.append(token)

    # Step 2: convert tokens into operations
    # Notes:
    # * the code below expects pieces of information to be in the date-label-amount order;
    #   could we achieve a heuristic smart enough to guess this order?
    # * unlike the former code, we parse every operation
    operations = []
    current_operation = {}
    for token in tokens:
        self.logger.debug('Analyzing token: "%s"', token)

        date_analysis = self.look_like_date_only(token)
        if date_analysis:
            # a date always opens a new operation
            current_operation = {'date': date_analysis.groups()[0]}
            continue

        date_desc_analysis = self.look_like_date_and_description(token)
        if date_desc_analysis:
            found = date_desc_analysis.groups()
            current_operation = {'date': found[0], 'label': found[1]}
        elif self.look_like_amount(token):
            # we consider the amount is the last information we get for an operation
            current_operation['amount'] = self.clean_amount(token)
            if current_operation.get('label') is not None and current_operation.get('date') is not None:
                self.logger.debug('Parsed operation: %s: %s: %s',
                                  current_operation['date'], current_operation['label'], current_operation['amount'])
                operations.append(current_operation)
                # NOTE(review): the indentation of this reset could not be
                # recovered from the diff rendering; it is assumed to belong
                # to the `if` above -- confirm against the repository.
                current_operation = {}
        elif current_operation.get('label') is not None:
            # any other text extends the label gathered so far
            current_operation['label'] = u'%s %s' % (current_operation['label'], token)
        else:
            current_operation['label'] = token

    # Step 3: yield adequate transactions
    for index, op in enumerate(operations[start_offset:], start_index):
        self.logger.debug('will yield the following transaction with index %d: %s: %s: %s',
                          index, op['date'], op['label'], op['amount'])
        transaction = Transaction(index)
        transaction.amount = op['amount']
        transaction.date = self.date_from_string(op['date'])
        transaction.raw = op['label']
        yield transaction
|
||||||
|
|
||||||
def get_transfer_accounts(self, select_name):
|
def get_transfer_accounts(self, select_name):
|
||||||
"""
|
"""
|
||||||
|
|
@ -219,6 +284,16 @@ class AccountsList(CragrBasePage):
|
||||||
year = today.year
|
year = today.year
|
||||||
return date(year, month, day)
|
return date(year, month, day)
|
||||||
|
|
||||||
|
def clean_amount(self, amount):
    """
    Removes weird characters and converts to a Decimal.

    Decimal commas become dots and every whitespace character is dropped,
    whatever its kind (plain, non-breaking or narrow non-breaking spaces
    are all used as thousands separators).  Anything following the two
    decimal digits, such as a currency symbol, is ignored.

    Returns Decimal(0) when the cleaned text does not start with an
    optionally negative number written with at least two decimal digits.

    For instance u'1 000,00 $' gives Decimal('1000.00').
    """
    # split() without argument cuts on any whitespace, which also covers the
    # separators the former chain of replace() calls left in place
    data = ''.join(amount.replace(',', '.').split())
    found = re.match(r'(-?[0-9]+\.[0-9]{2})', data)
    return Decimal(found.group(1)) if found else Decimal(0)
|
||||||
|
|
||||||
def look_like_account_owner(self, string):
|
def look_like_account_owner(self, string):
|
||||||
""" Returns a date object built from a given day/month pair. """
|
""" Returns a date object built from a given day/month pair. """
|
||||||
result = re.match('^\s*(M\.|Mr|Mme|Mlle|Monsieur|Madame|Mademoiselle)', string, re.IGNORECASE)
|
result = re.match('^\s*(M\.|Mr|Mme|Mlle|Monsieur|Madame|Mademoiselle)', string, re.IGNORECASE)
|
||||||
|
|
@ -256,78 +331,3 @@ class AccountsList(CragrBasePage):
|
||||||
result = re.search('^\s*((?:[012][0-9]|3[01])/(?:0[1-9]|1[012]))\s+(.+)\s*$', string)
|
result = re.search('^\s*((?:[012][0-9]|3[01])/(?:0[1-9]|1[012]))\s+(.+)\s*$', string)
|
||||||
self.logger.debug('Does "%s" look like a date+description pair? %s', string, ('yes' if result else 'no'))
|
self.logger.debug('Does "%s" look like a date+description pair? %s', string, ('yes' if result else 'no'))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def clean_amount(self, amount):
    """
    Removes weird characters and converts to a Decimal.

    Decimal commas become dots and every whitespace character is dropped,
    whatever its kind (plain, non-breaking or narrow non-breaking spaces
    are all used as thousands separators).  Anything following the two
    decimal digits, such as a currency symbol, is ignored.

    Returns Decimal(0) when the cleaned text does not start with an
    optionally negative number written with at least two decimal digits.

    For instance u'1 000,00 $' gives Decimal('1000.00').
    """
    # split() without argument cuts on any whitespace, which also covers the
    # separators the former chain of replace() calls left in place
    data = ''.join(amount.replace(',', '.').split())
    found = re.match(r'(-?[0-9]+\.[0-9]{2})', data)
    return Decimal(found.group(1)) if found else Decimal(0)
|
|
||||||
|
|
||||||
def get_history(self, start_index=0, start_offset=0):
    """
    Yields the history of a specific account as Transaction objects.

    Note that this function expects the current page to be the one
    dedicated to this history; nothing is yielded when is_account_page()
    does not recognise the page.

    start_index is the id used for the first created operation; the
    following ones get consecutive ids.
    start_offset allows ignoring the `n' first Transactions on the page.
    """
    # tested on CA Lorraine, Paris, Toulouse
    # avoid parsing the page as an account-dedicated page if it is not the case
    if not self.is_account_page():
        return

    # Step 1: extract text tokens
    tokens = []
    token_extractor = TokenExtractor()
    for div in self.document.getiterator('div'):
        # Compare the class exactly.  The former test, `in ('dv')`, was a
        # substring test against the plain string 'dv' (parentheses alone do
        # not make a tuple), so divs whose class is '', 'd' or 'v' --
        # including every div without a class attribute -- were analysed too.
        if div.attrib.get('class', '') != 'dv':
            continue
        self.logger.debug("Analyzing div %s", div)
        for token in token_extractor.extract_tokens(div):
            self.logger.debug('Extracted text token: "%s"', token)
            tokens.append(token)

    # Step 2: convert tokens into operations
    # Notes:
    # * the code below expects pieces of information to be in the date-label-amount order;
    #   could we achieve a heuristic smart enough to guess this order?
    # * unlike the former code, we parse every operation
    operations = []
    current_operation = {}
    for token in tokens:
        self.logger.debug('Analyzing token: "%s"', token)

        date_analysis = self.look_like_date_only(token)
        if date_analysis:
            # a date always opens a new operation
            current_operation = {'date': date_analysis.groups()[0]}
            continue

        date_desc_analysis = self.look_like_date_and_description(token)
        if date_desc_analysis:
            found = date_desc_analysis.groups()
            current_operation = {'date': found[0], 'label': found[1]}
        elif self.look_like_amount(token):
            # we consider the amount is the last information we get for an operation
            current_operation['amount'] = self.clean_amount(token)
            if current_operation.get('label') is not None and current_operation.get('date') is not None:
                self.logger.debug('Parsed operation: %s: %s: %s',
                                  current_operation['date'], current_operation['label'], current_operation['amount'])
                operations.append(current_operation)
                # NOTE(review): the indentation of this reset could not be
                # recovered from the diff rendering; it is assumed to belong
                # to the `if` above -- confirm against the repository.
                current_operation = {}
        elif current_operation.get('label') is not None:
            # any other text extends the label gathered so far
            current_operation['label'] = u'%s %s' % (current_operation['label'], token)
        else:
            current_operation['label'] = token

    # Step 3: yield adequate transactions
    for index, op in enumerate(operations[start_offset:], start_index):
        self.logger.debug('will yield the following transaction with index %d: %s: %s: %s',
                          index, op['date'], op['label'], op['amount'])
        transaction = Transaction(index)
        transaction.amount = op['amount']
        transaction.date = self.date_from_string(op['date'])
        transaction.raw = op['label']
        yield transaction
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue