Implemented history operation for CrAgr backend
Signed-off-by: Xavier G <xavier@tuxfamily.org>
Signed-off-by: Romain Bignon <romain@peerfuse.org>
This commit is contained in:
parent
2a2ad84d8d
commit
f338439bd7
3 changed files with 130 additions and 3 deletions
|
|
@ -59,5 +59,5 @@ class CragrBackend(BaseBackend, ICapBank):
|
||||||
return iter([])
|
return iter([])
|
||||||
|
|
||||||
def iter_history(self, account):
    """Yield the operations of *account*, as fetched by the browser."""
    for operation in self.browser.get_history(account):
        yield operation
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ class Cragr(BaseBrowser):
|
||||||
self.PAGES = {'https://%s/' % website: pages.LoginPage,
|
self.PAGES = {'https://%s/' % website: pages.LoginPage,
|
||||||
'https://%s/.*\.c.*' % website: pages.AccountsList,
|
'https://%s/.*\.c.*' % website: pages.AccountsList,
|
||||||
'https://%s/login/process' % website: pages.AccountsList,
|
'https://%s/login/process' % website: pages.AccountsList,
|
||||||
|
'https://%s/accounting/listOperations' % website: pages.AccountsList,
|
||||||
}
|
}
|
||||||
BaseBrowser.__init__(self, *args, **kwargs)
|
BaseBrowser.__init__(self, *args, **kwargs)
|
||||||
|
|
||||||
|
|
@ -74,11 +75,21 @@ class Cragr(BaseBrowser):
|
||||||
|
|
||||||
l = self.get_accounts_list()
|
l = self.get_accounts_list()
|
||||||
for a in l:
|
for a in l:
|
||||||
if a.id == id:
|
if a.id == ('%s' % id):
|
||||||
return a
|
return a
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_history(self, account):
    """Walk the account's operation pages and yield every operation found.

    Starts from the account's link_id and follows "next page" links until
    none remains; the running operation count is passed to each page so
    operation indexes stay globally consistent.
    """
    url = account.link_id
    count = 0
    while url:
        self.location('https://%s%s' % (self.DOMAIN, url))
        for op in self.page.get_history(count):
            count += 1
            yield op
        url = self.page.next_page_url()
|
||||||
|
|
||||||
#def get_coming_operations(self, account):
|
#def get_coming_operations(self, account):
|
||||||
# if not self.is_on_page(pages.AccountComing) or self.page.account.id != account.id:
|
# if not self.is_on_page(pages.AccountComing) or self.page.account.id != account.id:
|
||||||
# self.location('/NS_AVEEC?ch4=%s' % account.link_id)
|
# self.location('/NS_AVEEC?ch4=%s' % account.link_id)
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,10 @@
|
||||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
from weboob.capabilities.bank import Account
|
from weboob.capabilities.bank import Account
|
||||||
from .base import CragrBasePage
|
from .base import CragrBasePage
|
||||||
|
from weboob.capabilities.bank import Operation
|
||||||
|
|
||||||
class AccountsList(CragrBasePage):
|
class AccountsList(CragrBasePage):
|
||||||
def get_list(self):
|
def get_list(self):
|
||||||
|
|
@ -29,11 +31,13 @@ class AccountsList(CragrBasePage):
|
||||||
if div.getchildren()[0].tag == 'a':
|
if div.getchildren()[0].tag == 'a':
|
||||||
# This is at least present on CA Nord-Est
|
# This is at least present on CA Nord-Est
|
||||||
account.label = ' '.join(div.find('a').text.split()[:-1])
|
account.label = ' '.join(div.find('a').text.split()[:-1])
|
||||||
|
account.link_id = div.find('a').get('href', '')
|
||||||
account.id = div.find('a').text.split()[-1]
|
account.id = div.find('a').text.split()[-1]
|
||||||
s = div.find('div').find('b').find('span').text
|
s = div.find('div').find('b').find('span').text
|
||||||
else:
|
else:
|
||||||
# This is at least present on CA Toulouse
|
# This is at least present on CA Toulouse
|
||||||
account.label = div.find('a').text.strip()
|
account.label = div.find('a').text.strip()
|
||||||
|
account.link_id = div.find('a').get('href', '')
|
||||||
account.id = div.findall('br')[1].tail.strip()
|
account.id = div.findall('br')[1].tail.strip()
|
||||||
s = div.find('div').find('span').find('b').text
|
s = div.find('div').find('span').find('b').text
|
||||||
balance = u''
|
balance = u''
|
||||||
|
|
@ -45,3 +49,115 @@ class AccountsList(CragrBasePage):
|
||||||
account.balance = float(balance)
|
account.balance = float(balance)
|
||||||
l.append(account)
|
l.append(account)
|
||||||
return l
|
return l
|
||||||
|
|
||||||
|
def is_account_page(self):
    """Return True when the current document looks like an account history page.

    Tested on CA Lorraine, Paris and Toulouse.
    """
    pattern = re.compile('.*Compte.*n.[0-9]+.*au.*')
    for span in self.document.xpath('/html/body/div[@class="dv"]/span'):
        title = span.text_content().strip().replace("\n", '')
        if pattern.match(title):
            return True
    return False
|
||||||
|
|
||||||
|
def next_page_url(self):
    """Return the href of the "Suite" (next page) link, or False when absent.

    Tested on CA Lorraine, Paris and Toulouse.
    """
    links = self.document.xpath('/html/body//div[@class="navlink"]//a[contains(text(), "Suite")]')
    return links[0].get('href', '') if links else False
|
||||||
|
|
||||||
|
def is_right_aligned_div(self, div_elmt):
    """Tell whether *div_elmt* has an inline 'text-align: right' style.

    Returns a truthy match object on success, None otherwise, so callers
    can use the result directly in a boolean context.
    """
    style = div_elmt.get('style', '')
    return re.match('.*text-align: ?right.*', style)
|
||||||
|
|
||||||
|
def extract_text(self, xml_elmt):
    """Return the whitespace-normalized text content of *xml_elmt*.

    Concatenates every text node (including tails of children), turns
    newlines into spaces, strips the ends and collapses runs of spaces.
    """
    joined = u' '.join(u'%s' % chunk for chunk in xml_elmt.itertext())
    return re.sub(' +', ' ', joined.replace("\n", ' ').strip())
|
||||||
|
|
||||||
|
def get_history(self, start_index = 0):
    """Parse the operations listed on the current account page.

    Yields one Operation per entry found, with ids starting at
    *start_index* so that indexes stay consistent across paginated
    result pages. Yields nothing when the current page is not an
    account-dedicated page.

    Tested on CA Lorraine, Paris, Toulouse.
    """
    # avoid parsing the page as an account-dedicated page if it is not the case
    if not self.is_account_page():
        return

    index = start_index
    # NOTE(review): if the very first interesting div were right-aligned,
    # `operation` would still be False when `.amount` is set below —
    # assumes entries always start with a left-aligned (date/label) div.
    operation = False

    # every direct child of <body>; separators and data divs are siblings
    body_elmt_list = self.document.xpath('/html/body/*')

    # type of separator used in the page
    separators = 'hr'
    # How many <hr> elements do we have under the <body>?
    sep_expected = len(self.document.xpath('/html/body/hr'))
    if (not sep_expected):
        # no <hr>? Then how many class-less <div> used as separators instead?
        sep_expected = len(self.document.xpath('/html/body/div[not(@class) and not(@style)]'))
        separators = 'div'

    # the interesting divs are after the <hr> elements
    interesting_divs = []
    right_div_count = 0
    left_div_count = 0
    sep_found = 0
    for body_elmt in body_elmt_list:
        if (separators == 'hr' and body_elmt.tag == 'hr'):
            sep_found += 1
        elif (separators == 'div' and body_elmt.tag == 'div' and body_elmt.get('class', 'nope') == 'nope'):
            # class-less <div> separator ('nope' sentinel means "attribute absent")
            sep_found += 1
        elif (sep_found >= sep_expected and body_elmt.tag == 'div'):
            # we just want <div> with dv class and a style attribute
            if (body_elmt.get('class', '') != 'dv'):
                continue
            if (body_elmt.get('style', 'nope') == 'nope'):
                continue
            interesting_divs.append(body_elmt)
            if (self.is_right_aligned_div(body_elmt)):
                right_div_count += 1
            else:
                left_div_count += 1

    # So, how are data laid out?
    # In the "Toulouse" layout we'll have: one left-aligned div for the
    # date, one right-aligned div for the amount, and one left-aligned
    # div for the label. Each time. Hence twice as many left as right.
    toulouse_way_of_life = (left_div_count == 2 * right_div_count)

    if (not toulouse_way_of_life):
        # two-div layout: date+label line, then amount line
        for body_elmt in interesting_divs:
            if (self.is_right_aligned_div(body_elmt)):
                # this is the second line of an operation entry, displaying the amount
                data = self.extract_text(body_elmt).replace(',', '.').replace(' ', '')
                matches = re.findall('^(-?[0-9]+\.[0-9]{2}).*$', data)
                operation.amount = float(matches[0]) if (matches) else 0.0
                yield operation
            else:
                # this is the first line of an operation entry, displaying the date and label
                data = self.extract_text(body_elmt)
                matches = re.findall('^([012][0-9]|3[01])/(0[1-9]|1[012]).(.+)$', data)
                operation = Operation(index)
                index += 1
                if (matches):
                    operation.date = u'%s/%s' % (matches[0][0], matches[0][1])
                    operation.label = u'%s' % matches[0][2]
                else:
                    # unparsable entry: keep placeholders rather than dropping it
                    operation.date = u'01/01'
                    operation.label = u'Unknown'
    else:
        # three-div layout: (date, amount, label) triplets
        # NOTE(review): integer division under Python 2; would need // on Python 3
        for i in range(0, len(interesting_divs)/3):
            operation = Operation(index)
            index += 1
            # amount
            data = self.extract_text(interesting_divs[(i*3)+1]).replace(',', '.').replace(' ', '')
            matches = re.findall('^(-?[0-9]+\.[0-9]{2}).*$', data)
            operation.amount = float(matches[0]) if (matches) else 0.0
            # date (DD/MM, no year on the page)
            data = self.extract_text(interesting_divs[i*3])
            matches = re.findall('^([012][0-9]|3[01])/(0[1-9]|1[012])', data)
            operation.date = u'%s/%s' % (matches[0][0], matches[0][1]) if (matches) else u'01/01'
            #label
            data = self.extract_text(interesting_divs[(i*3)+2])
            data = re.sub(' +', ' ', data)
            operation.label = u'%s' % data
            yield operation
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue