support new Crédit Agricole website

This commit is contained in:
Romain Bignon 2013-03-18 15:52:36 +01:00
commit 240efb85f4
11 changed files with 334 additions and 5 deletions

View file

View file

@@ -0,0 +1,277 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2009-2013 Romain Bignon, Xavier Guerrin
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from weboob.tools.date import LinearDateGuesser
from weboob.capabilities.bank import Transfer, TransferError
from .pages import LoginPage, AccountsList
import mechanize
from datetime import datetime
import re
__all__ = ['CragrMobile']
class CragrMobile(BaseBrowser):
    """
    Browser for the mobile version of the Crédit Agricole website.

    The domain differs from one regional bank to another, so it is supplied
    at construction time through the `website' argument.
    """
    PROTOCOL = 'https'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    # a session id that is sometimes added, and should be ignored when matching pages
    SESSION_REGEXP = '(?:|%s[A-Z0-9]+)' % re.escape(r';jsessionid=')

    # True while login() is in progress, so that is_logged() does not
    # trigger a recursive login attempt from within the login procedure.
    is_logging = False

    def __init__(self, website, *args, **kwargs):
        self.DOMAIN = website
        # The website has no clear URL scheme: most URLs map to
        # AccountsList, which actually represents several kinds of pages.
        self.PAGES = {'https://[^/]+/': LoginPage,
                      'https://[^/]+/.*\.c.*': AccountsList,
                      'https://[^/]+/login/process%s' % self.SESSION_REGEXP: AccountsList,
                      'https://[^/]+/accounting/listAccounts': AccountsList,
                      'https://[^/]+/accounting/listOperations': AccountsList,
                      'https://[^/]+/accounting/showAccountDetail.+': AccountsList,
                      'https://[^/]+/accounting/showMoreAccountOperations.*': AccountsList,
                      }
        BaseBrowser.__init__(self, *args, **kwargs)

    def viewing_html(self):
        """
        The HTTP server returns documents with the mimetype
        'application/vnd.wap.xhtml+xml', which mechanize does not recognize
        as HTML; always pretend we are viewing HTML to work around that.
        """
        return True

    def is_logged(self):
        """Return True if the current page says so, or while logging in."""
        logged = self.page and self.page.is_logged() or self.is_logging
        self.logger.debug('logged: %s' % (logged and 'yes' or 'no'))
        return logged

    def login(self):
        """
        Attempt to log in.
        Note: this method does nothing if we are already logged in.

        Raises BrowserIncorrectPassword if the credentials are rejected.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        # Do we really need to login?
        if self.is_logged():
            self.logger.debug('already logged in')
            return
        self.is_logging = True

        # Are we on the good page?
        if not self.is_on_page(LoginPage):
            self.logger.debug('going to login page')
            BaseBrowser.home(self)
        self.logger.debug('attempting to log in')
        self.page.login(self.username, self.password)
        self.is_logging = False
        if not self.is_logged():
            raise BrowserIncorrectPassword()

        # once logged in, present ourselves as a desktop browser
        self.addheaders = [
            ['User-agent', self.USER_AGENTS['desktop_firefox']]
        ]

    def get_accounts_list(self):
        """Return the list of bank accounts parsed from the accounts page."""
        self.logger.debug('accounts list required')
        self.home()
        return self.page.get_list()

    def home(self):
        """
        Ensure we are both logged and on the accounts list.
        """
        self.logger.debug('accounts list page required')
        if self.is_on_page(AccountsList) and self.page.is_accounts_list():
            self.logger.debug('already on accounts list')
            return

        # simply go to http(s)://the.doma.in/
        BaseBrowser.home(self)

        if self.is_on_page(LoginPage):
            if not self.is_logged():
                # So, we are not logged on the login page -- what about logging ourselves?
                self.login()
                # we assume we are logged in

            # for some regions, we may stay on the login page once we're
            # logged in, without being redirected...
            if self.is_on_page(LoginPage):
                # ... so we have to move by ourselves
                self.move_to_accounts_list()

    def move_to_accounts_list(self):
        """
        For regions where you can stay on http(s)://the.doma.in/ while you are
        logged in, move to the accounts list
        """
        self.location('%s://%s/accounting/listAccounts' % (self.PROTOCOL, self.DOMAIN))

    def get_account(self, id):
        """Return the Account whose id matches `id', or None."""
        assert isinstance(id, basestring)
        l = self.get_accounts_list()
        for a in l:
            if a.id == ('%s' % id):
                return a
        return None

    def get_history(self, account):
        """Yield the Transactions of `account', following pagination."""
        # some accounts may exist without a link to any history page
        if account._link_id is None:
            return

        history_url = account._link_id
        operations_count = 0

        # 1st, go on the account page
        self.logger.debug('going on: %s' % history_url)
        self.location('https://%s%s' % (self.DOMAIN, history_url))
        if self.page is None:
            return

        # Some regions have a "Show more" (well, actually "Voir les 25
        # suivants") link we have to use to get all the operations.
        # However, it does not show only the 25 next results, it *adds* them
        # to the current view. Therefore, we have to parse each new page using
        # an offset, in order to ignore all already-fetched operations.
        # This especially occurs on CA Centre.
        use_expand_url = bool(self.page.expand_history_page_url())
        date_guesser = LinearDateGuesser()
        while True:
            # we skip "operations_count" operations on each page if we are in the case described above
            operations_offset = operations_count if use_expand_url else 0
            for page_operation in self.page.get_history(date_guesser, operations_count, operations_offset):
                operations_count += 1
                yield page_operation
            history_url = self.page.expand_history_page_url() if use_expand_url else self.page.next_page_url()
            if not history_url:
                break
            self.logger.debug('going on: %s' % history_url)
            self.location('https://%s%s' % (self.DOMAIN, history_url))

    def dict_find_value(self, dictionary, value):
        """
        Returns the first key pointing on the given value, or None if none
        is found.
        """
        for k, v in dictionary.iteritems():
            if v == value:
                return k
        return None

    def do_transfer(self, account, to, amount, reason=None):
        """
        Transfer the given amount of money from an account to another,
        tagging the transfer with the given reason.

        Returns a Transfer object on success; raises TransferError on any
        detected failure.
        """
        # access the transfer page
        transfer_page_unreachable_message = u'Could not reach the transfer page.'
        self.home()
        if not self.page.is_accounts_list():
            raise TransferError(transfer_page_unreachable_message)

        operations_url = self.page.operations_page_url()
        self.location('https://%s%s' % (self.DOMAIN, operations_url))

        transfer_url = self.page.transfer_page_url()
        abs_transfer_url = 'https://%s%s' % (self.DOMAIN, transfer_url)
        self.location(abs_transfer_url)
        if not self.page.is_transfer_page():
            raise TransferError(transfer_page_unreachable_message)

        source_accounts = self.page.get_transfer_source_accounts()
        target_accounts = self.page.get_transfer_target_accounts()

        # check that the given source account can be used
        if account not in source_accounts.values():
            raise TransferError('You cannot use account %s as a source account.' % account)

        # check that the given target account can be used
        if to not in target_accounts.values():
            raise TransferError('You cannot use account %s as a target account.' % to)

        # separate euros from cents
        amount_euros = int(amount)
        amount_cents = int((amount * 100) - (amount_euros * 100))

        # let's circumvent https://github.com/jjlee/mechanize/issues/closed#issue/17
        # using http://wwwsearch.sourceforge.net/mechanize/faq.html#usage
        adjusted_response = self.response().get_data().replace('<br/>', '<br />')
        response = mechanize.make_response(adjusted_response, [('Content-Type', 'text/html')], abs_transfer_url, 200, 'OK')
        self.set_response(response)

        # fill the form
        self.select_form(nr=0)
        self['numCompteEmetteur'] = ['%s' % self.dict_find_value(source_accounts, account)]
        self['numCompteBeneficiaire'] = ['%s' % self.dict_find_value(target_accounts, to)]
        self['montantPartieEntiere'] = '%s' % amount_euros
        self['montantPartieDecimale'] = '%02d' % amount_cents
        if reason is not None:
            self['libelle'] = reason
        self.submit()

        # look for known errors
        content = unicode(self.response().get_data(), 'utf-8')
        insufficient_amount_message = u'Montant insuffisant.'
        maximum_allowed_balance_message = u'Solde maximum autorisé dépassé.'

        if content.find(insufficient_amount_message) != -1:
            raise TransferError('The amount you tried to transfer is too low.')

        if content.find(maximum_allowed_balance_message) != -1:
            raise TransferError('The maximum allowed balance for the target account has been / would be reached.')

        # look for the known "all right" message
        ready_for_transfer_message = u'Vous allez effectuer un virement'
        # NOTE: str.find returns -1 when the text is absent; the previous
        # `not content.find(...)' test only fired when the message sat at
        # position 0, so a missing confirmation went undetected.
        if content.find(ready_for_transfer_message) == -1:
            raise TransferError('The expected message "%s" was not found.' % ready_for_transfer_message)

        # submit the last form
        self.select_form(nr=0)
        submit_date = datetime.now()
        self.submit()

        # look for the known "everything went well" message
        content = unicode(self.response().get_data(), 'utf-8')
        transfer_ok_message = u'Vous venez d\'effectuer un virement du compte'
        # same -1 convention as above
        if content.find(transfer_ok_message) == -1:
            raise TransferError('The expected message "%s" was not found.' % transfer_ok_message)

        # We now have to return a Transfer object
        # the final page does not provide any transfer id, so we'll use the submit date
        transfer = Transfer(submit_date.strftime('%Y%m%d%H%M%S'))
        transfer.amount = amount
        transfer.origin = account
        transfer.recipient = to
        transfer.date = submit_date
        return transfer

    #def get_coming_operations(self, account):
    #    if not self.is_on_page(AccountComing) or self.page.account.id != account.id:
    #        self.location('/NS_AVEEC?ch4=%s' % account._link_id)
    #    return self.page.get_operations()

View file

@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .accounts_list import AccountsList
from .login import LoginPage
__all__ = ['AccountsList', 'LoginPage']

View file

@@ -0,0 +1,325 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from datetime import date
from weboob.capabilities.bank import Account
from .base import CragrBasePage
from .tokenextractor import TokenExtractor
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
class Transaction(FrenchTransaction):
    # Maps raw Crédit Agricole operation labels to normalized transaction
    # types. Each pattern captures the meaningful part of the label in the
    # `text' group and, when the label embeds one, the operation date in
    # the `dd'/`mm'/`yy' groups. Patterns are tried in order, so the more
    # specific ones come first.
    PATTERNS = [
        (re.compile('^(Vp|Vt|Vrt|Virt|Vir(ement)?)\s*(?P<text>.*)', re.IGNORECASE), FrenchTransaction.TYPE_TRANSFER),
        (re.compile('^(?P<text>(Tip|Plt|Prlv|PRELEVT|Prelevement)\s*.*)', re.IGNORECASE), FrenchTransaction.TYPE_ORDER),
        (re.compile('^Cheque\s*(?P<text>(No)?.*)', re.IGNORECASE), FrenchTransaction.TYPE_CHECK),
        (re.compile('^(?P<text>Rem\s*Chq\s*.*)', re.IGNORECASE), FrenchTransaction.TYPE_DEPOSIT),
        (re.compile('^Ret(rait)?\s*Dab\s*((?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}))?\s*(?P<text>.*)', re.IGNORECASE),
         FrenchTransaction.TYPE_WITHDRAWAL),
        (re.compile('^Paiement\s*Carte\s*(?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2})\s*(?P<text>.*)', re.IGNORECASE),
         FrenchTransaction.TYPE_CARD),
        (re.compile('^(?P<text>.*CAPITAL.*ECHEANCE.*)', re.IGNORECASE), FrenchTransaction.TYPE_LOAN_PAYMENT),
        (re.compile('^(\*\*)?(?P<text>(frais|cotis(ation)?)\s*.*)', re.IGNORECASE), FrenchTransaction.TYPE_BANK),
        (re.compile('^(?P<text>Interets\s*.*)', re.IGNORECASE), FrenchTransaction.TYPE_BANK),
        (re.compile('^(?P<text>Prelev\.\s*(C\.r\.d\.s\.|R\.s\.a\.|C\.a\.p\.s\.|C\.s\.g|P\.s\.))', re.IGNORECASE),
         FrenchTransaction.TYPE_BANK),
        (re.compile('^(ACH.)?CARTE (?P<text>.*)', re.IGNORECASE), FrenchTransaction.TYPE_CARD),
        (re.compile('^RET.CARTE (?P<text>.*)', re.IGNORECASE), FrenchTransaction.TYPE_WITHDRAWAL),
    ]
class AccountsList(CragrBasePage):
    """
    Unlike most pages used with the Browser class, this class represents
    several pages, notably accounts list, history and transfer. This is due
    to the Credit Agricole not having a clear pattern to identify a page
    based on its URL.
    """

    def is_accounts_list(self):
        """
        Returns True if the current page appears to be the page dedicated to
        list the accounts.
        """
        # we check for the presence of a "mes comptes titres" link_id
        link = self.document.xpath('/html/body//a[contains(text(), "comptes titres")]')
        return bool(link)

    def is_account_page(self):
        """
        Returns True if the current page appears to be a page dedicated to list
        the history of a specific account.
        """
        # tested on CA Lorraine, Paris, Toulouse
        title_spans = self.document.xpath('/html/body//div[@class="dv"]/span')
        for title_span in title_spans:
            title_text = title_span.text_content().strip().replace("\n", '')
            if re.match('.*Compte.*n.*[0-9]+', title_text, flags=re.IGNORECASE):
                return True
        return False

    def is_transfer_page(self):
        """
        Returns True if the current page appears to be the page dedicated to
        order transfers between accounts.
        """
        source_account_select_field = self.document.xpath('/html/body//form//select[@name="numCompteEmetteur"]')
        target_account_select_field = self.document.xpath('/html/body//form//select[@name="numCompteBeneficiaire"]')
        return bool(source_account_select_field) and bool(target_account_select_field)

    def get_list(self):
        """
        Returns the list of available bank accounts
        """
        for div in self.document.getiterator('div'):
            # guard against childless divs before peeking at the first child
            if div.attrib.get('class', '') in ('dv', 'headline') and len(div) > 0 \
                    and div.getchildren()[0].tag in ('a', 'br'):
                self.logger.debug("Analyzing div %s" % div)

                # Step 1: extract text tokens
                tokens = []
                required_tokens = {}
                optional_tokens = {}
                token_extractor = TokenExtractor()
                for token in token_extractor.extract_tokens(div):
                    self.logger.debug('Extracted text token: "%s"' % token)
                    tokens.append(token)

                # Step 2: analyse tokens
                for token in tokens:
                    if self.look_like_account_number(token):
                        required_tokens['account_number'] = token
                    elif self.look_like_amount(token):
                        required_tokens['account_amount'] = token
                    elif self.look_like_account_name(token):
                        required_tokens['account_name'] = token
                    elif self.look_like_account_owner(token):
                        optional_tokens['account_owner'] = token

                # Step 3: create account objects
                if len(required_tokens) >= 3:
                    account = Account()
                    account.label = required_tokens['account_name']
                    account.id = required_tokens['account_number']
                    account.balance = FrenchTransaction.clean_amount(required_tokens['account_amount'])
                    account.currency = account.get_currency(required_tokens['account_amount'])
                    # we found almost all required information to create an account object
                    self.logger.debug('Found account %s with number %s and balance = %.2f' % (account.label, account.id, account.balance))
                    # we may have found the owner name too
                    if optional_tokens.get('account_owner') is not None:
                        # well, we could add it to the label, but is this really required?
                        self.logger.debug('  the owner appears to be %s' % optional_tokens['account_owner'])
                    # we simply lack the link to the account history... which remains optional
                    first_link = div.find('a')
                    if first_link is not None:
                        account._link_id = first_link.get('href')
                        self.logger.debug('  the history link appears to be %s' % account._link_id)
                    else:
                        account._link_id = None
                    yield account

    def get_history(self, date_guesser, start_index=0, start_offset=0):
        """
        Returns the history of a specific account. Note that this function
        expects the current page to be the one dedicated to this history.
        start_index is the id used for the first created operation.
        start_offset allows ignoring the `n' first Transactions on the page.
        """
        # tested on CA Lorraine, Paris, Toulouse
        # avoid parsing the page as an account-dedicated page if it is not the case
        if not self.is_account_page():
            return

        # Step 1: extract text tokens
        tokens = []
        token_extractor = TokenExtractor()
        for div in self.document.getiterator('div'):
            # NOTE: the original test was `in ('dv')', a substring check
            # against the string 'dv' which also matched 'd', 'v' and,
            # crucially, divs with no class attribute at all.
            if div.attrib.get('class', '') == 'dv':
                self.logger.debug("Analyzing div %s" % div)
                for token in token_extractor.extract_tokens(div):
                    self.logger.debug('Extracted text token: "%s"' % token)
                    tokens.append(token)

        # Step 2: convert tokens into operations
        # Notes:
        # * the code below expects pieces of information to be in the date-label-amount order;
        #   could we achieve a heuristic smart enough to guess this order?
        # * unlike the former code, we parse every operation
        operations = []
        current_operation = {}
        for token in tokens:
            self.logger.debug('Analyzing token: "%s"' % token)
            date_analysis = self.look_like_date_only(token)
            if date_analysis:
                current_operation = {}
                current_operation['date'] = date_analysis.groups()[0]
            else:
                date_desc_analysis = self.look_like_date_and_description(token)
                if date_desc_analysis:
                    current_operation = {}
                    current_operation['date'] = date_desc_analysis.groups()[0]
                    current_operation['label'] = date_desc_analysis.groups()[1]
                elif self.look_like_amount(token):
                    # we consider the amount is the last information we get for an operation
                    current_operation['amount'] = FrenchTransaction.clean_amount(token)
                    if current_operation.get('label') is not None and current_operation.get('date') is not None:
                        self.logger.debug('Parsed operation: %s: %s: %s' % (current_operation['date'], current_operation['label'], current_operation['amount']))
                        operations.append(current_operation)
                    current_operation = {}
                else:
                    if current_operation.get('label') is not None:
                        current_operation['label'] = u'%s %s' % (current_operation['label'], token)
                    else:
                        current_operation['label'] = token

        # Step 3: yield adequate transactions
        index = start_index
        for op in operations[start_offset:]:
            self.logger.debug('will yield the following transaction with index %d: %s: %s: %s' % (index, op['date'], op['label'], op['amount']))
            transaction = Transaction(index)
            index += 1
            transaction.amount = op['amount']
            transaction.parse(self.date_from_string(op['date'], date_guesser), re.sub('\s+', ' ', op['label']))
            yield transaction

    def get_transfer_accounts(self, select_name):
        """
        Returns the accounts proposed for a transfer in a select field.
        This method assumes the current page is the one dedicated to transfers.
        select_name is the name of the select field to analyze

        Returns a dict mapping form option values to 11-digit account
        numbers, or False when the current page is not the transfer page.
        """
        if not self.is_transfer_page():
            return False
        source_accounts = {}
        source_account_options = self.document.xpath('/html/body//form//select[@name="%s"]/option' % select_name)
        for option in source_account_options:
            source_account_value = option.get('value', -1)
            if source_account_value != -1:
                matches = re.findall('^[A-Z0-9]+.*([0-9]{11}).*$', self.extract_text(option))
                if matches:
                    source_accounts[source_account_value] = matches[0]
        return source_accounts

    def get_transfer_source_accounts(self):
        """Accounts money can be transferred from."""
        return self.get_transfer_accounts('numCompteEmetteur')

    def get_transfer_target_accounts(self):
        """Accounts money can be transferred to."""
        return self.get_transfer_accounts('numCompteBeneficiaire')

    def expand_history_page_url(self):
        """
        When on a page dedicated to list the history of a specific account (see
        is_account_page), returns the link to expand the history with 25 more results,
        or False if the link is not present.
        """
        # tested on CA centre france
        a = self.document.xpath('/html/body//div[@class="headline"]//a[contains(text(), "Voir les 25 suivants")]')
        if not a:
            return False
        else:
            return a[0].get('href', '')

    def next_page_url(self):
        """
        When on a page dedicated to list the history of a specific account (see
        is_account_page), returns the link to the next page, or False if the
        link is not present.
        """
        # tested on CA Lorraine, Paris, Toulouse
        a = self.document.xpath('/html/body//div[@class="navlink"]//a[contains(text(), "Suite")]')
        if not a:
            return False
        else:
            return a[0].get('href', '')

    def operations_page_url(self):
        """
        Returns the link to the "Opérations" page. This function assumes the
        current page is the accounts list (see is_accounts_list)
        """
        link = self.document.xpath(u'/html/body//a[contains(text(), "Opérations")]')
        return link[0].get('href')

    def transfer_page_url(self):
        """
        Returns the link to the "Virements" page. This function assumes the
        current page is the operations list (see operations_page_url)
        """
        link = self.document.xpath('/html/body//a[@accesskey=1]/@href')
        return link[0]

    def extract_text(self, xml_elmt):
        """
        Given an XML element, returns its inner text in a reasonably readable way
        """
        data = u''
        for text in xml_elmt.itertext():
            data = data + u'%s ' % text
        data = re.sub(' +', ' ', data.replace("\n", ' ').strip())
        return data

    def fallback_date(self):
        """ Returns a fallback, default date. """
        return date(date.today().year, 1, 1)

    def date_from_string(self, string, date_guesser):
        """
        Builds a date object from a 'DD/MM' string
        """
        matches = re.search('\s*([012]?[0-9]|3[01])\s*/\s*(0?[1-9]|1[012])\s*$', string)
        if matches is None:
            return self.fallback_date()
        return date_guesser.guess_date(int(matches.group(1)), int(matches.group(2)))

    def look_like_account_owner(self, string):
        """ Returns either None or an SRE_Match object depending on whether string looks like an account owner name. """
        result = re.match('^\s*(M\.|Mr|Mme|Mlle|Monsieur|Madame|Mademoiselle)', string, re.IGNORECASE)
        self.logger.debug('Does "%s" look like an account owner? %s', string, ('yes' if result else 'no'))
        return result

    def look_like_account_name(self, string):
        """ Returns True of False depending whether string looks like an account name. """
        result = (len(string) >= 3 and not self.look_like_account_owner(string))
        self.logger.debug('Does "%s" look like an account name? %s', string, ('yes' if result else 'no'))
        return result

    def look_like_account_number(self, string):
        """ Returns either False or a SRE_Match object depending whether string looks like an account number. """
        # An account is a 11 digits number (no more, no less)
        result = re.match('[^\d]*\d{11}[^\d]*', string)
        self.logger.debug('Does "%s" look like an account number? %s', string, ('yes' if result else 'no'))
        return result

    def look_like_amount(self, string):
        """ Returns either False or a SRE_Match object depending whether string looks like an amount. """
        # It seems the Credit Agricole always mentions amounts using two decimals
        result = re.match('-?[\d ]+[\.,]\d{2}', string)
        self.logger.debug('Does "%s" look like an amount? %s', string, ('yes' if result else 'no'))
        return result

    def look_like_date_only(self, string):
        """ Returns either False or a SRE_Match object depending whether string looks like an isolated date. """
        result = re.search('^\s*((?:[012][0-9]|3[01])/(?:0[1-9]|1[012]))\s*$', string)
        self.logger.debug('Does "%s" look like a date (and only a date)? %s', string, ('yes' if result else 'no'))
        return result

    def look_like_date_and_description(self, string):
        """ Returns either False or a SRE_Match object depending on whether string looks like a date+description pair. """
        result = re.search('^\s*((?:[012][0-9]|3[01])/(?:0[1-9]|1[012]))\s+(.+)\s*$', string)
        self.logger.debug('Does "%s" look like a date+description pair? %s', string, ('yes' if result else 'no'))
        return result

View file

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage
from weboob.tools.browser import BrowserUnavailable
class CragrBasePage(BasePage):
    """
    Base class for every Crédit Agricole page: detects website-level error
    banners on load and provides the login check shared by all pages.
    """

    def on_loaded(self):
        # Check for an error: failures are reported in a 'dv' div whose
        # first child is an "Attention" warning icon.
        for div in self.document.getiterator('div'):
            children = div.getchildren()
            # NOTE: the original test was `tag in ('img')', a substring
            # check against the string 'img' which also matched 'i', 'im'
            # and empty tags; it also indexed children without a guard.
            if div.attrib.get('class', '') == 'dv' and len(children) > 0 \
                    and children[0].tag == 'img' \
                    and children[0].attrib.get('alt', '') == 'Attention':
                # Try to find a detailed error message
                if len(children) > 1 and children[1].tag == 'span':
                    raise BrowserUnavailable(div.find('span').find('b').text)
                elif len(children) > 1 and children[1].tag == 'b':
                    # I haven't encountered this variation in the wild,
                    # but I wouldn't be surprised if it existed
                    # given the similar differences between regions.
                    raise BrowserUnavailable(div.find('b').find('span').text)
                # no detailed message available: report a generic failure
                raise BrowserUnavailable()

    def is_logged(self):
        # We are logged in as long as the page does not display a login
        # form (neither the 'code' nor the 'userPassword' password field).
        return not self.document.xpath('/html/body//form//input[@name = "code"]') and \
            not self.document.xpath('/html/body//form//input[@name = "userPassword"]')

View file

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.mech import ClientForm
ControlNotFoundError = ClientForm.ControlNotFoundError
from .base import CragrBasePage
__all__ = ['LoginPage']
class LoginPage(CragrBasePage):
    def login(self, login, password):
        """
        Fill in and submit the login form of the first form on the page.

        Regional websites name the credential fields differently, so three
        variants are attempted in turn:
        1. 'numero' / 'code' (most regions);
        2. 'userLogin' / 'userPassword' (other regions);
        3. neither exists as a form control (e.g. JavaScript-built fields):
           all four controls are created manually and all filled, so that
           whichever pair the server expects is submitted.
        """
        self.browser.select_form(nr=0)
        # some fields may be marked read-only; unlock them before writing
        self.browser.set_all_readonly(False)
        try:
            self.browser['numero'] = login
            self.browser['code'] = password
        except ControlNotFoundError:
            try:
                self.browser['userLogin'] = login
                self.browser['userPassword'] = password
            except ControlNotFoundError:
                # no known control found: create them all and fill them all
                self.browser.controls.append(ClientForm.TextControl('text', 'numero', {'value': ''}))
                self.browser.controls.append(ClientForm.TextControl('text', 'code', {'value': ''}))
                self.browser.controls.append(ClientForm.TextControl('text', 'userLogin', {'value': ''}))
                self.browser.controls.append(ClientForm.TextControl('text', 'userPassword', {'value': ''}))
                self.browser['numero'] = login
                self.browser['code'] = password
                self.browser['userLogin'] = login
                self.browser['userPassword'] = password
        self.browser.submit()

View file

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class TokenExtractor:
    """
    Pulls relevant text tokens out of an HTML element tree, remembering
    which elements were already visited so that the same subtree is never
    tokenized twice.
    """

    def __init__(self):
        # elements (and therefore their whole subtrees) already processed
        self.iterated_elements = []

    def clear(self):
        """
        Forget every previously visited element, allowing this instance to
        be reused for a fresh parsing run.
        """
        self.iterated_elements = []

    def element_iterated_already(self, html_element):
        """True if this element, or any of its ancestors, was already visited."""
        seen = self.iterated_elements
        if html_element in seen:
            return True
        return any(parent in seen for parent in html_element.iterancestors())

    def extract_tokens(self, html_element):
        """Yield the relevant, stripped text tokens found under html_element."""
        if self.element_iterated_already(html_element):
            return
        self.iterated_elements.append(html_element)
        for raw_text in html_element.itertext():
            # normalize non-breaking spaces and newlines to plain spaces
            cleaned = raw_text.replace(u'\xa0', ' ').replace("\n", ' ')
            for candidate in self.split_text_into_smaller_tokens(cleaned):
                if self.token_looks_relevant(candidate):
                    yield candidate.strip()

    @staticmethod
    def split_text_into_smaller_tokens(text):
        """Split a raw text chunk on tabulations, yielding each piece."""
        for piece in text.split('\t'):
            yield piece

    @staticmethod
    def token_looks_relevant(token):
        """A token is worth keeping when it has more than one visible character."""
        return len(token.strip()) > 1