From 81e3da8bb0e5caa70b2d3a17b8d5af31b97a608b Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Mon, 4 Feb 2013 15:16:03 +0100 Subject: [PATCH] More robust currency guessing This allows parsing PayPal balances which include both a symbol and letters, for instance "$ 42,00 USD". Also, it does not require adding any new symbols to the regular expression. It might break modules, though numerous cases were tested. --- setup.cfg | 3 ++- weboob/capabilities/bank.py | 28 +++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 751135da..ff1ce8c8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,6 @@ [nosetests] verbosity = 2 detailed-errors = 1 +with-doctest = 1 where = weboob -tests = weboob.tools.capabilities.paste,weboob.tools.path +tests = weboob.tools.capabilities.paste,weboob.tools.path,weboob.capabilities.bank diff --git a/weboob/capabilities/bank.py b/weboob/capabilities/bank.py index 7777c67e..29cd9a25 100644 --- a/weboob/capabilities/bank.py +++ b/weboob/capabilities/bank.py @@ -41,7 +41,7 @@ class TransferError(UserError): A transfer has failed. """ -class Currency: +class Currency(object): CUR_UNKNOWN = 0 CUR_EUR = 1 CUR_CHF = 2 @@ -54,10 +54,32 @@ class Currency: u'USD': CUR_USD, } + EXTRACTOR = re.compile(r'[\d\s,\.\-]', re.UNICODE) + @classmethod def get_currency(klass, text): - text = re.sub(u'[^A-Z€]', '', text.upper()) - return klass.TXT2CUR.get(text, klass.CUR_UNKNOWN) + u""" + >>> Currency.get_currency(u'42') + 0 + >>> Currency.get_currency(u'42 €') + 1 + >>> Currency.get_currency(u'$42') + 3 + >>> Currency.get_currency(u'42.000,00€') + 1 + >>> Currency.get_currency(u'$42 USD') + 3 + >>> Currency.get_currency(u'%42 USD') + 3 + >>> Currency.get_currency(u'US1D') + 0 + """ + curtexts = klass.EXTRACTOR.sub(' ', text.upper()).split() + for curtext in curtexts: + cur = klass.TXT2CUR.get(curtext) + if cur is not None: + return cur + return klass.CUR_UNKNOWN class Recipient(CapBaseObject):