From ba6f31dac89388bd94456a195b04c924f3b91d30 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sun, 3 Jul 2011 10:52:33 +0200 Subject: [PATCH] move remove_html_tags function into weboob.tools.misc --- weboob/backends/bp/pages/accounthistory.py | 16 +++------------- weboob/tools/browser/browser.py | 1 - weboob/tools/misc.py | 4 ++++ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/weboob/backends/bp/pages/accounthistory.py b/weboob/backends/bp/pages/accounthistory.py index 1cb8341d..fece0797 100644 --- a/weboob/backends/bp/pages/accounthistory.py +++ b/weboob/backends/bp/pages/accounthistory.py @@ -21,23 +21,13 @@ import re from weboob.capabilities.bank import Operation - from weboob.tools.browser import BasePage +from weboob.tools.misc import remove_html_tags __all__ = ['AccountHistory'] -def remove_html_tags(data): - p = re.compile(r'<.*?>') - return p.sub(' ', data) - - -def remove_extra_spaces(data): - p = re.compile(r'\s+') - return p.sub(' ', data) - - class AccountHistory(BasePage): def get_history(self): @@ -50,10 +40,10 @@ class AccountHistory(BasePage): operation = Operation(len(operations)) operation.date = mvt.xpath("./td/span")[0].text tmp = mvt.xpath("./td/span")[1] - operation.label = remove_extra_spaces(remove_html_tags(self.parser.tostring(tmp))) + operation.label = remove_html_tags(self.parser.tostring(tmp)).strip() r = re.compile(r'\d+') - + tmp = mvt.xpath("./td/span/strong") if not tmp: tmp = mvt.xpath("./td/span") diff --git a/weboob/tools/browser/browser.py b/weboob/tools/browser/browser.py index 0a6cda3d..1826b03d 100644 --- a/weboob/tools/browser/browser.py +++ b/weboob/tools/browser/browser.py @@ -346,7 +346,6 @@ class BaseBrowser(mechanize.Browser): self.page = None raise self.get_exception(e)(e) except (mechanize.BrowserStateError, BrowserRetry), e: - self.home() raise BrowserUnavailable(e) def is_on_page(self, pageCls): diff --git a/weboob/tools/misc.py b/weboob/tools/misc.py index 0b0cce5c..e485a84c 100644 --- a/weboob/tools/misc.py +++ b/weboob/tools/misc.py @@ -24,6 +24,7 @@ from dateutil import tz from logging import warning from time import time, sleep from tempfile import gettempdir +import re import os import sys import traceback @@ -61,6 +62,9 @@ def get_bytes_size(size, unit_name): } return float(size * unit_data.get(unit_name, 1)) +def remove_html_tags(data): + p = re.compile(r'<.*?>') + return p.sub(' ', data) try: import html2text as h2t