move remove_html_tags function into weboob.tools.misc

This commit is contained in:
Romain Bignon 2011-07-03 10:52:33 +02:00
commit ba6f31dac8
3 changed files with 7 additions and 14 deletions

View file

@ -21,23 +21,13 @@
import re
from weboob.capabilities.bank import Operation
from weboob.tools.browser import BasePage
from weboob.tools.misc import remove_html_tags
__all__ = ['AccountHistory']
def remove_html_tags(data):
    """
    Replace every HTML tag found in *data* with a single space.

    A tag is any run of characters starting at a '<' and ending at the
    next '>'. The text between tags is left untouched, so the result may
    contain consecutive or leading/trailing spaces.

    :param data: markup to clean
    :returns: *data* with each tag replaced by one space
    """
    # '[^>]' also matches newlines (unlike '.'), so a tag whose attributes
    # are wrapped over several lines is stripped as well.
    return re.sub(r'<[^>]*>', ' ', data)
def remove_extra_spaces(data):
    """
    Collapse every run of whitespace in *data* into a single space.

    Leading and trailing whitespace is collapsed too, not stripped: it
    becomes one space at the corresponding end of the result.

    :param data: text to normalize
    :returns: *data* with each whitespace run replaced by one space
    """
    return re.sub(r'\s+', ' ', data)
class AccountHistory(BasePage):
def get_history(self):
@ -50,10 +40,10 @@ class AccountHistory(BasePage):
operation = Operation(len(operations))
operation.date = mvt.xpath("./td/span")[0].text
tmp = mvt.xpath("./td/span")[1]
operation.label = remove_extra_spaces(remove_html_tags(self.parser.tostring(tmp)))
operation.label = remove_html_tags(self.parser.tostring(tmp)).strip()
r = re.compile(r'\d+')
tmp = mvt.xpath("./td/span/strong")
if not tmp:
tmp = mvt.xpath("./td/span")

View file

@ -346,7 +346,6 @@ class BaseBrowser(mechanize.Browser):
self.page = None
raise self.get_exception(e)(e)
except (mechanize.BrowserStateError, BrowserRetry), e:
self.home()
raise BrowserUnavailable(e)
def is_on_page(self, pageCls):

View file

@ -24,6 +24,7 @@ from dateutil import tz
from logging import warning
from time import time, sleep
from tempfile import gettempdir
import re
import os
import sys
import traceback
@ -61,6 +62,9 @@ def get_bytes_size(size, unit_name):
}
return float(size * unit_data.get(unit_name, 1))
def remove_html_tags(data):
    """
    Replace every HTML tag found in *data* with a single space.

    A tag is any run of characters starting at a '<' and ending at the
    next '>'. The text between tags is left untouched, so the result may
    contain consecutive or leading/trailing spaces.

    :param data: markup to clean
    :returns: *data* with each tag replaced by one space
    """
    # '[^>]' also matches newlines (unlike '.'), so a tag whose attributes
    # are wrapped over several lines is stripped as well.
    return re.sub(r'<[^>]*>', ' ', data)
try:
import html2text as h2t