move remove_html_tags function into weboob.tools.misc

This commit is contained in:
Romain Bignon 2011-07-03 10:52:33 +02:00
commit ba6f31dac8
3 changed files with 7 additions and 14 deletions

View file

@ -21,23 +21,13 @@
import re import re
from weboob.capabilities.bank import Operation from weboob.capabilities.bank import Operation
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.misc import remove_html_tags
__all__ = ['AccountHistory'] __all__ = ['AccountHistory']
def remove_html_tags(data):
    """Replace every HTML tag in ``data`` with a single space.

    A tag is anything from a ``<`` up to the next ``>``. The character
    class ``[^>]`` also matches newlines, so tags whose attributes wrap
    over several lines are removed as well (the previous ``<.*?>`` pattern
    stopped at the first line break and left such tags in place).

    This is a plain regex substitution, not an HTML parser: a literal
    ``>`` inside an attribute value ends the match early, and entities
    are left untouched.

    :param data: markup to clean
    :returns: ``data`` with each tag replaced by one space; whitespace is
              neither collapsed nor stripped
    """
    return re.sub(r'<[^>]*>', ' ', data)
def remove_extra_spaces(data):
    """Collapse each run of whitespace in ``data`` into one space.

    Leading and trailing whitespace is reduced to a single space as well;
    it is not stripped.

    :param data: text to normalise
    :returns: ``data`` with every whitespace run replaced by ``' '``
    """
    return re.sub(r'\s+', ' ', data)
class AccountHistory(BasePage): class AccountHistory(BasePage):
def get_history(self): def get_history(self):
@ -50,10 +40,10 @@ class AccountHistory(BasePage):
operation = Operation(len(operations)) operation = Operation(len(operations))
operation.date = mvt.xpath("./td/span")[0].text operation.date = mvt.xpath("./td/span")[0].text
tmp = mvt.xpath("./td/span")[1] tmp = mvt.xpath("./td/span")[1]
operation.label = remove_extra_spaces(remove_html_tags(self.parser.tostring(tmp))) operation.label = remove_html_tags(self.parser.tostring(tmp)).strip()
r = re.compile(r'\d+') r = re.compile(r'\d+')
tmp = mvt.xpath("./td/span/strong") tmp = mvt.xpath("./td/span/strong")
if not tmp: if not tmp:
tmp = mvt.xpath("./td/span") tmp = mvt.xpath("./td/span")

View file

@ -346,7 +346,6 @@ class BaseBrowser(mechanize.Browser):
self.page = None self.page = None
raise self.get_exception(e)(e) raise self.get_exception(e)(e)
except (mechanize.BrowserStateError, BrowserRetry), e: except (mechanize.BrowserStateError, BrowserRetry), e:
self.home()
raise BrowserUnavailable(e) raise BrowserUnavailable(e)
def is_on_page(self, pageCls): def is_on_page(self, pageCls):

View file

@ -24,6 +24,7 @@ from dateutil import tz
from logging import warning from logging import warning
from time import time, sleep from time import time, sleep
from tempfile import gettempdir from tempfile import gettempdir
import re
import os import os
import sys import sys
import traceback import traceback
@ -61,6 +62,9 @@ def get_bytes_size(size, unit_name):
} }
return float(size * unit_data.get(unit_name, 1)) return float(size * unit_data.get(unit_name, 1))
def remove_html_tags(data):
    """Replace every HTML tag in ``data`` with a single space.

    A tag is anything from a ``<`` up to the next ``>``. The character
    class ``[^>]`` also matches newlines, so tags whose attributes wrap
    over several lines are removed as well (the previous ``<.*?>`` pattern
    stopped at the first line break and left such tags in place).

    This is a plain regex substitution, not an HTML parser: a literal
    ``>`` inside an attribute value ends the match early, and entities
    are left untouched.

    :param data: markup to clean
    :returns: ``data`` with each tag replaced by one space; whitespace is
              neither collapsed nor stripped
    """
    return re.sub(r'<[^>]*>', ' ', data)
try: try:
import html2text as h2t import html2text as h2t