move remove_html_tags function into weboob.tools.misc
This commit is contained in:
parent
0e5089c2c0
commit
ba6f31dac8
3 changed files with 7 additions and 14 deletions
|
|
@ -21,23 +21,13 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from weboob.capabilities.bank import Operation
|
from weboob.capabilities.bank import Operation
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
from weboob.tools.misc import remove_html_tags
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['AccountHistory']
|
__all__ = ['AccountHistory']
|
||||||
|
|
||||||
|
|
||||||
# Compiled once at import time. DOTALL lets '.' match newlines so that a tag
# whose attributes wrap onto several lines is still recognised as one tag.
_HTML_TAG_RE = re.compile(r'<.*?>', re.DOTALL)


def remove_html_tags(data):
    """
    Replace every HTML tag found in *data* with a single space.

    A tag is any shortest run of characters starting with '<' and ending
    with '>', including runs that span several lines. Text outside tags is
    returned unchanged; runs of spaces are not collapsed.

    :param data: string that may contain HTML markup
    :returns: *data* with each tag replaced by one space character
    """
    return _HTML_TAG_RE.sub(' ', data)
|
|
||||||
|
|
||||||
|
|
||||||
def remove_extra_spaces(data):
    """
    Collapse each run of whitespace in *data* into a single space.

    Leading and trailing whitespace runs are reduced to one space as well;
    they are not stripped.

    :param data: string to normalise
    :returns: *data* with every whitespace run replaced by ' '
    """
    return re.sub(r'\s+', ' ', data)
|
|
||||||
|
|
||||||
|
|
||||||
class AccountHistory(BasePage):
|
class AccountHistory(BasePage):
|
||||||
|
|
||||||
def get_history(self):
|
def get_history(self):
|
||||||
|
|
@ -50,10 +40,10 @@ class AccountHistory(BasePage):
|
||||||
operation = Operation(len(operations))
|
operation = Operation(len(operations))
|
||||||
operation.date = mvt.xpath("./td/span")[0].text
|
operation.date = mvt.xpath("./td/span")[0].text
|
||||||
tmp = mvt.xpath("./td/span")[1]
|
tmp = mvt.xpath("./td/span")[1]
|
||||||
operation.label = remove_extra_spaces(remove_html_tags(self.parser.tostring(tmp)))
|
operation.label = remove_html_tags(self.parser.tostring(tmp)).strip()
|
||||||
|
|
||||||
r = re.compile(r'\d+')
|
r = re.compile(r'\d+')
|
||||||
|
|
||||||
tmp = mvt.xpath("./td/span/strong")
|
tmp = mvt.xpath("./td/span/strong")
|
||||||
if not tmp:
|
if not tmp:
|
||||||
tmp = mvt.xpath("./td/span")
|
tmp = mvt.xpath("./td/span")
|
||||||
|
|
|
||||||
|
|
@ -346,7 +346,6 @@ class BaseBrowser(mechanize.Browser):
|
||||||
self.page = None
|
self.page = None
|
||||||
raise self.get_exception(e)(e)
|
raise self.get_exception(e)(e)
|
||||||
except (mechanize.BrowserStateError, BrowserRetry), e:
|
except (mechanize.BrowserStateError, BrowserRetry), e:
|
||||||
self.home()
|
|
||||||
raise BrowserUnavailable(e)
|
raise BrowserUnavailable(e)
|
||||||
|
|
||||||
def is_on_page(self, pageCls):
|
def is_on_page(self, pageCls):
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ from dateutil import tz
|
||||||
from logging import warning
|
from logging import warning
|
||||||
from time import time, sleep
|
from time import time, sleep
|
||||||
from tempfile import gettempdir
|
from tempfile import gettempdir
|
||||||
|
import re
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
|
@ -61,6 +62,9 @@ def get_bytes_size(size, unit_name):
|
||||||
}
|
}
|
||||||
return float(size * unit_data.get(unit_name, 1))
|
return float(size * unit_data.get(unit_name, 1))
|
||||||
|
|
||||||
|
# Compiled once at import time. DOTALL lets '.' match newlines so that a tag
# whose attributes wrap onto several lines is still recognised as one tag.
_HTML_TAG_RE = re.compile(r'<.*?>', re.DOTALL)


def remove_html_tags(data):
    """
    Replace every HTML tag found in *data* with a single space.

    A tag is any shortest run of characters starting with '<' and ending
    with '>', including runs that span several lines. Text outside tags is
    returned unchanged; runs of spaces are not collapsed.

    :param data: string that may contain HTML markup
    :returns: *data* with each tag replaced by one space character
    """
    return _HTML_TAG_RE.sub(' ', data)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import html2text as h2t
|
import html2text as h2t
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue