[browser2] add a CleanHTML filter

This commit is contained in:
Bezleputh 2014-03-27 00:16:11 +01:00
commit 66f3560b52

View file

@ -23,7 +23,9 @@ from dateutil.parser import parse as parse_date
import datetime import datetime
from decimal import Decimal, InvalidOperation from decimal import Decimal, InvalidOperation
import re import re
import lxml.html as html
from weboob.tools.misc import html2text
from weboob.capabilities.base import empty from weboob.capabilities.base import empty
@ -156,6 +158,9 @@ class TableCell(_Filter):
return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names))) return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names)))
class CleanHTML(Filter):
    """
    Filter passing the first item of its input through html2text.
    """

    def filter(self, txt):
        """
        Serialize ``txt[0]`` to a unicode HTML string with
        ``lxml.html.tostring`` and return the html2text conversion of
        that markup.

        Only the first item of ``txt`` is used.
        """
        first_item = txt[0]
        markup = html.tostring(first_item, encoding=unicode)
        return html2text(markup)
class CleanText(Filter): class CleanText(Filter):
""" """