browser2: Add RawText filter

Allows getting .text of elements without any alteration.
This is useful for at least textarea and pre tags.

Maybe the separator used to join the texts of multiple elements (currently a single space) should be configurable.
This commit is contained in:
Laurent Bachelier 2014-06-04 00:49:00 +02:00
commit 9619ddcaa2

View file

@ -191,6 +191,18 @@ class CleanHTML(Filter):
def clean(cls, txt):
    """
    Pass ``txt`` through ``html.tostring`` (with ``encoding=unicode``)
    and return what ``html2text`` produces from that markup.
    """
    markup = html.tostring(txt, encoding=unicode)
    return html2text(markup)
class RawText(Filter):
    """
    Get the ``.text`` of an element as is, without any cleaning.
    """
    def filter(self, el):
        """
        Return ``el.text`` converted to unicode.

        A tuple or list is filtered item by item and the results are
        joined with a single space. When ``el.text`` is None,
        ``self.default`` is returned instead.
        """
        if not isinstance(el, (tuple, list)):
            raw = el.text
            return self.default if raw is None else unicode(raw)
        # Sequence input: recurse on each item, then glue with one space.
        return u' '.join([self.filter(item) for item in el])
class CleanText(Filter):
"""
Get a cleaned text from an element.