browser2: Add RawText filter
Allows getting the .text of elements without any alteration. This is useful at least for textarea and pre tags. Maybe the separator passed to .join (currently a single space) should be configurable.
This commit is contained in:
parent
c69c5cf5ef
commit
9619ddcaa2
1 changed file with 12 additions and 0 deletions
|
|
@ -191,6 +191,18 @@ class CleanHTML(Filter):
|
|||
def clean(cls, txt):
|
||||
return html2text(html.tostring(txt, encoding=unicode))
|
||||
|
||||
|
||||
class RawText(Filter):
    """
    Get the raw ``.text`` of an element, without any alteration.

    A single element yields its ``.text`` converted to ``unicode``, or
    ``self.default`` when ``.text`` is None. A tuple or list of elements
    is filtered item by item and the results are joined with one space.
    """

    def filter(self, el):
        # Single element: hand back its text untouched, or the default.
        if not isinstance(el, (tuple, list)):
            raw = el.text
            return self.default if raw is None else unicode(raw)

        # Sequence of elements: filter each one recursively, then join.
        # NOTE(review): an item without text contributes self.default to
        # the join, so the default must be a string here -- confirm.
        return u' '.join([self.filter(item) for item in el])
|
||||
|
||||
|
||||
class CleanText(Filter):
|
||||
"""
|
||||
Get a cleaned text from an element.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue