From 9619ddcaa28a787d0ba261f0708d310019be5de4 Mon Sep 17 00:00:00 2001
From: Laurent Bachelier
Date: Wed, 4 Jun 2014 00:49:00 +0200
Subject: [PATCH] browser2: Add RawText filter

Allows getting .text of elements without any alteration.
This is useful for at least textarea and pre tags.

Maybe the .join character should be configurable.
---
 weboob/tools/browser2/filters.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/weboob/tools/browser2/filters.py b/weboob/tools/browser2/filters.py
index 6be44419..c87ef0a1 100644
--- a/weboob/tools/browser2/filters.py
+++ b/weboob/tools/browser2/filters.py
@@ -191,6 +191,31 @@ class CleanHTML(Filter):
     def clean(cls, txt):
         return html2text(html.tostring(txt, encoding=unicode))
 
+
+class RawText(Filter):
+    """
+    Get the raw text of an element, without any alteration.
+
+    The element's ``.text`` is returned as unicode, which is useful for
+    tags such as textarea or pre. When the element has no text, the
+    filter's default is returned instead. A list or tuple of elements is
+    filtered item by item and the results are joined with a single space.
+    """
+
+    def filter(self, el):
+        if isinstance(el, (tuple, list)):
+            # NOTE(review): join() only accepts strings, so this presumably
+            # relies on self.default being text when an item has no .text.
+            texts = [self.filter(item) for item in el]
+            return u' '.join(texts)
+
+        raw = el.text
+        if raw is not None:
+            return unicode(raw)
+        # Nothing to convert: fall back to the default.
+        return self.default
+
+
 class CleanText(Filter):
     """
     Get a cleaned text from an element.