From 501c3c8e111e64f79747bfc0d543e6610c62fa79 Mon Sep 17 00:00:00 2001
From: Romain Bignon
Date: Mon, 7 Jan 2013 16:34:03 +0100
Subject: [PATCH] get all strings under this element

---
 weboob/tools/parsers/lxmlparser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/weboob/tools/parsers/lxmlparser.py b/weboob/tools/parsers/lxmlparser.py
index 7691b969..58856cbf 100644
--- a/weboob/tools/parsers/lxmlparser.py
+++ b/weboob/tools/parsers/lxmlparser.py
@@ -46,8 +46,8 @@ class LxmlHtmlParser(IParser):
         return lxml.html.tostring(element, encoding=unicode)
 
     def tocleanstring(self, element):
-        txt = element.xpath('text()') # ['foo ', ' bar']
-        txt = ' '.join(txt) # 'foo bar'
+        txt = [txt.strip() for txt in element.itertext()]
+        txt = u' '.join(txt) # 'foo bar'
         txt = re.sub('\s+', ' ', txt) # 'foo bar'
         return txt.strip()
 