From c4dfb49033806f413168d47d2ea0eb8743fb3249 Mon Sep 17 00:00:00 2001 From: Florent Date: Tue, 18 Mar 2014 17:08:30 +0100 Subject: [PATCH] CleanChars is now an option of CleanText --- modules/dresdenwetter/pages.py | 4 ++-- weboob/tools/browser2/filters.py | 31 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/modules/dresdenwetter/pages.py b/modules/dresdenwetter/pages.py index 1a03c8ec..2db90b31 100644 --- a/modules/dresdenwetter/pages.py +++ b/modules/dresdenwetter/pages.py @@ -18,7 +18,7 @@ # along with weboob. If not, see . from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement -from weboob.tools.browser2.filters import CleanText, Env, Regexp, Attr, CleanChars +from weboob.tools.browser2.filters import CleanText, Env, Regexp, Attr from weboob.capabilities.gauge import GaugeMeasure, GaugeSensor from weboob.capabilities.base import NotAvailable @@ -36,7 +36,7 @@ class StartPage(HTMLPage): klass = GaugeSensor obj_name = Regexp(CleanText('.'), '(.*?) {0,}: .*', "\\1") - obj_id = CleanChars(Regexp(Attr('name'), '(.*)', "dd-\\1"), " .():") + obj_id = CleanText(Regexp(Attr('name'), '(.*)', "dd-\\1"), " .():") obj_unit = Env('unit') obj_lastvalue = Env('lastvalue') obj_gaugeid = u"wetter" diff --git a/weboob/tools/browser2/filters.py b/weboob/tools/browser2/filters.py index 3be8e870..f648531a 100644 --- a/weboob/tools/browser2/filters.py +++ b/weboob/tools/browser2/filters.py @@ -121,14 +121,20 @@ class CleanText(Filter): """ Get a cleaned text from an element. - It replaces all tabs and multiple spaces to one space and strip the result + It first replaces all tabs and multiple spaces with one space and strips the result string. + Second, it removes all symbols given in the second argument. 
""" + def __init__(self, selector, symbols=''): + super(CleanText, self).__init__(selector) + self.symbols = symbols + def filter(self, txt): if isinstance(txt, (tuple,list)): txt = ' '.join(map(self.clean, txt)) - return self.clean(txt) + txt = self.clean(txt) + return self.remove(txt, self.symbols) @classmethod def clean(self, txt): @@ -138,6 +144,12 @@ class CleanText(Filter): txt = re.sub(u'[\s\xa0\t]+', u' ', txt) # 'foo bar' return txt.strip() + @classmethod + def remove(self, txt, symbols): + for symbol in symbols: + txt = txt.replace(symbol, '') + return txt + class CleanDecimal(CleanText): """ Get a cleaned Decimal value from an element. @@ -168,21 +180,6 @@ class Attr(_Filter): def __call__(self, item): return item.use_selector(getattr(item, 'obj_%s' % self.name)) -class CleanChars(Filter): - """ - Remove chars. - """ - def __init__(self, selector, symbols): - super(CleanChars, self).__init__(selector) - self.symbols = symbols - - def filter(self, txt): - if isinstance(txt, (tuple,list)): - txt = ' '.join([t.strip() for t in txt.itertext()]) - - for symbol in self.symbols: - txt = txt.replace(symbol, '') - return txt class Regexp(Filter): """