CleanChars is now an option of CleanText

This commit is contained in:
Florent 2014-03-18 17:08:30 +01:00
commit c4dfb49033
2 changed files with 16 additions and 19 deletions

View file

@@ -18,7 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, Env, Regexp, Attr, CleanChars from weboob.tools.browser2.filters import CleanText, Env, Regexp, Attr
from weboob.capabilities.gauge import GaugeMeasure, GaugeSensor from weboob.capabilities.gauge import GaugeMeasure, GaugeSensor
from weboob.capabilities.base import NotAvailable from weboob.capabilities.base import NotAvailable
@@ -36,7 +36,7 @@ class StartPage(HTMLPage):
klass = GaugeSensor klass = GaugeSensor
obj_name = Regexp(CleanText('.'), '(.*?) {0,}: .*', "\\1") obj_name = Regexp(CleanText('.'), '(.*?) {0,}: .*', "\\1")
obj_id = CleanChars(Regexp(Attr('name'), '(.*)', "dd-\\1"), " .():") obj_id = CleanText(Regexp(Attr('name'), '(.*)', "dd-\\1"), " .():")
obj_unit = Env('unit') obj_unit = Env('unit')
obj_lastvalue = Env('lastvalue') obj_lastvalue = Env('lastvalue')
obj_gaugeid = u"wetter" obj_gaugeid = u"wetter"

View file

@@ -121,14 +121,20 @@ class CleanText(Filter):
""" """
Get a cleaned text from an element. Get a cleaned text from an element.
It replaces all tabs and multiple spaces to one space and strip the result It first replaces all tabs and multiple spaces to one space and strip the result
string. string.
Second, it replaces all symbols given in second argument.
""" """
def __init__(self, selector, symbols=''):
super(CleanText, self).__init__(selector)
self.symbols = symbols
def filter(self, txt): def filter(self, txt):
if isinstance(txt, (tuple,list)): if isinstance(txt, (tuple,list)):
txt = ' '.join(map(self.clean, txt)) txt = ' '.join(map(self.clean, txt))
return self.clean(txt) txt = self.clean(txt)
return self.remove(txt, self.symbols)
@classmethod @classmethod
def clean(self, txt): def clean(self, txt):
@@ -138,6 +144,12 @@ class CleanText(Filter):
txt = re.sub(u'[\s\xa0\t]+', u' ', txt) # 'foo bar' txt = re.sub(u'[\s\xa0\t]+', u' ', txt) # 'foo bar'
return txt.strip() return txt.strip()
@classmethod
def remove(self, txt, symbols):
for symbol in symbols:
txt = txt.replace(symbol, '')
return txt
class CleanDecimal(CleanText): class CleanDecimal(CleanText):
""" """
Get a cleaned Decimal value from an element. Get a cleaned Decimal value from an element.
@@ -168,21 +180,6 @@ class Attr(_Filter):
def __call__(self, item): def __call__(self, item):
return item.use_selector(getattr(item, 'obj_%s' % self.name)) return item.use_selector(getattr(item, 'obj_%s' % self.name))
class CleanChars(Filter):
"""
Remove chars.
"""
def __init__(self, selector, symbols):
super(CleanChars, self).__init__(selector)
self.symbols = symbols
def filter(self, txt):
if isinstance(txt, (tuple,list)):
txt = ' '.join([t.strip() for t in txt.itertext()])
for symbol in self.symbols:
txt = txt.replace(symbol, '')
return txt
class Regexp(Filter): class Regexp(Filter):
""" """