diff --git a/weboob/tools/browser2/filters.py b/weboob/tools/browser2/filters.py index c87ef0a1..9e91a1c3 100644 --- a/weboob/tools/browser2/filters.py +++ b/weboob/tools/browser2/filters.py @@ -19,17 +19,17 @@ from __future__ import absolute_import -from dateutil.parser import parse as parse_date import datetime -from decimal import Decimal, InvalidOperation import re -import lxml.html as html +from decimal import Decimal, InvalidOperation +import lxml.html as html +from dateutil.parser import parse as parse_date + +from weboob.capabilities.base import empty +from weboob.tools.compat import basestring from weboob.tools.exceptions import ParseError from weboob.tools.misc import html2text -from weboob.tools.compat import basestring -from weboob.capabilities.base import empty - _NO_DEFAULT = object() @@ -117,6 +117,7 @@ class Env(_Filter): It is used for example to get page parameters, or when there is a parse() method on ItemElement. """ + def __init__(self, name): super(Env, self).__init__() self.name = name @@ -157,6 +158,7 @@ class TableCell(_Filter): return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names))) + class Dict(Filter): @classmethod def select(cls, selector, item): @@ -181,10 +183,11 @@ class Dict(Filter): def filter(self, txt): return txt + class CleanHTML(Filter): def filter(self, txt): - if isinstance(txt, (tuple,list)): - return ' '.join([self.clean(item) for item in txt]) + if isinstance(txt, (tuple, list)): + return u' '.join([self.clean(item) for item in txt]) return self.clean(txt) @classmethod @@ -194,7 +197,7 @@ class CleanHTML(Filter): class RawText(Filter): def filter(self, el): - if isinstance(el, (tuple,list)): + if isinstance(el, (tuple, list)): return u' '.join([self.filter(e) for e in el]) if el.text is None: @@ -211,6 +214,7 @@ class CleanText(Filter): string. Second, it replaces all symbols given in second argument. """ + def __init__(self, selector, symbols='', replace=[], childs=True, **kwargs): super(CleanText, self).__init__(selector, **kwargs) self.symbols = symbols @@ -218,8 +222,8 @@ class CleanText(Filter): self.childs = childs def filter(self, txt): - if isinstance(txt, (tuple,list)): - txt = ' '.join([self.clean(item, childs=self.childs) for item in txt]) + if isinstance(txt, (tuple, list)): + txt = u' '.join([self.clean(item, childs=self.childs) for item in txt]) txt = self.clean(txt, childs=self.childs) txt = self.remove(txt, self.symbols) @@ -259,6 +263,7 @@ class CleanDecimal(CleanText): """ Get a cleaned Decimal value from an element. """ + def __init__(self, selector, replace_dots=True, default=_NO_DEFAULT): super(CleanDecimal, self).__init__(selector, default=default) self.replace_dots = replace_dots @@ -266,7 +271,7 @@ class CleanDecimal(CleanText): def filter(self, text): text = super(CleanDecimal, self).filter(text) if self.replace_dots: - text = text.replace('.','').replace(',','.') + text = text.replace('.', '').replace(',', '.') try: return Decimal(re.sub(r'[^\d\-\.]', '', text)) except InvalidOperation as e: @@ -293,6 +298,7 @@ class Link(Attr): If the tag is not found, an exception IndexError is raised. """ + def __init__(self, selector, default=_NO_DEFAULT): super(Link, self).__init__(selector, 'href', default=default) @@ -301,6 +307,7 @@ class Field(_Filter): """ Get the attribute of object. """ + def __init__(self, name): super(Field, self).__init__() self.name = name @@ -318,6 +325,7 @@ class Regexp(Filter): >>> f(etree.fromstring('

Date: 13/08/1988

')) u'1988-08-13' """ + def __init__(self, selector, pattern, template=None, flags=0, default=_NO_DEFAULT): super(Regexp, self).__init__(selector, default=default) self.pattern = pattern @@ -325,8 +333,8 @@ class Regexp(Filter): self.template = template def filter(self, txt): - if isinstance(txt, (tuple,list)): - txt = ' '.join([t.strip() for t in txt.itertext()]) + if isinstance(txt, (tuple, list)): + txt = u' '.join([t.strip() for t in txt.itertext()]) mobj = self.regex.search(txt) if not mobj: @@ -339,6 +347,7 @@ class Regexp(Filter): class Map(Filter): + def __init__(self, selector, map_dict, default=_NO_DEFAULT): super(Map, self).__init__(selector, default=default) self.map_dict = map_dict @@ -361,8 +370,8 @@ class DateTime(Filter): return self.default_or_raise(ParseError('Unable to parse %r' % txt)) try: if self.translations: - for search, repl in self.translations: - txt = search.sub(repl, txt) + for search, repl in self.translations: + txt = search.sub(repl, txt) return parse_date(txt, dayfirst=self.dayfirst) except ValueError as e: return self.default_or_raise(ParseError('Unable to parse %r: %s' % (txt, e))) @@ -398,7 +407,7 @@ class DateGuesser(Filter): if len(values) == 2: day, month = map(int, values) else: - raise ParseError('Unable to take (day,month) tuple from %r' % values) + raise ParseError('Unable to take (day, month) tuple from %r' % values) return date_guesser.guess_date(day, month, **self.kwargs)