split filters into several files

This commit is contained in:
Romain Bignon 2014-07-10 18:21:00 +02:00
commit c90b5844e4
7 changed files with 167 additions and 78 deletions

View file

@ -20,11 +20,18 @@
import re
import sys
from copy import deepcopy
from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
from weboob.tools.log import getLogger
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.browser2.page import NextPage
from .filters.standard import _Filter, CleanText
from .filters.html import AttributeNotFound, XPathNotFound
__all__ = ['DataError', 'AbstractElement', 'ListElement', 'ItemElement', 'TableElement', 'SkipItem']
class DataError(Exception):
"""
Returned data from pages are incoherent.

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .standard import * # NOQA
from .html import * # NOQA
from .json import * # NOQA

View file

@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .standard import _Selector, _NO_DEFAULT, Filter, FilterError
__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound', 'Attr', 'Link']
class XPathNotFound(FilterError):
    """Filter error reported when a selector matches no element."""
class AttributeNotFound(FilterError):
    """Filter error reported when a matched element lacks the wanted attribute."""
class CSS(_Selector):
    """Selector whose expression is resolved with the item's ``cssselect``."""

    @classmethod
    def select(cls, selector, item):
        """Return the result of ``item.cssselect(selector)``."""
        matches = item.cssselect(selector)
        return matches
class XPath(_Selector):
    """Selector that takes all of its behaviour from ``_Selector``."""
class Attr(Filter):
    """
    Get the value of an attribute of the first selected element.

    :param selector: selector handed to the parent filter
    :param attr: name of the attribute to read
    :param default: forwarded to the parent filter as its default
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        super(Attr, self).__init__(selector, default=default)
        self.attr = attr

    def filter(self, el):
        """
        Return attribute ``self.attr`` of ``el[0]`` formatted as unicode text.

        A missing element is reported as XPathNotFound and a missing
        attribute as AttributeNotFound, both through ``default_or_raise``.
        """
        # Step 1: nothing selected at all.
        try:
            node = el[0]
        except IndexError:
            return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector))

        # Step 2: element found, but the attribute may be absent.
        try:
            value = node.attrib[self.attr]
        except KeyError:
            return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (node, self.attr)))

        return u'%s' % value
class Link(Attr):
    """
    Get the link uri of an element (its ``href`` attribute).

    If no element is matched, an XPathNotFound error is produced; if the
    matched element has no ``href`` attribute, an AttributeNotFound error is
    produced. Both go through ``default_or_raise``, so a supplied ``default``
    presumably takes precedence over raising.

    :param selector: selector locating the element (None is accepted)
    :param default: forwarded to ``Attr`` as the default
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        super(Link, self).__init__(selector, 'href', default=default)

View file

@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2014 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .standard import _Selector, _NO_DEFAULT
__all__ = ['Dict']
class _DictMeta(type):
def __getitem__(cls, name):
return cls(name)
class Dict(_Selector):
    """
    Select a value in nested dictionaries by following a path of keys.

    The path is given as a ``'/'``-separated string (``Dict('a/b')``) and can
    be extended by subscripting an instance (``Dict('a')['b']``). Through the
    Python 2 ``__metaclass__`` hook, subscripting the class itself
    (``Dict['a']``) creates an instance.

    :param selector: ``'/'``-separated key path, or None for an empty path
    :param default: forwarded to the parent selector as its default
    """
    __metaclass__ = _DictMeta

    def __init__(self, selector=None, default=_NO_DEFAULT):
        # Give the parent the selector itself rather than this instance;
        # self.selector is replaced by the list of keys right after.
        super(Dict, self).__init__(selector, default=default)
        self.selector = selector.split('/') if selector is not None else []

    def __getitem__(self, name):
        """Append *name* to the key path and return this same filter."""
        self.selector.append(name)
        return self

    @classmethod
    def select(cls, selector, item):
        """
        Walk *selector* (an iterable of keys) through *item*.

        *item* is used directly when it is a dict, otherwise its ``el``
        attribute is used. Returns None as soon as a key is missing.
        """
        content = item if isinstance(item, dict) else item.el

        for el in selector:
            if el not in content:
                return None
            content = content.get(el)

        return content

View file

@ -33,22 +33,21 @@ from weboob.tools.html import html2text
_NO_DEFAULT = object()
__all__ = ['FilterError', 'ColumnNotFound', 'RegexpError', 'ItemNotFound',
'Filter', 'Base', 'Env', 'TableCell', 'CleanHTML', 'RawText',
'CleanText', 'Lower', 'CleanDecimal', 'Field', 'Regexp', 'Map',
'DateTime', 'Date', 'Time', 'DateGuesser', 'Duration',
'MultiFilter', 'CombineDate', 'Format', 'Join']
class FilterError(ParseError):
    """Base error for filters; a specialisation of ParseError."""
class XPathNotFound(FilterError):
    """Filter error reported when a selector matches no element."""
class ColumnNotFound(FilterError):
    """Filter error for a column that could not be found."""
class AttributeNotFound(FilterError):
    """Filter error reported when a matched element lacks the wanted attribute."""
class RegexpError(FilterError):
    """Filter error type dedicated to regexp-related failures."""
@ -127,49 +126,6 @@ class _Selector(Filter):
return self.default_or_raise(ParseError('Element %r not found' % self.selector))
class _DictMeta(type):
def __getitem__(cls, name):
return cls(name)
class Dict(_Selector):
    """
    Select a value in nested dictionaries by following a path of keys.

    The path is given as a ``'/'``-separated string (``Dict('a/b')``) and can
    be extended by subscripting an instance (``Dict('a')['b']``). Through the
    Python 2 ``__metaclass__`` hook, subscripting the class itself
    (``Dict['a']``) creates an instance.

    :param selector: ``'/'``-separated key path, or None for an empty path
    :param default: forwarded to the parent selector as its default
    """
    __metaclass__ = _DictMeta

    def __init__(self, selector=None, default=_NO_DEFAULT):
        # Give the parent the selector itself rather than this instance;
        # self.selector is replaced by the list of keys right after.
        super(Dict, self).__init__(selector, default=default)
        self.selector = selector.split('/') if selector is not None else []

    def __getitem__(self, name):
        """Append *name* to the key path and return this same filter."""
        self.selector.append(name)
        return self

    @classmethod
    def select(cls, selector, item):
        """
        Walk *selector* (an iterable of keys) through *item*.

        *item* is used directly when it is a dict, otherwise its ``el``
        attribute is used. Returns None as soon as a key is missing.
        """
        content = item if isinstance(item, dict) else item.el

        for el in selector:
            if el not in content:
                return None
            content = content.get(el)

        return content
class CSS(_Selector):
    """Selector whose expression is resolved with the item's ``cssselect``."""

    @classmethod
    def select(cls, selector, item):
        """Return the result of ``item.cssselect(selector)``."""
        matches = item.cssselect(selector)
        return matches
class XPath(_Selector):
    """Selector that takes all of its behaviour from ``_Selector``."""
class Base(Filter):
"""
Change the base element used in filters.
@ -343,31 +299,6 @@ class CleanDecimal(CleanText):
return self.default_or_raise(e)
class Attr(Filter):
    """
    Get the value of an attribute of the first selected element.

    :param selector: selector handed to the parent filter
    :param attr: name of the attribute to read
    :param default: forwarded to the parent filter as its default
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        super(Attr, self).__init__(selector, default=default)
        self.attr = attr

    def filter(self, el):
        """
        Return attribute ``self.attr`` of ``el[0]`` formatted as unicode text.

        A missing element is reported as XPathNotFound and a missing
        attribute as AttributeNotFound, both through ``default_or_raise``.
        """
        # Step 1: nothing selected at all.
        try:
            node = el[0]
        except IndexError:
            return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector))

        # Step 2: element found, but the attribute may be absent.
        try:
            value = node.attrib[self.attr]
        except KeyError:
            return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (node, self.attr)))

        return u'%s' % value
class Link(Attr):
    """
    Get the link uri of an element (its ``href`` attribute).

    If no element is matched, an XPathNotFound error is produced; if the
    matched element has no ``href`` attribute, an AttributeNotFound error is
    produced. Both go through ``default_or_raise``, so a supplied ``default``
    presumably takes precedence over raising.

    :param selector: selector locating the element (None is accepted)
    :param default: forwarded to ``Attr`` as the default
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        super(Link, self).__init__(selector, 'href', default=default)
class Field(_Filter):
"""
Get the attribute of object.