split filters into several files
This commit is contained in:
parent
d9e04292c4
commit
c90b5844e4
7 changed files with 167 additions and 78 deletions
|
|
@ -26,9 +26,10 @@ from cStringIO import StringIO
|
|||
from weboob.capabilities.bank import Account
|
||||
from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
|
||||
from weboob.tools.browser2.elements import ListElement, ItemElement
|
||||
from weboob.tools.browser2.filters import ParseError, CleanText, Regexp, Attr, CleanDecimal, Env
|
||||
from weboob.tools.browser2.filters import CleanText, Regexp, Attr, CleanDecimal, Env
|
||||
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
|
||||
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
|
||||
from weboob.tools.exceptions import ParseError
|
||||
|
||||
|
||||
__all__ = ['LoginPage', 'IndexPage', 'AccountsPage', 'OperationsPage']
|
||||
|
|
|
|||
|
|
@ -27,7 +27,8 @@ from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardEr
|
|||
|
||||
from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination
|
||||
from weboob.tools.browser2.elements import ListElement, ItemElement
|
||||
from weboob.tools.browser2.filters import Env, CleanDecimal, ParseError
|
||||
from weboob.tools.browser2.filters import Env, CleanDecimal
|
||||
from weboob.tools.exceptions import ParseError
|
||||
|
||||
__all__ = ['LoginPage', 'IndexPage', 'OperationsPage']
|
||||
|
||||
|
|
|
|||
|
|
@ -20,11 +20,18 @@
|
|||
import re
|
||||
import sys
|
||||
from copy import deepcopy
|
||||
from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
|
||||
|
||||
from weboob.tools.log import getLogger
|
||||
from weboob.tools.ordereddict import OrderedDict
|
||||
from weboob.tools.browser2.page import NextPage
|
||||
|
||||
from .filters.standard import _Filter, CleanText
|
||||
from .filters.html import AttributeNotFound, XPathNotFound
|
||||
|
||||
|
||||
__all__ = ['DataError', 'AbstractElement', 'ListElement', 'ItemElement', 'TableElement', 'SkipItem']
|
||||
|
||||
|
||||
class DataError(Exception):
|
||||
"""
|
||||
Returned data from pages are incoherent.
|
||||
|
|
|
|||
22
weboob/tools/browser2/filters/__init__.py
Normal file
22
weboob/tools/browser2/filters/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from .standard import * # NOQA
|
||||
from .html import * # NOQA
|
||||
from .json import * # NOQA
|
||||
67
weboob/tools/browser2/filters/html.py
Normal file
67
weboob/tools/browser2/filters/html.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from .standard import _Selector, _NO_DEFAULT, Filter, FilterError
|
||||
|
||||
|
||||
__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound', 'Attr', 'Link']
|
||||
|
||||
|
||||
class XPathNotFound(FilterError):
    """Filter error used when a selector yields no element to work on."""
|
||||
|
||||
|
||||
class AttributeNotFound(FilterError):
    """Filter error used when an element lacks the requested attribute."""
|
||||
|
||||
|
||||
class CSS(_Selector):
    """Selector whose expression is resolved through ``cssselect``."""

    @classmethod
    def select(cls, selector, item):
        """Return the result of ``item.cssselect(selector)`` unchanged."""
        matches = item.cssselect(selector)
        return matches
|
||||
|
||||
|
||||
class XPath(_Selector):
    """Selector that adds nothing to the behaviour inherited from ``_Selector``."""
|
||||
|
||||
|
||||
class Attr(Filter):
    """
    Get the value of an attribute from the first selected element.

    :param selector: selector handed to the parent ``Filter``
    :param attr: name of the attribute to read
    :param default: fallback forwarded to the parent ``Filter``
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        super(Attr, self).__init__(selector, default=default)
        self.attr = attr

    def filter(self, el):
        """
        Return the attribute of ``el[0]`` as a unicode string.

        An empty selection or a missing attribute is reported through
        ``default_or_raise`` with XPathNotFound or AttributeNotFound.
        """
        try:
            value = el[0].attrib[self.attr]
        except IndexError:
            # el[0] does not exist: the selector matched nothing.
            error = XPathNotFound('Unable to find link %s' % self.selector)
        except KeyError:
            # The element exists but does not carry the attribute.
            error = AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr))
        else:
            return u'%s' % value
        return self.default_or_raise(error)
|
||||
|
||||
|
||||
class Link(Attr):
    """
    Get the link uri of an element.

    This is ``Attr`` bound to the ``href`` attribute. If the selector matches
    nothing, ``Attr.filter`` catches the IndexError and hands an XPathNotFound
    error to ``default_or_raise``; a missing ``href`` attribute is reported the
    same way with AttributeNotFound. No bare IndexError reaches the caller.

    :param selector: selector of the element carrying the link
    :param default: fallback forwarded to ``Attr``
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        super(Link, self).__init__(selector, 'href', default=default)
|
||||
60
weboob/tools/browser2/filters/json.py
Normal file
60
weboob/tools/browser2/filters/json.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2014 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from .standard import _Selector, _NO_DEFAULT
|
||||
|
||||
|
||||
__all__ = ['Dict']
|
||||
|
||||
|
||||
class _DictMeta(type):
|
||||
def __getitem__(cls, name):
|
||||
return cls(name)
|
||||
|
||||
|
||||
class Dict(_Selector):
    """
    Selector that walks nested mappings along a key path.

    The path is either given as a '/'-separated string (``Dict('a/b')``) or
    extended one key at a time by subscripting an instance (``Dict()['a']['b']``).
    The ``_DictMeta`` metaclass additionally allows ``Dict['a']``.

    :param selector: '/'-separated key path, or None for an empty path
    :param default: fallback forwarded to the parent selector
    """
    __metaclass__ = _DictMeta

    def __init__(self, selector=None, default=_NO_DEFAULT):
        # Give the parent the raw selector (not the instance itself); the
        # attribute is replaced just below by the list of path components.
        super(Dict, self).__init__(selector, default=default)
        self.selector = selector.split('/') if selector is not None else []

    def __getitem__(self, name):
        """Append ``name`` to the key path and return this same instance."""
        self.selector.append(name)
        return self

    @classmethod
    def select(cls, selector, item):
        """
        Follow the keys of ``selector`` into ``item``.

        ``item`` is used directly when it is a dict, otherwise its ``el``
        attribute is used. Returns None as soon as a key is absent.
        """
        content = item if isinstance(item, dict) else item.el

        for el in selector:
            if el not in content:
                return None

            content = content.get(el)

        return content
|
||||
|
||||
|
||||
|
||||
|
|
@ -33,22 +33,21 @@ from weboob.tools.html import html2text
|
|||
_NO_DEFAULT = object()
|
||||
|
||||
|
||||
__all__ = ['FilterError', 'ColumnNotFound', 'RegexpError', 'ItemNotFound',
|
||||
'Filter', 'Base', 'Env', 'TableCell', 'CleanHTML', 'RawText',
|
||||
'CleanText', 'Lower', 'CleanDecimal', 'Field', 'Regexp', 'Map',
|
||||
'DateTime', 'Date', 'Time', 'DateGuesser', 'Duration',
|
||||
'MultiFilter', 'CombineDate', 'Format', 'Join']
|
||||
|
||||
|
||||
class FilterError(ParseError):
    """Common parent of the filter-specific errors (a kind of ParseError)."""
|
||||
|
||||
|
||||
class XPathNotFound(FilterError):
    """Filter error used when a selector yields no element to work on."""
|
||||
|
||||
|
||||
class ColumnNotFound(FilterError):
    """Filter error dedicated to column lookups."""
    # NOTE(review): the raising site is not visible in this excerpt -- confirm
    # the exact condition against the table-related filters.
|
||||
|
||||
|
||||
class AttributeNotFound(FilterError):
    """Filter error used when an element lacks the requested attribute."""
|
||||
|
||||
|
||||
class RegexpError(FilterError):
    """Filter error dedicated to regular-expression filters."""
    # NOTE(review): the raising site is not visible in this excerpt -- confirm
    # the exact condition against the Regexp filter.
|
||||
|
||||
|
|
@ -127,49 +126,6 @@ class _Selector(Filter):
|
|||
return self.default_or_raise(ParseError('Element %r not found' % self.selector))
|
||||
|
||||
|
||||
class _DictMeta(type):
|
||||
def __getitem__(cls, name):
|
||||
return cls(name)
|
||||
|
||||
|
||||
class Dict(_Selector):
    """
    Selector that walks nested mappings along a key path.

    The path is either given as a '/'-separated string (``Dict('a/b')``) or
    extended one key at a time by subscripting an instance (``Dict()['a']['b']``).
    The ``_DictMeta`` metaclass additionally allows ``Dict['a']``.

    :param selector: '/'-separated key path, or None for an empty path
    :param default: fallback forwarded to the parent selector
    """
    __metaclass__ = _DictMeta

    def __init__(self, selector=None, default=_NO_DEFAULT):
        # Give the parent the raw selector (not the instance itself); the
        # attribute is replaced just below by the list of path components.
        super(Dict, self).__init__(selector, default=default)
        self.selector = selector.split('/') if selector is not None else []

    def __getitem__(self, name):
        """Append ``name`` to the key path and return this same instance."""
        self.selector.append(name)
        return self

    @classmethod
    def select(cls, selector, item):
        """
        Follow the keys of ``selector`` into ``item``.

        ``item`` is used directly when it is a dict, otherwise its ``el``
        attribute is used. Returns None as soon as a key is absent.
        """
        content = item if isinstance(item, dict) else item.el

        for el in selector:
            if el not in content:
                return None

            content = content.get(el)

        return content
|
||||
|
||||
|
||||
class CSS(_Selector):
    """Selector whose expression is resolved through ``cssselect``."""

    @classmethod
    def select(cls, selector, item):
        """Return the result of ``item.cssselect(selector)`` unchanged."""
        matches = item.cssselect(selector)
        return matches
|
||||
|
||||
|
||||
class XPath(_Selector):
    """Selector that adds nothing to the behaviour inherited from ``_Selector``."""
|
||||
|
||||
|
||||
class Base(Filter):
|
||||
"""
|
||||
Change the base element used in filters.
|
||||
|
|
@ -343,31 +299,6 @@ class CleanDecimal(CleanText):
|
|||
return self.default_or_raise(e)
|
||||
|
||||
|
||||
class Attr(Filter):
    """
    Get the value of an attribute from the first selected element.

    :param selector: selector handed to the parent ``Filter``
    :param attr: name of the attribute to read
    :param default: fallback forwarded to the parent ``Filter``
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        super(Attr, self).__init__(selector, default=default)
        self.attr = attr

    def filter(self, el):
        """
        Return the attribute of ``el[0]`` as a unicode string.

        An empty selection or a missing attribute is reported through
        ``default_or_raise`` with XPathNotFound or AttributeNotFound.
        """
        try:
            value = el[0].attrib[self.attr]
        except IndexError:
            # el[0] does not exist: the selector matched nothing.
            error = XPathNotFound('Unable to find link %s' % self.selector)
        except KeyError:
            # The element exists but does not carry the attribute.
            error = AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr))
        else:
            return u'%s' % value
        return self.default_or_raise(error)
|
||||
|
||||
|
||||
class Link(Attr):
    """
    Get the link uri of an element.

    This is ``Attr`` bound to the ``href`` attribute. If the selector matches
    nothing, ``Attr.filter`` catches the IndexError and hands an XPathNotFound
    error to ``default_or_raise``; a missing ``href`` attribute is reported the
    same way with AttributeNotFound. No bare IndexError reaches the caller.

    :param selector: selector of the element carrying the link
    :param default: fallback forwarded to ``Attr``
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        super(Link, self).__init__(selector, 'href', default=default)
|
||||
|
||||
|
||||
class Field(_Filter):
|
||||
"""
|
||||
Get the attribute of object.
|
||||
Loading…
Add table
Add a link
Reference in a new issue