split filters into several files
This commit is contained in:
parent
d9e04292c4
commit
c90b5844e4
7 changed files with 167 additions and 78 deletions
|
|
@ -26,9 +26,10 @@ from cStringIO import StringIO
|
||||||
from weboob.capabilities.bank import Account
|
from weboob.capabilities.bank import Account
|
||||||
from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
|
from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
|
||||||
from weboob.tools.browser2.elements import ListElement, ItemElement
|
from weboob.tools.browser2.elements import ListElement, ItemElement
|
||||||
from weboob.tools.browser2.filters import ParseError, CleanText, Regexp, Attr, CleanDecimal, Env
|
from weboob.tools.browser2.filters import CleanText, Regexp, Attr, CleanDecimal, Env
|
||||||
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
|
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
|
||||||
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
|
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
|
||||||
|
from weboob.tools.exceptions import ParseError
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['LoginPage', 'IndexPage', 'AccountsPage', 'OperationsPage']
|
__all__ = ['LoginPage', 'IndexPage', 'AccountsPage', 'OperationsPage']
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,8 @@ from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardEr
|
||||||
|
|
||||||
from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination
|
from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination
|
||||||
from weboob.tools.browser2.elements import ListElement, ItemElement
|
from weboob.tools.browser2.elements import ListElement, ItemElement
|
||||||
from weboob.tools.browser2.filters import Env, CleanDecimal, ParseError
|
from weboob.tools.browser2.filters import Env, CleanDecimal
|
||||||
|
from weboob.tools.exceptions import ParseError
|
||||||
|
|
||||||
__all__ = ['LoginPage', 'IndexPage', 'OperationsPage']
|
__all__ = ['LoginPage', 'IndexPage', 'OperationsPage']
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,11 +20,18 @@
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
|
|
||||||
from weboob.tools.log import getLogger
|
from weboob.tools.log import getLogger
|
||||||
from weboob.tools.ordereddict import OrderedDict
|
from weboob.tools.ordereddict import OrderedDict
|
||||||
from weboob.tools.browser2.page import NextPage
|
from weboob.tools.browser2.page import NextPage
|
||||||
|
|
||||||
|
from .filters.standard import _Filter, CleanText
|
||||||
|
from .filters.html import AttributeNotFound, XPathNotFound
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['DataError', 'AbstractElement', 'ListElement', 'ItemElement', 'TableElement', 'SkipItem']
|
||||||
|
|
||||||
|
|
||||||
class DataError(Exception):
|
class DataError(Exception):
|
||||||
"""
|
"""
|
||||||
Returned data from pages are incoherent.
|
Returned data from pages are incoherent.
|
||||||
|
|
|
||||||
22
weboob/tools/browser2/filters/__init__.py
Normal file
22
weboob/tools/browser2/filters/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2014 Romain Bignon
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
from .standard import * # NOQA
|
||||||
|
from .html import * # NOQA
|
||||||
|
from .json import * # NOQA
|
||||||
67
weboob/tools/browser2/filters/html.py
Normal file
67
weboob/tools/browser2/filters/html.py
Normal file
|
|
@ -0,0 +1,67 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2014 Romain Bignon
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from .standard import _Selector, _NO_DEFAULT, Filter, FilterError
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound', 'Attr', 'Link']
|
||||||
|
|
||||||
|
|
||||||
|
class XPathNotFound(FilterError):
    """
    Error handed to ``default_or_raise`` by ``Attr.filter`` when the list of
    selected elements is empty.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class AttributeNotFound(FilterError):
    """
    Error handed to ``default_or_raise`` by ``Attr.filter`` when the first
    selected element does not carry the requested attribute.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class CSS(_Selector):
    """
    Selector filter that resolves its selector through ``item.cssselect``.
    """

    @classmethod
    def select(cls, selector, item):
        """
        Return whatever ``item.cssselect(selector)`` yields.

        :param selector: CSS selector expression
        :param item: object exposing a ``cssselect`` method
                     (presumably an lxml element -- confirm against callers)
        """
        matched = item.cssselect(selector)
        return matched
|
||||||
|
|
||||||
|
|
||||||
|
class XPath(_Selector):
    """
    Selector filter that inherits all of its behaviour from ``_Selector``.

    NOTE(review): presumably ``_Selector.select`` evaluates the selector as an
    XPath expression -- confirm in ``filters/standard.py``.
    """
|
||||||
|
|
||||||
|
|
||||||
|
class Attr(Filter):
    """
    Get the value of an attribute on the first selected element.
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        """
        :param selector: selector forwarded to the parent filter
        :param attr: name of the attribute to read
        :param default: default value forwarded to the parent filter
        """
        super(Attr, self).__init__(selector, default=default)
        self.attr = attr

    def filter(self, el):
        """
        Return ``el[0].attrib[self.attr]`` formatted as a unicode string.

        An empty selection is reported with XPathNotFound and a missing
        attribute with AttributeNotFound, both through ``default_or_raise``.
        """
        try:
            node = el[0]
            value = node.attrib[self.attr]
            return u'%s' % value
        except IndexError:
            # Nothing was selected at all.
            not_found = XPathNotFound('Unable to find link %s' % self.selector)
            return self.default_or_raise(not_found)
        except KeyError:
            # The element exists but lacks the attribute.
            missing = AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr))
            return self.default_or_raise(missing)
|
||||||
|
|
||||||
|
|
||||||
|
class Link(Attr):
    """
    Get the link uri of an element.

    The ``href`` attribute of the first selected element is returned.

    If no element is selected, the IndexError is caught by ``Attr.filter``
    and XPathNotFound is handed to ``default_or_raise`` instead; if the
    element has no ``href`` attribute, AttributeNotFound is.
    NOTE(review): ``default_or_raise`` is not visible here -- presumably it
    returns the default when one was given and raises the error otherwise.
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        """
        :param selector: selector of the element carrying the link
        :param default: default value forwarded to ``Attr``
        """
        super(Link, self).__init__(selector, 'href', default=default)
|
||||||
60
weboob/tools/browser2/filters/json.py
Normal file
60
weboob/tools/browser2/filters/json.py
Normal file
|
|
@ -0,0 +1,60 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2014 Romain Bignon
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
from .standard import _Selector, _NO_DEFAULT
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['Dict']
|
||||||
|
|
||||||
|
|
||||||
|
class _DictMeta(type):
|
||||||
|
def __getitem__(cls, name):
|
||||||
|
return cls(name)
|
||||||
|
|
||||||
|
|
||||||
|
class Dict(_Selector):
    """
    Walk nested dictionaries (typically decoded JSON) along a path of keys.

    The path is either given as a '/'-separated string (``Dict('a/b')``) or
    built by subscription (``Dict['a']['b']``): the first subscription goes
    through ``_DictMeta`` on the class, the following ones through
    ``__getitem__`` on the instance.
    """
    __metaclass__ = _DictMeta

    def __init__(self, selector=None, default=_NO_DEFAULT):
        """
        :param selector: '/'-separated key path, or None for an empty path
        :param default: default value forwarded to the parent constructor
        """
        # NOTE(review): the instance itself is passed as the parent's
        # selector argument; the real key path is stored right below.
        super(Dict, self).__init__(self, default=default)
        self.selector = selector.split('/') if selector is not None else []

    def __getitem__(self, name):
        """
        Append *name* to the key path and return this same filter, so that
        subscriptions can be chained.
        """
        self.selector.append(name)
        return self

    @classmethod
    def select(cls, selector, item):
        """
        Follow *selector* (a sequence of keys) inside *item*.

        :param selector: sequence of keys to follow, outermost first
        :param item: a dict, or an object whose ``el`` attribute holds the data
        :returns: the value found at the end of the path, or None as soon as
                  a key is missing or an intermediate value cannot be
                  descended into
        """
        content = item if isinstance(item, dict) else item.el

        for key in selector:
            # An intermediate value that is not mapping-like (JSON null,
            # scalar, list) used to fail with TypeError/AttributeError;
            # report it as "not found", exactly like a missing key.
            if not hasattr(content, 'get') or key not in content:
                return None

            content = content.get(key)

        return content
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -33,22 +33,21 @@ from weboob.tools.html import html2text
|
||||||
_NO_DEFAULT = object()
|
_NO_DEFAULT = object()
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['FilterError', 'ColumnNotFound', 'RegexpError', 'ItemNotFound',
|
||||||
|
'Filter', 'Base', 'Env', 'TableCell', 'CleanHTML', 'RawText',
|
||||||
|
'CleanText', 'Lower', 'CleanDecimal', 'Field', 'Regexp', 'Map',
|
||||||
|
'DateTime', 'Date', 'Time', 'DateGuesser', 'Duration',
|
||||||
|
'MultiFilter', 'CombineDate', 'Format', 'Join']
|
||||||
|
|
||||||
|
|
||||||
class FilterError(ParseError):
|
class FilterError(ParseError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class XPathNotFound(FilterError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class ColumnNotFound(FilterError):
|
class ColumnNotFound(FilterError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class AttributeNotFound(FilterError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class RegexpError(FilterError):
|
class RegexpError(FilterError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -127,49 +126,6 @@ class _Selector(Filter):
|
||||||
return self.default_or_raise(ParseError('Element %r not found' % self.selector))
|
return self.default_or_raise(ParseError('Element %r not found' % self.selector))
|
||||||
|
|
||||||
|
|
||||||
class _DictMeta(type):
|
|
||||||
def __getitem__(cls, name):
|
|
||||||
return cls(name)
|
|
||||||
|
|
||||||
|
|
||||||
class Dict(_Selector):
|
|
||||||
__metaclass__ = _DictMeta
|
|
||||||
|
|
||||||
def __init__(self, selector=None, default=_NO_DEFAULT):
|
|
||||||
super(Dict, self).__init__(self, default=default)
|
|
||||||
self.selector = selector.split('/') if selector is not None else []
|
|
||||||
|
|
||||||
def __getitem__(self, name):
|
|
||||||
self.selector.append(name)
|
|
||||||
return self
|
|
||||||
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def select(cls, selector, item):
|
|
||||||
if isinstance(item, dict):
|
|
||||||
content = item
|
|
||||||
else:
|
|
||||||
content = item.el
|
|
||||||
|
|
||||||
for el in selector:
|
|
||||||
if el not in content:
|
|
||||||
return None
|
|
||||||
|
|
||||||
content = content.get(el)
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
|
||||||
class CSS(_Selector):
|
|
||||||
@classmethod
|
|
||||||
def select(cls, selector, item):
|
|
||||||
return item.cssselect(selector)
|
|
||||||
|
|
||||||
|
|
||||||
class XPath(_Selector):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class Base(Filter):
|
class Base(Filter):
|
||||||
"""
|
"""
|
||||||
Change the base element used in filters.
|
Change the base element used in filters.
|
||||||
|
|
@ -343,31 +299,6 @@ class CleanDecimal(CleanText):
|
||||||
return self.default_or_raise(e)
|
return self.default_or_raise(e)
|
||||||
|
|
||||||
|
|
||||||
class Attr(Filter):
|
|
||||||
def __init__(self, selector, attr, default=_NO_DEFAULT):
|
|
||||||
super(Attr, self).__init__(selector, default=default)
|
|
||||||
self.attr = attr
|
|
||||||
|
|
||||||
def filter(self, el):
|
|
||||||
try:
|
|
||||||
return u'%s' % el[0].attrib[self.attr]
|
|
||||||
except IndexError:
|
|
||||||
return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector))
|
|
||||||
except KeyError:
|
|
||||||
return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr)))
|
|
||||||
|
|
||||||
|
|
||||||
class Link(Attr):
|
|
||||||
"""
|
|
||||||
Get the link uri of an element.
|
|
||||||
|
|
||||||
If the <a> tag is not found, an exception IndexError is raised.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, selector=None, default=_NO_DEFAULT):
|
|
||||||
super(Link, self).__init__(selector, 'href', default=default)
|
|
||||||
|
|
||||||
|
|
||||||
class Field(_Filter):
|
class Field(_Filter):
|
||||||
"""
|
"""
|
||||||
Get the attribute of object.
|
Get the attribute of object.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue