diff --git a/modules/banqueaccord/pages.py b/modules/banqueaccord/pages.py index 41f52ad6..01ffb29d 100644 --- a/modules/banqueaccord/pages.py +++ b/modules/banqueaccord/pages.py @@ -26,9 +26,10 @@ from cStringIO import StringIO from weboob.capabilities.bank import Account from weboob.tools.browser2.page import HTMLPage, method, LoggedPage from weboob.tools.browser2.elements import ListElement, ItemElement -from weboob.tools.browser2.filters import ParseError, CleanText, Regexp, Attr, CleanDecimal, Env +from weboob.tools.browser2.filters import CleanText, Regexp, Attr, CleanDecimal, Env from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError from weboob.tools.capabilities.bank.transactions import FrenchTransaction +from weboob.tools.exceptions import ParseError __all__ = ['LoginPage', 'IndexPage', 'AccountsPage', 'OperationsPage'] diff --git a/modules/oney/pages.py b/modules/oney/pages.py index 5e4d6577..2dcf1172 100644 --- a/modules/oney/pages.py +++ b/modules/oney/pages.py @@ -27,7 +27,8 @@ from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardEr from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination from weboob.tools.browser2.elements import ListElement, ItemElement -from weboob.tools.browser2.filters import Env, CleanDecimal, ParseError +from weboob.tools.browser2.filters import Env, CleanDecimal +from weboob.tools.exceptions import ParseError __all__ = ['LoginPage', 'IndexPage', 'OperationsPage'] diff --git a/weboob/tools/browser2/elements.py b/weboob/tools/browser2/elements.py index 881b98cc..92c0c073 100644 --- a/weboob/tools/browser2/elements.py +++ b/weboob/tools/browser2/elements.py @@ -20,11 +20,18 @@ import re import sys from copy import deepcopy -from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound + from weboob.tools.log import getLogger from weboob.tools.ordereddict import OrderedDict from weboob.tools.browser2.page import NextPage +from 
.filters.standard import _Filter, CleanText +from .filters.html import AttributeNotFound, XPathNotFound + + +__all__ = ['DataError', 'AbstractElement', 'ListElement', 'ItemElement', 'TableElement', 'SkipItem'] + + class DataError(Exception): """ Returned data from pages are incoherent. diff --git a/weboob/tools/browser2/filters/__init__.py b/weboob/tools/browser2/filters/__init__.py new file mode 100644 index 00000000..ace9f160 --- /dev/null +++ b/weboob/tools/browser2/filters/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from .standard import * # NOQA +from .html import * # NOQA +from .json import * # NOQA diff --git a/weboob/tools/browser2/filters/html.py b/weboob/tools/browser2/filters/html.py new file mode 100644 index 00000000..f7a4b1dd --- /dev/null +++ b/weboob/tools/browser2/filters/html.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from .standard import _Selector, _NO_DEFAULT, Filter, FilterError + + +__all__ = ['CSS', 'XPath', 'XPathNotFound', 'AttributeNotFound', 'Attr', 'Link'] + + +class XPathNotFound(FilterError): + pass + + +class AttributeNotFound(FilterError): + pass + + +class CSS(_Selector): + @classmethod + def select(cls, selector, item): + return item.cssselect(selector) + + +class XPath(_Selector): + pass + + +class Attr(Filter): + def __init__(self, selector, attr, default=_NO_DEFAULT): + super(Attr, self).__init__(selector, default=default) + self.attr = attr + + def filter(self, el): + try: + return u'%s' % el[0].attrib[self.attr] + except IndexError: # el is empty: the selector matched no element + return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector)) + except KeyError: # first element has no such attribute + return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr))) + + +class Link(Attr): + """ + Get the link uri of an element. + + If the tag is not found, XPathNotFound (not IndexError) is passed to default_or_raise(). + """ + + def __init__(self, selector=None, default=_NO_DEFAULT): + super(Link, self).__init__(selector, 'href', default=default) diff --git a/weboob/tools/browser2/filters/json.py b/weboob/tools/browser2/filters/json.py new file mode 100644 index 00000000..d05eacbb --- /dev/null +++ b/weboob/tools/browser2/filters/json.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Romain Bignon +# +# This file is part of weboob. 
+# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from .standard import _Selector, _NO_DEFAULT + + +__all__ = ['Dict'] + + +class _DictMeta(type): + def __getitem__(cls, name): + return cls(name) + + +class Dict(_Selector): + __metaclass__ = _DictMeta + + def __init__(self, selector=None, default=_NO_DEFAULT): + super(Dict, self).__init__(selector, default=default) # pass the selector, not the instance itself; replaced by the split path below + self.selector = selector.split('/') if selector is not None else [] + + def __getitem__(self, name): + self.selector.append(name) + return self + + + @classmethod + def select(cls, selector, item): + if isinstance(item, dict): + content = item + else: + content = item.el + + for el in selector: + if el not in content: + return None + + content = content.get(el) + + return content + + + diff --git a/weboob/tools/browser2/filters.py b/weboob/tools/browser2/filters/standard.py similarity index 88% rename from weboob/tools/browser2/filters.py rename to weboob/tools/browser2/filters/standard.py index 09ef1d34..15558302 100644 --- a/weboob/tools/browser2/filters.py +++ b/weboob/tools/browser2/filters/standard.py @@ -33,22 +33,21 @@ from weboob.tools.html import html2text _NO_DEFAULT = object() +__all__ = ['FilterError', 'ColumnNotFound', 'RegexpError', 'ItemNotFound', + 'Filter', 'Base', 'Env', 'TableCell', 'CleanHTML', 'RawText', + 'CleanText', 'Lower', 'CleanDecimal', 'Field', 'Regexp', 'Map', + 'DateTime', 'Date', 'Time', 'DateGuesser', 
'Duration', + 'MultiFilter', 'CombineDate', 'Format', 'Join'] + + class FilterError(ParseError): pass -class XPathNotFound(FilterError): - pass - - class ColumnNotFound(FilterError): pass -class AttributeNotFound(FilterError): - pass - - class RegexpError(FilterError): pass @@ -127,49 +126,6 @@ class _Selector(Filter): return self.default_or_raise(ParseError('Element %r not found' % self.selector)) -class _DictMeta(type): - def __getitem__(cls, name): - return cls(name) - - -class Dict(_Selector): - __metaclass__ = _DictMeta - - def __init__(self, selector=None, default=_NO_DEFAULT): - super(Dict, self).__init__(self, default=default) - self.selector = selector.split('/') if selector is not None else [] - - def __getitem__(self, name): - self.selector.append(name) - return self - - - @classmethod - def select(cls, selector, item): - if isinstance(item, dict): - content = item - else: - content = item.el - - for el in selector: - if el not in content: - return None - - content = content.get(el) - - return content - - -class CSS(_Selector): - @classmethod - def select(cls, selector, item): - return item.cssselect(selector) - - -class XPath(_Selector): - pass - - class Base(Filter): """ Change the base element used in filters. @@ -343,31 +299,6 @@ class CleanDecimal(CleanText): return self.default_or_raise(e) -class Attr(Filter): - def __init__(self, selector, attr, default=_NO_DEFAULT): - super(Attr, self).__init__(selector, default=default) - self.attr = attr - - def filter(self, el): - try: - return u'%s' % el[0].attrib[self.attr] - except IndexError: - return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector)) - except KeyError: - return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr))) - - -class Link(Attr): - """ - Get the link uri of an element. - - If the tag is not found, an exception IndexError is raised. 
- """ - - def __init__(self, selector=None, default=_NO_DEFAULT): - super(Link, self).__init__(selector, 'href', default=default) - - class Field(_Filter): """ Get the attribute of object.