diff --git a/modules/alloresto/pages.py b/modules/alloresto/pages.py
index 3000a156..48367911 100644
--- a/modules/alloresto/pages.py
+++ b/modules/alloresto/pages.py
@@ -21,7 +21,8 @@
import datetime
from decimal import Decimal
-from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, ItemElement
+from weboob.tools.browser2.page import HTMLPage, LoggedPage, method
+from weboob.tools.browser2.elements import ItemElement
from weboob.tools.browser2.filters import CleanDecimal, CleanText, Filter, TableCell
from weboob.capabilities.bank import Account
from weboob.tools.capabilities.bank.transactions import FrenchTransaction as Transaction
diff --git a/modules/banqueaccord/pages.py b/modules/banqueaccord/pages.py
index f0cdc54b..41f52ad6 100644
--- a/modules/banqueaccord/pages.py
+++ b/modules/banqueaccord/pages.py
@@ -24,7 +24,8 @@ import re
from cStringIO import StringIO
from weboob.capabilities.bank import Account
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, LoggedPage
+from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import ParseError, CleanText, Regexp, Attr, CleanDecimal, Env
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
diff --git a/modules/biplan/pages.py b/modules/biplan/pages.py
index 90f92050..a9f5e508 100644
--- a/modules/biplan/pages.py
+++ b/modules/biplan/pages.py
@@ -23,7 +23,8 @@ from datetime import datetime, time
import weboob.tools.date as date_util
from .calendar import BiplanCalendarEventConcert, BiplanCalendarEventTheatre
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement
+from weboob.tools.browser2.elements import ItemElement, SkipItem, ListElement
+from weboob.tools.browser2.page import HTMLPage, method
from weboob.tools.browser2.filters import Filter, Link, CleanText, Env, Regexp, CombineDate, CleanHTML
diff --git a/modules/carrefourbanque/pages.py b/modules/carrefourbanque/pages.py
index dc89c1c3..6301423f 100644
--- a/modules/carrefourbanque/pages.py
+++ b/modules/carrefourbanque/pages.py
@@ -20,7 +20,8 @@
import re
-from weboob.tools.browser2.page import HTMLPage, ListElement, ItemElement, method, LoggedPage
+from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import Regexp, CleanText, CleanDecimal, Format, Link
from weboob.capabilities.bank import Account
diff --git a/modules/cci/pages.py b/modules/cci/pages.py
index 88b2cbbd..5173d3c8 100644
--- a/modules/cci/pages.py
+++ b/modules/cci/pages.py
@@ -17,7 +17,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, TableElement
+from weboob.tools.browser2.page import HTMLPage, method
+from weboob.tools.browser2.elements import ItemElement, TableElement
from weboob.tools.browser2.filters import Filter, Link, CleanText, Format, Env, DateTime, CleanHTML, TableCell, Join
from weboob.capabilities.job import BaseJobAdvert
diff --git a/modules/creditmutuel/pages.py b/modules/creditmutuel/pages.py
index 22cfda26..d63666f2 100644
--- a/modules/creditmutuel/pages.py
+++ b/modules/creditmutuel/pages.py
@@ -27,7 +27,8 @@ from decimal import Decimal
import re
from dateutil.relativedelta import relativedelta
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, FormNotFound, LoggedPage
+from weboob.tools.browser2.page import HTMLPage, method, FormNotFound, LoggedPage
+from weboob.tools.browser2.elements import ListElement, ItemElement, SkipItem
from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, Field, TableCell
from weboob.tools.exceptions import BrowserIncorrectPassword
from weboob.capabilities import NotAvailable
diff --git a/modules/dresdenwetter/pages.py b/modules/dresdenwetter/pages.py
index 32010409..0d211fcd 100644
--- a/modules/dresdenwetter/pages.py
+++ b/modules/dresdenwetter/pages.py
@@ -17,7 +17,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement
+from weboob.tools.browser2.page import HTMLPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, Regexp, Field, Filter
from weboob.capabilities.gauge import GaugeMeasure, GaugeSensor
from weboob.capabilities.base import NotAvailable
diff --git a/modules/feedly/pages.py b/modules/feedly/pages.py
index 803184d0..8de43bab 100644
--- a/modules/feedly/pages.py
+++ b/modules/feedly/pages.py
@@ -21,7 +21,8 @@ from datetime import datetime
from weboob.capabilities.messages import Message
from weboob.capabilities.collection import Collection
-from weboob.tools.browser2.page import JsonPage, ListElement, method, ItemElement
+from weboob.tools.browser2.page import JsonPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, Dict, Format, CleanHTML
__all__ = ['TokenPage', 'ContentsPage', 'PreferencesPage']
diff --git a/modules/francetelevisions/pages.py b/modules/francetelevisions/pages.py
index 1c81725b..c01d8ccd 100644
--- a/modules/francetelevisions/pages.py
+++ b/modules/francetelevisions/pages.py
@@ -22,7 +22,8 @@ from weboob.capabilities.video import BaseVideo
from datetime import timedelta
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, JsonPage
+from weboob.tools.browser2.page import HTMLPage, method, JsonPage
+from weboob.tools.browser2.elements import ItemElement, ListElement
from weboob.tools.browser2.filters import Filter, Link, CleanText, Regexp, Attr, Format, DateTime, Env, Dict, Duration, XPath
diff --git a/modules/freemobile/pages/history.py b/modules/freemobile/pages/history.py
index 19910e0e..1eab151e 100644
--- a/modules/freemobile/pages/history.py
+++ b/modules/freemobile/pages/history.py
@@ -24,7 +24,8 @@ import lxml.html as html
from datetime import datetime
from decimal import Decimal
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, LoggedPage
+from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
+from weboob.tools.browser2.elements import ItemElement, ListElement
from weboob.tools.browser2.filters import Date, CleanText, Attr, Filter,\
CleanDecimal, Regexp, Field, DateTime, Format, Env
from weboob.capabilities.bill import Detail, Bill
diff --git a/modules/freemobile/pages/homepage.py b/modules/freemobile/pages/homepage.py
index fec6d14a..72b34f12 100644
--- a/modules/freemobile/pages/homepage.py
+++ b/modules/freemobile/pages/homepage.py
@@ -19,7 +19,8 @@
from .history import BadUTF8Page
from weboob.capabilities.bill import Subscription
-from weboob.tools.browser2.page import method, ListElement, ItemElement
+from weboob.tools.browser2.page import method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, Attr, Field, Format, Filter
__all__ = ['HomePage']
diff --git a/modules/groupamaes/pages.py b/modules/groupamaes/pages.py
index 55b2eca3..f7e3c4fe 100644
--- a/modules/groupamaes/pages.py
+++ b/modules/groupamaes/pages.py
@@ -18,7 +18,8 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.browser2.page import HTMLPage, method, TableElement, ItemElement, LoggedPage
+from weboob.tools.browser2.page import HTMLPage, method, LoggedPage
+from weboob.tools.browser2.elements import TableElement, ItemElement
from weboob.tools.browser2.filters import CleanText, CleanDecimal, TableCell, Date
from weboob.capabilities.bank import Account, Transaction
from weboob.tools.date import LinearDateGuesser
diff --git a/modules/hsbc/pages.py b/modules/hsbc/pages.py
index 09e437b9..42309e3e 100644
--- a/modules/hsbc/pages.py
+++ b/modules/hsbc/pages.py
@@ -25,7 +25,8 @@ from weboob.capabilities.bank import Account
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.exceptions import BrowserIncorrectPassword
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, LoggedPage, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement, SkipItem
+from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination
from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, Field, DateGuesser, TableCell
diff --git a/modules/hybride/pages.py b/modules/hybride/pages.py
index 1a71f54e..b6ea3f8a 100644
--- a/modules/hybride/pages.py
+++ b/modules/hybride/pages.py
@@ -23,7 +23,8 @@ from .calendar import HybrideCalendarEvent
import weboob.tools.date as date_util
import re
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement
+from weboob.tools.browser2.page import HTMLPage, method
+from weboob.tools.browser2.elements import ItemElement, SkipItem, ListElement
from weboob.tools.browser2.filters import Filter, Link, CleanText, Env
diff --git a/modules/indeed/pages.py b/modules/indeed/pages.py
index 88d13720..a73ddd5d 100644
--- a/modules/indeed/pages.py
+++ b/modules/indeed/pages.py
@@ -19,7 +19,8 @@
from datetime import timedelta, datetime
import re
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, pagination
+from weboob.tools.browser2.page import HTMLPage, method, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import Filter, CleanText, Regexp, Format, Env, CleanHTML, Attr
from weboob.capabilities.job import BaseJobAdvert
diff --git a/modules/ing/pages/accounts_list.py b/modules/ing/pages/accounts_list.py
index 4daf512c..58dcb483 100644
--- a/modules/ing/pages/accounts_list.py
+++ b/modules/ing/pages/accounts_list.py
@@ -24,7 +24,8 @@ import re
from weboob.capabilities.bank import Account
from weboob.capabilities.base import NotAvailable
-from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, ListElement, ItemElement
+from weboob.tools.browser2.page import HTMLPage, LoggedPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import Attr, CleanText, CleanDecimal, Filter, Field, MultiFilter, Date, Lower
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
diff --git a/modules/ing/pages/bills.py b/modules/ing/pages/bills.py
index 0491ee69..bafecaeb 100644
--- a/modules/ing/pages/bills.py
+++ b/modules/ing/pages/bills.py
@@ -20,7 +20,8 @@
from weboob.capabilities.bill import Bill, Subscription
from weboob.tools.browser2 import HTMLPage, LoggedPage
from weboob.tools.browser2.filters import Filter, Attr, CleanText, Format, Field, Env
-from weboob.tools.browser2.page import ListElement, ItemElement, method, pagination
+from weboob.tools.browser2.page import method, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement
__all__ = ['BillsPage']
diff --git a/modules/ing/pages/titre.py b/modules/ing/pages/titre.py
index a1463bec..aa5913b8 100644
--- a/modules/ing/pages/titre.py
+++ b/modules/ing/pages/titre.py
@@ -21,7 +21,8 @@
from decimal import Decimal
from weboob.capabilities.bank import Investment
-from weboob.tools.browser2.page import RawPage, HTMLPage, method, ListElement, ItemElement
+from weboob.tools.browser2.page import RawPage, HTMLPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanDecimal, CleanText, Date
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
diff --git a/modules/ing/pages/transfer.py b/modules/ing/pages/transfer.py
index c09821dd..0ca8da6c 100644
--- a/modules/ing/pages/transfer.py
+++ b/modules/ing/pages/transfer.py
@@ -18,7 +18,8 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.bank import Recipient, AccountNotFound, Transfer
-from weboob.tools.browser2.page import HTMLPage, LoggedPage, ListElement, ItemElement, method
+from weboob.tools.browser2.page import HTMLPage, LoggedPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, CleanDecimal, Attr, Format
from .login import INGVirtKeyboard
diff --git a/modules/oney/pages.py b/modules/oney/pages.py
index 0d557f23..5e4d6577 100644
--- a/modules/oney/pages.py
+++ b/modules/oney/pages.py
@@ -25,7 +25,8 @@ import requests
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, LoggedPage, pagination
+from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import Env, CleanDecimal, ParseError
__all__ = ['LoginPage', 'IndexPage', 'OperationsPage']
diff --git a/modules/pastealacon/browser.py b/modules/pastealacon/browser.py
index db0a9741..b672052b 100644
--- a/modules/pastealacon/browser.py
+++ b/modules/pastealacon/browser.py
@@ -21,7 +21,8 @@ import re
from weboob.capabilities.paste import BasePaste, PasteNotFound
from weboob.tools.browser2.filters import CleanText, DateTime, Env, RawText, Regexp
-from weboob.tools.browser2.page import HTMLPage, ItemElement, method, PagesBrowser, URL
+from weboob.tools.browser2.page import HTMLPage, method, PagesBrowser, URL
+from weboob.tools.browser2.elements import ItemElement
from weboob.tools.exceptions import BrowserHTTPNotFound
diff --git a/modules/pastebin/browser.py b/modules/pastebin/browser.py
index 835c4747..da97b8f4 100644
--- a/modules/pastebin/browser.py
+++ b/modules/pastebin/browser.py
@@ -23,7 +23,8 @@ import re
from weboob.capabilities.paste import BasePaste, PasteNotFound
from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL
from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText
-from weboob.tools.browser2.page import ItemElement, method, RawPage
+from weboob.tools.browser2.page import method, RawPage
+from weboob.tools.browser2.elements import ItemElement
from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable
diff --git a/modules/poivy/pages.py b/modules/poivy/pages.py
index 7f29d0bf..ab85e294 100644
--- a/modules/poivy/pages.py
+++ b/modules/poivy/pages.py
@@ -18,7 +18,8 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.exceptions import BrowserBanned
-from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, ListElement, ItemElement, pagination
+from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, CleanDecimal, Field, Attr, DateTime, Link, Format
from weboob.capabilities.bill import Subscription, Detail
diff --git a/modules/regionsjob/pages.py b/modules/regionsjob/pages.py
index b0ee3199..78519d35 100644
--- a/modules/regionsjob/pages.py
+++ b/modules/regionsjob/pages.py
@@ -17,7 +17,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see .
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement
+from weboob.tools.browser2.page import HTMLPage, method
+from weboob.tools.browser2.elements import ItemElement, SkipItem, ListElement
from weboob.tools.browser2.filters import Link, CleanText, Regexp, Format, Env, DateGuesser, CleanHTML, DateTime
from weboob.tools.date import LinearDateGuesser
from weboob.capabilities.job import BaseJobAdvert
diff --git a/modules/sachsen/pages.py b/modules/sachsen/pages.py
index 2767a19b..826a387f 100644
--- a/modules/sachsen/pages.py
+++ b/modules/sachsen/pages.py
@@ -17,7 +17,8 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see .
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement
+from weboob.tools.browser2.page import HTMLPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import Env, CleanText, Regexp, Field, DateTime, Map, Attr
from weboob.capabilities.gauge import Gauge, GaugeMeasure, GaugeSensor
from weboob.capabilities.base import NotAvailable, NotLoaded
diff --git a/modules/senscritique/pages.py b/modules/senscritique/pages.py
index 5f66d41a..a165e1c1 100644
--- a/modules/senscritique/pages.py
+++ b/modules/senscritique/pages.py
@@ -22,7 +22,8 @@ from .calendar import SensCritiquenCalendarEvent
from datetime import date, datetime, time, timedelta
-from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, JsonPage
+from weboob.tools.browser2.page import HTMLPage, method, JsonPage
+from weboob.tools.browser2.elements import ItemElement, ListElement
from weboob.tools.browser2.filters import Filter, Link, CleanText, Regexp, Attr, Join, Format
diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py
index 848bdd77..2164353b 100644
--- a/modules/twitter/pages.py
+++ b/modules/twitter/pages.py
@@ -23,7 +23,8 @@ from io import StringIO
import lxml.html as html
import urllib
-from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
+from weboob.tools.browser2.page import HTMLPage, JsonPage, method, FormNotFound, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
from weboob.capabilities.messages import Thread, Message
from weboob.capabilities.base import BaseObject
diff --git a/modules/vlille/pages.py b/modules/vlille/pages.py
index afab9dc0..2cd3c86f 100644
--- a/modules/vlille/pages.py
+++ b/modules/vlille/pages.py
@@ -18,7 +18,8 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from weboob.tools.browser2.page import HTMLPage, XMLPage, method, ListElement, ItemElement, TableElement
+from weboob.tools.browser2.page import HTMLPage, XMLPage, method
+from weboob.tools.browser2.elements import ListElement, ItemElement, TableElement
from weboob.tools.browser2.filters import CleanText, TableCell, Filter
from weboob.capabilities.gauge import Gauge, GaugeMeasure, GaugeSensor
diff --git a/modules/youjizz/pages/index.py b/modules/youjizz/pages/index.py
index 280761b7..6f401ec6 100644
--- a/modules/youjizz/pages/index.py
+++ b/modules/youjizz/pages/index.py
@@ -19,7 +19,8 @@
from weboob.tools.browser2 import HTMLPage
-from weboob.tools.browser2.page import ListElement, method, ItemElement, pagination
+from weboob.tools.browser2.page import method, pagination
+from weboob.tools.browser2.elements import ListElement, ItemElement
from weboob.tools.browser2.filters import Link, CleanText, Duration, Regexp, CSS
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.image import BaseImage
diff --git a/modules/youjizz/pages/video.py b/modules/youjizz/pages/video.py
index b64f0265..1b59b5ca 100644
--- a/modules/youjizz/pages/video.py
+++ b/modules/youjizz/pages/video.py
@@ -20,8 +20,8 @@
import re
-from weboob.tools.browser2 import HTMLPage
-from weboob.tools.browser2.page import method, ItemElement
+from weboob.tools.browser2.page import method, HTMLPage
+from weboob.tools.browser2.elements import ItemElement
from weboob.tools.browser2.filters import CleanText, Env, Duration
from weboob.capabilities.video import BaseVideo
from weboob.tools.misc import to_unicode
diff --git a/weboob/tools/browser2/elements.py b/weboob/tools/browser2/elements.py
new file mode 100644
index 00000000..97d545c3
--- /dev/null
+++ b/weboob/tools/browser2/elements.py
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+import sys
+from copy import deepcopy
+from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
+from weboob.tools.log import getLogger
+from weboob.tools.browser2.page import NextPage
+
+class DataError(Exception):
+ """
+ Data extracted from a page is inconsistent.
+
+ Raised by :meth:`ListElement.store` when two objects share the same ID
+ and duplicates are not ignored.
+ """
+
+
+class AbstractElement(object):
+ """
+ Base class of the elements used to extract data from a page.
+
+ An element is bound to a page and, optionally, to a parent element and
+ to the node ``el`` it works on.
+ """
+ def __init__(self, page, parent=None, el=None):
+ """
+ :param page: page this element belongs to
+ :param parent: enclosing element, if any
+ :param el: node to work on; defaults to the parent's node, or to
+ ``page.doc`` when there is no parent
+ """
+ self.page = page
+ self.parent = parent
+ if el is not None:
+ self.el = el
+ elif parent is not None:
+ self.el = parent.el
+ else:
+ self.el = page.doc
+
+ # The environment is deep-copied, so this element gets its own copy of
+ # the parent's env (or of the page params when there is no parent).
+ if parent is not None:
+ self.env = deepcopy(parent.env)
+ else:
+ self.env = deepcopy(page.params)
+
+ def use_selector(self, func):
+ """
+ Resolve ``func`` to a value.
+
+ A ``_Filter`` instance is called with this element, any other callable
+ is called without argument, and anything else is treated as a constant
+ and deep-copied.
+ """
+ if isinstance(func, _Filter):
+ value = func(self)
+ elif callable(func):
+ value = func()
+ else:
+ value = deepcopy(func)
+
+ return value
+
+ def parse(self, obj):
+ """
+ Hook doing nothing by default.
+
+ ItemElement.__iter__ and ListElement.__iter__ call it with ``self.el``
+ before they start processing.
+ """
+ pass
+
+ def cssselect(self, *args, **kwargs):
+ """Forward the call to ``self.el.cssselect``."""
+ return self.el.cssselect(*args, **kwargs)
+
+ def xpath(self, *args, **kwargs):
+ """Forward the call to ``self.el.xpath``."""
+ return self.el.xpath(*args, **kwargs)
+
+
+class SkipItem(Exception):
+ """
+ Raise this exception in an :class:`ItemElement` subclass to skip an item.
+
+ :meth:`ItemElement.__iter__` catches it and stops without yielding
+ the object.
+ """
+
+
+class _ItemElementMeta(type):
+ """
+ Private meta-class used to keep order of obj_* attributes in :class:`ItemElement`.
+
+ It stores on each created class an ``_attrs`` list holding the names
+ (without the ``obj_`` prefix) of the attributes to fill, inherited ones
+ first.
+ """
+ def __new__(mcs, name, bases, attrs):
+ # Start from the names already registered on the base classes.
+ _attrs = []
+ for base in bases:
+ if hasattr(base, '_attrs'):
+ _attrs += base._attrs
+
+ # (name without 'obj_' prefix, value) for every obj_* attribute declared
+ # in the body of the class being created.
+ filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')]
+ # constants first, then filters, then methods
+ # Sort key: the value's _creation_counter when it has one, otherwise
+ # sys.maxint for callables and 0 for everything else.
+ filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0))
+
+ new_class = super(_ItemElementMeta, mcs).__new__(mcs, name, bases, attrs)
+ # Set after class creation, so it replaces any _attrs from the class body.
+ new_class._attrs = _attrs + [f[0] for f in filters]
+ return new_class
+
+
+class ItemElement(AbstractElement):
+ """
+ Element building a single object.
+
+ Each ``obj_<name>`` attribute of the class is resolved with
+ :meth:`use_selector` and assigned to attribute ``<name>`` of the object.
+ """
+ __metaclass__ = _ItemElementMeta
+
+ # Replaced by the metaclass with the ordered list of obj_* names.
+ _attrs = None
+ # Called without argument to create the object when none was supplied.
+ klass = None
+ # Optional callable; when it returns a false value nothing is yielded.
+ condition = None
+ # Optional callable taking the filled object; a false result drops it.
+ validate = None
+
+ # NOTE(review): not referenced in the visible code; purpose to be confirmed.
+ class Index(object):
+ pass
+
+ def __init__(self, *args, **kwargs):
+ super(ItemElement, self).__init__(*args, **kwargs)
+ # Object being filled; set by __call__ or created lazily in __iter__.
+ self.obj = None
+
+ def build_object(self):
+ """Return a new ``klass`` instance, or None when ``klass`` is unset."""
+ if self.klass is None:
+ return
+ return self.klass()
+
+ def __call__(self, obj=None):
+ """
+ Fill ``obj`` (or a newly built object) and return it.
+
+ Returns None when iteration yields nothing (condition not met, item
+ skipped or validation failed).
+ """
+ if obj is not None:
+ self.obj = obj
+
+ # __iter__ yields at most one object; return it if there is one.
+ for obj in self:
+ return obj
+
+ def __iter__(self):
+ """Yield the filled object, or nothing if it is filtered out."""
+ if self.condition is not None and not self.condition():
+ return
+
+ try:
+ if self.obj is None:
+ self.obj = self.build_object()
+ self.parse(self.el)
+ # Fill attributes in the order computed by the metaclass.
+ for attr in self._attrs:
+ self.handle_attr(attr, getattr(self, 'obj_%s' % attr))
+ except SkipItem:
+ return
+
+ if self.validate is not None and not self.validate(self.obj):
+ return
+
+ yield self.obj
+
+ def handle_attr(self, key, func):
+ """Resolve ``func`` and store the result as attribute ``key`` of the object."""
+ value = self.use_selector(func)
+ setattr(self.obj, key, value)
+
+
+class ListElement(AbstractElement):
+    """
+    Element iterating over nodes and yielding the objects built by the
+    element classes nested in it.
+
+    Class attributes:
+
+    * ``item_xpath``: XPath selecting the nodes to process; when None the
+      element's own node is processed once.
+    * ``flush_at_end``: when true, objects are only yielded once every node
+      has been processed, in the order they were stored.
+    * ``ignore_duplicate``: when true, an object whose ID was already seen
+      is dropped with a warning instead of raising :class:`DataError`.
+    * ``next_page`` (optional): selector resolved at the end of iteration;
+      a non-None value is raised as ``NextPage``.
+    """
+    item_xpath = None
+    flush_at_end = False
+    ignore_duplicate = False
+
+    def __init__(self, *args, **kwargs):
+        super(ListElement, self).__init__(*args, **kwargs)
+        # Local import: keeps this class self-contained in the new module.
+        from weboob.tools.ordereddict import OrderedDict
+        self.logger = getLogger(self.__class__.__name__.lower())
+        # Objects indexed by ID. Ordered, as in the code this class was
+        # moved from, so that flush() returns them in storage order.
+        self.objects = OrderedDict()
+
+    def __call__(self, *args, **kwargs):
+        """Store keyword arguments in the environment and return an iterator."""
+        for key, value in kwargs.iteritems():
+            self.env[key] = value
+
+        return self.__iter__()
+
+    def find_elements(self):
+        """
+        Get the nodes that will have to be processed.
+        This method can be overridden if xpath filters are not
+        sufficient.
+        """
+        if self.item_xpath is not None:
+            for el in self.el.xpath(self.item_xpath):
+                yield el
+        else:
+            yield self.el
+
+    def __iter__(self):
+        """
+        Yield the objects built from every node of :meth:`find_elements`.
+
+        :raises NextPage: once all objects have been yielded, if
+                          ``next_page`` resolves to a value
+        """
+        self.parse(self.el)
+
+        for el in self.find_elements():
+            for obj in self.handle_element(el):
+                # The loop must still run when flushing at the end, since
+                # handle_element() is what stores the objects.
+                if not self.flush_at_end:
+                    yield obj
+
+        if self.flush_at_end:
+            for obj in self.flush():
+                yield obj
+
+        self.check_next_page()
+
+    def flush(self):
+        """
+        Yield the stored objects; used when ``flush_at_end`` is set.
+
+        Only objects recorded by :meth:`store` (those with a true ``id``)
+        are returned.
+        """
+        for obj in self.objects.itervalues():
+            yield obj
+
+    def check_next_page(self):
+        """
+        Raise ``NextPage`` if a ``next_page`` selector gives a value.
+
+        Nothing happens when there is no ``next_page`` attribute, when the
+        selector raises AttributeNotFound or XPathNotFound, or when it
+        resolves to None.
+        """
+        if not hasattr(self, 'next_page'):
+            return
+
+        try:
+            value = self.use_selector(self.next_page)
+        except (AttributeNotFound, XPathNotFound):
+            return
+
+        if value is None:
+            return
+
+        raise NextPage(value)
+
+    def store(self, obj):
+        """
+        Record ``obj`` under its ID and return it.
+
+        Objects with a false ``id`` are returned without being recorded.
+
+        :returns: ``obj``, or None for an ignored duplicate
+        :raises DataError: if the ID is already known and
+                           ``ignore_duplicate`` is false
+        """
+        if obj.id:
+            if obj.id in self.objects:
+                if self.ignore_duplicate:
+                    self.logger.warning('There are two objects with the same ID! %s' % obj.id)
+                    return
+                else:
+                    raise DataError('There are two objects with the same ID! %s' % obj.id)
+            self.objects[obj.id] = obj
+        return obj
+
+    def handle_element(self, el):
+        """
+        Run every nested element class on ``el`` and yield the stored objects.
+        """
+        for attrname in dir(self):
+            attr = getattr(self, attrname)
+            # Keep nested AbstractElement subclasses only; dir() also lists
+            # __class__, which is this very class and must be excluded.
+            if isinstance(attr, type) and issubclass(attr, AbstractElement) and attr != type(self):
+                for obj in attr(self.page, self, el):
+                    obj = self.store(obj)
+                    if obj:
+                        yield obj
+
+
+class TableElement(ListElement):
+ """
+ List element mapping named columns to their position in a table header.
+
+ Each ``col_<name>`` attribute gives the header title, or a list/tuple of
+ accepted titles, of column ``<name>``; titles are compared in lower case.
+ """
+ # XPath selecting the header cells; evaluated on the element's node.
+ head_xpath = None
+ # Object whose clean() is applied to a header cell to get its title.
+ cleaner = CleanText
+
+ def __init__(self, *args, **kwargs):
+ super(TableElement, self).__init__(*args, **kwargs)
+
+ # Column name -> index of the matching header cell.
+ self._cols = {}
+
+ # Column name -> list of accepted titles, lower-cased.
+ columns = {}
+ for attrname in dir(self):
+ m = re.match('col_(.*)', attrname)
+ if m:
+ cols = getattr(self, attrname)
+ if not isinstance(cols, (list,tuple)):
+ cols = [cols]
+ columns[m.group(1)] = [s.lower() for s in cols]
+
+ # Look every header cell up in the accepted titles; if several cells
+ # match the same column name, the last one wins.
+ for colnum, el in enumerate(self.el.xpath(self.head_xpath)):
+ title = self.cleaner.clean(el).lower()
+ for name, titles in columns.iteritems():
+ if title in titles:
+ self._cols[name] = colnum
+
+ def get_colnum(self, name):
+ """Return the index of column ``name``, or None if it was not found."""
+ return self._cols.get(name, None)
diff --git a/weboob/tools/browser2/page.py b/weboob/tools/browser2/page.py
index 703e2069..eba4c2a5 100644
--- a/weboob/tools/browser2/page.py
+++ b/weboob/tools/browser2/page.py
@@ -24,15 +24,11 @@ try:
except ImportError:
from urllib import unquote
import re
-import sys
from copy import deepcopy
from io import BytesIO
import requests
-import lxml.html as html
-import lxml.etree as etree
-from weboob.tools.json import json
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.regex_helper import normalize
from weboob.tools.compat import basestring
@@ -40,7 +36,6 @@ from weboob.tools.compat import basestring
from weboob.tools.log import getLogger
from .browser import DomainBrowser
-from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
class UrlNotResolvable(Exception):
@@ -49,12 +44,6 @@ class UrlNotResolvable(Exception):
"""
-class DataError(Exception):
- """
- Returned data from pages are incoherent.
- """
-
-
class URL(object):
"""
A description of an URL on the PagesBrowser website.
@@ -538,6 +527,7 @@ class Form(OrderedDict):
class JsonPage(BasePage):
def __init__(self, browser, response, *args, **kwargs):
super(JsonPage, self).__init__(browser, response, *args, **kwargs)
+ from weboob.tools.json import json
self.doc = json.loads(response.text)
@@ -550,6 +540,7 @@ class XMLPage(BasePage):
def __init__(self, browser, response, *args, **kwargs):
super(XMLPage, self).__init__(browser, response, *args, **kwargs)
+ import lxml.etree as etree
parser = etree.XMLParser(encoding=self.ENCODING or response.encoding)
self.doc = etree.parse(BytesIO(response.content), parser)
@@ -575,6 +566,7 @@ class HTMLPage(BasePage):
def __init__(self, browser, response, *args, **kwargs):
super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
self.encoding = self.ENCODING or response.encoding
+ import lxml.html as html
parser = html.HTMLParser(encoding=self.encoding)
self.doc = html.parse(BytesIO(response.content), parser)
@@ -613,228 +605,6 @@ def method(klass):
return inner
-class AbstractElement(object):
- def __init__(self, page, parent=None, el=None):
- self.page = page
- self.parent = parent
- if el is not None:
- self.el = el
- elif parent is not None:
- self.el = parent.el
- else:
- self.el = page.doc
-
- if parent is not None:
- self.env = deepcopy(parent.env)
- else:
- self.env = deepcopy(page.params)
-
- def use_selector(self, func):
- if isinstance(func, _Filter):
- value = func(self)
- elif callable(func):
- value = func()
- else:
- value = deepcopy(func)
-
- return value
-
- def parse(self, obj):
- pass
-
- def cssselect(self, *args, **kwargs):
- return self.el.cssselect(*args, **kwargs)
-
- def xpath(self, *args, **kwargs):
- return self.el.xpath(*args, **kwargs)
-
-
-class ListElement(AbstractElement):
- item_xpath = None
- flush_at_end = False
- ignore_duplicate = False
-
- def __init__(self, *args, **kwargs):
- super(ListElement, self).__init__(*args, **kwargs)
- self.logger = getLogger(self.__class__.__name__.lower())
- self.objects = OrderedDict()
-
- def __call__(self, *args, **kwargs):
- for key, value in kwargs.iteritems():
- self.env[key] = value
-
- return self.__iter__()
-
- def find_elements(self):
- """
- Get the nodes that will have to be processed.
- This method can be overridden if xpath filters are not
- sufficient.
- """
- if self.item_xpath is not None:
- for el in self.el.xpath(self.item_xpath):
- yield el
- else:
- yield self.el
-
- def __iter__(self):
- self.parse(self.el)
-
- for el in self.find_elements():
- for obj in self.handle_element(el):
- if not self.flush_at_end:
- yield obj
-
- if self.flush_at_end:
- for obj in self.flush():
- yield obj
-
- self.check_next_page()
-
- def flush(self):
- for obj in self.objects.itervalues():
- yield obj
-
- def check_next_page(self):
- if not hasattr(self, 'next_page'):
- return
-
- next_page = getattr(self, 'next_page')
- try:
- value = self.use_selector(next_page)
- except (AttributeNotFound, XPathNotFound):
- return
-
- if value is None:
- return
-
- raise NextPage(value)
-
-
- def store(self, obj):
- if obj.id:
- if obj.id in self.objects:
- if self.ignore_duplicate:
- self.logger.warning('There are two objects with the same ID! %s' % obj.id)
- return
- else:
- raise DataError('There are two objects with the same ID! %s' % obj.id)
- self.objects[obj.id] = obj
- return obj
-
- def handle_element(self, el):
- for attrname in dir(self):
- attr = getattr(self, attrname)
- if isinstance(attr, type) and issubclass(attr, AbstractElement) and attr != type(self):
- for obj in attr(self.page, self, el):
- obj = self.store(obj)
- if obj:
- yield obj
-
-
-class SkipItem(Exception):
- """
- Raise this exception in an :class:`ItemElement` subclass to skip an item.
- """
-
-
-class _ItemElementMeta(type):
- """
- Private meta-class used to keep order of obj_* attributes in :class:`ItemElement`.
- """
- def __new__(mcs, name, bases, attrs):
- _attrs = []
- for base in bases:
- if hasattr(base, '_attrs'):
- _attrs += base._attrs
-
- filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')]
- # constants first, then filters, then methods
- filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0))
-
- new_class = super(_ItemElementMeta, mcs).__new__(mcs, name, bases, attrs)
- new_class._attrs = _attrs + [f[0] for f in filters]
- return new_class
-
-
-class ItemElement(AbstractElement):
- __metaclass__ = _ItemElementMeta
-
- _attrs = None
- klass = None
- condition = None
- validate = None
-
- class Index(object):
- pass
-
- def __init__(self, *args, **kwargs):
- super(ItemElement, self).__init__(*args, **kwargs)
- self.obj = None
-
- def build_object(self):
- if self.klass is None:
- return
- return self.klass()
-
- def __call__(self, obj=None):
- if obj is not None:
- self.obj = obj
-
- for obj in self:
- return obj
-
- def __iter__(self):
- if self.condition is not None and not self.condition():
- return
-
- try:
- if self.obj is None:
- self.obj = self.build_object()
- self.parse(self.el)
- for attr in self._attrs:
- self.handle_attr(attr, getattr(self, 'obj_%s' % attr))
- except SkipItem:
- return
-
- if self.validate is not None and not self.validate(self.obj):
- return
-
- yield self.obj
-
- def handle_attr(self, key, func):
- value = self.use_selector(func)
- setattr(self.obj, key, value)
-
-
-class TableElement(ListElement):
- head_xpath = None
- cleaner = CleanText
-
- def __init__(self, *args, **kwargs):
- super(TableElement, self).__init__(*args, **kwargs)
-
- self._cols = {}
-
- columns = {}
- for attrname in dir(self):
- m = re.match('col_(.*)', attrname)
- if m:
- cols = getattr(self, attrname)
- if not isinstance(cols, (list,tuple)):
- cols = [cols]
- columns[m.group(1)] = [s.lower() for s in cols]
-
- for colnum, el in enumerate(self.el.xpath(self.head_xpath)):
- title = self.cleaner.clean(el).lower()
- for name, titles in columns.iteritems():
- if title in titles:
- self._cols[name] = colnum
-
- def get_colnum(self, name):
- return self._cols.get(name, None)
-
-
class LoggedPage(object):
"""
A page that only logged users can reach. If we did not get a redirection
diff --git a/weboob/tools/capabilities/bank/transactions.py b/weboob/tools/capabilities/bank/transactions.py
index a7cf9b48..6ed55a34 100644
--- a/weboob/tools/capabilities/bank/transactions.py
+++ b/weboob/tools/capabilities/bank/transactions.py
@@ -28,7 +28,7 @@ from weboob.tools.misc import to_unicode
from weboob.tools.log import getLogger
from weboob.tools.exceptions import ParseError
-from weboob.tools.browser2.page import TableElement, ItemElement
+from weboob.tools.browser2.elements import TableElement, ItemElement
from weboob.tools.browser2.filters import Filter, CleanText, CleanDecimal, TableCell