From 76cb004eb4fd071f822a7ae0ca82c6c943742363 Mon Sep 17 00:00:00 2001 From: Florent Date: Tue, 8 Jul 2014 19:59:37 +0200 Subject: [PATCH] Move ItemListTable-Element outside of page.py One of the goals is to avoid importing all the modules needed by filters merely by loading the page file. To the same end, move the parser imports into the page classes that use them. --- modules/alloresto/pages.py | 3 +- modules/banqueaccord/pages.py | 3 +- modules/biplan/pages.py | 3 +- modules/carrefourbanque/pages.py | 3 +- modules/cci/pages.py | 3 +- modules/creditmutuel/pages.py | 3 +- modules/dresdenwetter/pages.py | 3 +- modules/feedly/pages.py | 3 +- modules/francetelevisions/pages.py | 3 +- modules/freemobile/pages/history.py | 3 +- modules/freemobile/pages/homepage.py | 3 +- modules/groupamaes/pages.py | 3 +- modules/hsbc/pages.py | 3 +- modules/hybride/pages.py | 3 +- modules/indeed/pages.py | 3 +- modules/ing/pages/accounts_list.py | 3 +- modules/ing/pages/bills.py | 3 +- modules/ing/pages/titre.py | 3 +- modules/ing/pages/transfer.py | 3 +- modules/oney/pages.py | 3 +- modules/pastealacon/browser.py | 3 +- modules/pastebin/browser.py | 3 +- modules/poivy/pages.py | 3 +- modules/regionsjob/pages.py | 3 +- modules/sachsen/pages.py | 3 +- modules/senscritique/pages.py | 3 +- modules/twitter/pages.py | 3 +- modules/vlille/pages.py | 3 +- modules/youjizz/pages/index.py | 3 +- modules/youjizz/pages/video.py | 4 +- weboob/tools/browser2/elements.py | 241 ++++++++++++++++++ weboob/tools/browser2/page.py | 236 +---------------- .../tools/capabilities/bank/transactions.py | 2 +- 33 files changed, 305 insertions(+), 265 deletions(-) create mode 100644 weboob/tools/browser2/elements.py diff --git a/modules/alloresto/pages.py b/modules/alloresto/pages.py index 3000a156..48367911 100644 --- a/modules/alloresto/pages.py +++ b/modules/alloresto/pages.py @@ -21,7 +21,8 @@ import datetime from decimal import Decimal -from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, ItemElement +from 
weboob.tools.browser2.page import HTMLPage, LoggedPage, method +from weboob.tools.browser2.elements import ItemElement from weboob.tools.browser2.filters import CleanDecimal, CleanText, Filter, TableCell from weboob.capabilities.bank import Account from weboob.tools.capabilities.bank.transactions import FrenchTransaction as Transaction diff --git a/modules/banqueaccord/pages.py b/modules/banqueaccord/pages.py index f0cdc54b..41f52ad6 100644 --- a/modules/banqueaccord/pages.py +++ b/modules/banqueaccord/pages.py @@ -24,7 +24,8 @@ import re from cStringIO import StringIO from weboob.capabilities.bank import Account -from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, LoggedPage +from weboob.tools.browser2.page import HTMLPage, method, LoggedPage +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import ParseError, CleanText, Regexp, Attr, CleanDecimal, Env from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError from weboob.tools.capabilities.bank.transactions import FrenchTransaction diff --git a/modules/biplan/pages.py b/modules/biplan/pages.py index 90f92050..a9f5e508 100644 --- a/modules/biplan/pages.py +++ b/modules/biplan/pages.py @@ -23,7 +23,8 @@ from datetime import datetime, time import weboob.tools.date as date_util from .calendar import BiplanCalendarEventConcert, BiplanCalendarEventTheatre -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement +from weboob.tools.browser2.elements import ItemElement, SkipItem, ListElement +from weboob.tools.browser2.page import HTMLPage, method from weboob.tools.browser2.filters import Filter, Link, CleanText, Env, Regexp, CombineDate, CleanHTML diff --git a/modules/carrefourbanque/pages.py b/modules/carrefourbanque/pages.py index dc89c1c3..6301423f 100644 --- a/modules/carrefourbanque/pages.py +++ b/modules/carrefourbanque/pages.py @@ -20,7 +20,8 @@ import re -from 
weboob.tools.browser2.page import HTMLPage, ListElement, ItemElement, method, LoggedPage +from weboob.tools.browser2.page import HTMLPage, method, LoggedPage +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import Regexp, CleanText, CleanDecimal, Format, Link from weboob.capabilities.bank import Account diff --git a/modules/cci/pages.py b/modules/cci/pages.py index 88b2cbbd..5173d3c8 100644 --- a/modules/cci/pages.py +++ b/modules/cci/pages.py @@ -17,7 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, TableElement +from weboob.tools.browser2.page import HTMLPage, method +from weboob.tools.browser2.elements import ItemElement, TableElement from weboob.tools.browser2.filters import Filter, Link, CleanText, Format, Env, DateTime, CleanHTML, TableCell, Join from weboob.capabilities.job import BaseJobAdvert diff --git a/modules/creditmutuel/pages.py b/modules/creditmutuel/pages.py index 22cfda26..d63666f2 100644 --- a/modules/creditmutuel/pages.py +++ b/modules/creditmutuel/pages.py @@ -27,7 +27,8 @@ from decimal import Decimal import re from dateutil.relativedelta import relativedelta -from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, FormNotFound, LoggedPage +from weboob.tools.browser2.page import HTMLPage, method, FormNotFound, LoggedPage +from weboob.tools.browser2.elements import ListElement, ItemElement, SkipItem from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, Field, TableCell from weboob.tools.exceptions import BrowserIncorrectPassword from weboob.capabilities import NotAvailable diff --git a/modules/dresdenwetter/pages.py b/modules/dresdenwetter/pages.py index 32010409..0d211fcd 100644 --- a/modules/dresdenwetter/pages.py +++ b/modules/dresdenwetter/pages.py @@ -17,7 +17,8 @@ # You should 
have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement +from weboob.tools.browser2.page import HTMLPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanText, Regexp, Field, Filter from weboob.capabilities.gauge import GaugeMeasure, GaugeSensor from weboob.capabilities.base import NotAvailable diff --git a/modules/feedly/pages.py b/modules/feedly/pages.py index 803184d0..8de43bab 100644 --- a/modules/feedly/pages.py +++ b/modules/feedly/pages.py @@ -21,7 +21,8 @@ from datetime import datetime from weboob.capabilities.messages import Message from weboob.capabilities.collection import Collection -from weboob.tools.browser2.page import JsonPage, ListElement, method, ItemElement +from weboob.tools.browser2.page import JsonPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanText, Dict, Format, CleanHTML __all__ = ['TokenPage', 'ContentsPage', 'PreferencesPage'] diff --git a/modules/francetelevisions/pages.py b/modules/francetelevisions/pages.py index 1c81725b..c01d8ccd 100644 --- a/modules/francetelevisions/pages.py +++ b/modules/francetelevisions/pages.py @@ -22,7 +22,8 @@ from weboob.capabilities.video import BaseVideo from datetime import timedelta -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, JsonPage +from weboob.tools.browser2.page import HTMLPage, method, JsonPage +from weboob.tools.browser2.elements import ItemElement, ListElement from weboob.tools.browser2.filters import Filter, Link, CleanText, Regexp, Attr, Format, DateTime, Env, Dict, Duration, XPath diff --git a/modules/freemobile/pages/history.py b/modules/freemobile/pages/history.py index 19910e0e..1eab151e 100644 --- a/modules/freemobile/pages/history.py +++ b/modules/freemobile/pages/history.py 
@@ -24,7 +24,8 @@ import lxml.html as html from datetime import datetime from decimal import Decimal -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, LoggedPage +from weboob.tools.browser2.page import HTMLPage, method, LoggedPage +from weboob.tools.browser2.elements import ItemElement, ListElement from weboob.tools.browser2.filters import Date, CleanText, Attr, Filter,\ CleanDecimal, Regexp, Field, DateTime, Format, Env from weboob.capabilities.bill import Detail, Bill diff --git a/modules/freemobile/pages/homepage.py b/modules/freemobile/pages/homepage.py index fec6d14a..72b34f12 100644 --- a/modules/freemobile/pages/homepage.py +++ b/modules/freemobile/pages/homepage.py @@ -19,7 +19,8 @@ from .history import BadUTF8Page from weboob.capabilities.bill import Subscription -from weboob.tools.browser2.page import method, ListElement, ItemElement +from weboob.tools.browser2.page import method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanText, Attr, Field, Format, Filter __all__ = ['HomePage'] diff --git a/modules/groupamaes/pages.py b/modules/groupamaes/pages.py index 55b2eca3..f7e3c4fe 100644 --- a/modules/groupamaes/pages.py +++ b/modules/groupamaes/pages.py @@ -18,7 +18,8 @@ # along with weboob. If not, see . 
-from weboob.tools.browser2.page import HTMLPage, method, TableElement, ItemElement, LoggedPage +from weboob.tools.browser2.page import HTMLPage, method, LoggedPage +from weboob.tools.browser2.elements import TableElement, ItemElement from weboob.tools.browser2.filters import CleanText, CleanDecimal, TableCell, Date from weboob.capabilities.bank import Account, Transaction from weboob.tools.date import LinearDateGuesser diff --git a/modules/hsbc/pages.py b/modules/hsbc/pages.py index 09e437b9..42309e3e 100644 --- a/modules/hsbc/pages.py +++ b/modules/hsbc/pages.py @@ -25,7 +25,8 @@ from weboob.capabilities.bank import Account from weboob.tools.capabilities.bank.transactions import FrenchTransaction from weboob.tools.exceptions import BrowserIncorrectPassword -from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, LoggedPage, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement, SkipItem +from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, Field, DateGuesser, TableCell diff --git a/modules/hybride/pages.py b/modules/hybride/pages.py index 1a71f54e..b6ea3f8a 100644 --- a/modules/hybride/pages.py +++ b/modules/hybride/pages.py @@ -23,7 +23,8 @@ from .calendar import HybrideCalendarEvent import weboob.tools.date as date_util import re -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement +from weboob.tools.browser2.page import HTMLPage, method +from weboob.tools.browser2.elements import ItemElement, SkipItem, ListElement from weboob.tools.browser2.filters import Filter, Link, CleanText, Env diff --git a/modules/indeed/pages.py b/modules/indeed/pages.py index 88d13720..a73ddd5d 100644 --- a/modules/indeed/pages.py +++ b/modules/indeed/pages.py @@ -19,7 +19,8 @@ from datetime import timedelta, datetime import re -from weboob.tools.browser2.page import 
HTMLPage, method, ListElement, ItemElement, pagination +from weboob.tools.browser2.page import HTMLPage, method, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import Filter, CleanText, Regexp, Format, Env, CleanHTML, Attr from weboob.capabilities.job import BaseJobAdvert diff --git a/modules/ing/pages/accounts_list.py b/modules/ing/pages/accounts_list.py index 4daf512c..58dcb483 100644 --- a/modules/ing/pages/accounts_list.py +++ b/modules/ing/pages/accounts_list.py @@ -24,7 +24,8 @@ import re from weboob.capabilities.bank import Account from weboob.capabilities.base import NotAvailable -from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, ListElement, ItemElement +from weboob.tools.browser2.page import HTMLPage, LoggedPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import Attr, CleanText, CleanDecimal, Filter, Field, MultiFilter, Date, Lower from weboob.tools.capabilities.bank.transactions import FrenchTransaction diff --git a/modules/ing/pages/bills.py b/modules/ing/pages/bills.py index 0491ee69..bafecaeb 100644 --- a/modules/ing/pages/bills.py +++ b/modules/ing/pages/bills.py @@ -20,7 +20,8 @@ from weboob.capabilities.bill import Bill, Subscription from weboob.tools.browser2 import HTMLPage, LoggedPage from weboob.tools.browser2.filters import Filter, Attr, CleanText, Format, Field, Env -from weboob.tools.browser2.page import ListElement, ItemElement, method, pagination +from weboob.tools.browser2.page import method, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement __all__ = ['BillsPage'] diff --git a/modules/ing/pages/titre.py b/modules/ing/pages/titre.py index a1463bec..aa5913b8 100644 --- a/modules/ing/pages/titre.py +++ b/modules/ing/pages/titre.py @@ -21,7 +21,8 @@ from decimal import Decimal from weboob.capabilities.bank import Investment -from weboob.tools.browser2.page 
import RawPage, HTMLPage, method, ListElement, ItemElement +from weboob.tools.browser2.page import RawPage, HTMLPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanDecimal, CleanText, Date from weboob.tools.capabilities.bank.transactions import FrenchTransaction diff --git a/modules/ing/pages/transfer.py b/modules/ing/pages/transfer.py index c09821dd..0ca8da6c 100644 --- a/modules/ing/pages/transfer.py +++ b/modules/ing/pages/transfer.py @@ -18,7 +18,8 @@ # along with weboob. If not, see . from weboob.capabilities.bank import Recipient, AccountNotFound, Transfer -from weboob.tools.browser2.page import HTMLPage, LoggedPage, ListElement, ItemElement, method +from weboob.tools.browser2.page import HTMLPage, LoggedPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanText, CleanDecimal, Attr, Format from .login import INGVirtKeyboard diff --git a/modules/oney/pages.py b/modules/oney/pages.py index 0d557f23..5e4d6577 100644 --- a/modules/oney/pages.py +++ b/modules/oney/pages.py @@ -25,7 +25,8 @@ import requests from weboob.tools.capabilities.bank.transactions import FrenchTransaction from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError -from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, LoggedPage, pagination +from weboob.tools.browser2.page import HTMLPage, method, LoggedPage, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import Env, CleanDecimal, ParseError __all__ = ['LoginPage', 'IndexPage', 'OperationsPage'] diff --git a/modules/pastealacon/browser.py b/modules/pastealacon/browser.py index db0a9741..b672052b 100644 --- a/modules/pastealacon/browser.py +++ b/modules/pastealacon/browser.py @@ -21,7 +21,8 @@ import re from weboob.capabilities.paste import BasePaste, PasteNotFound from 
weboob.tools.browser2.filters import CleanText, DateTime, Env, RawText, Regexp -from weboob.tools.browser2.page import HTMLPage, ItemElement, method, PagesBrowser, URL +from weboob.tools.browser2.page import HTMLPage, method, PagesBrowser, URL +from weboob.tools.browser2.elements import ItemElement from weboob.tools.exceptions import BrowserHTTPNotFound diff --git a/modules/pastebin/browser.py b/modules/pastebin/browser.py index 835c4747..da97b8f4 100644 --- a/modules/pastebin/browser.py +++ b/modules/pastebin/browser.py @@ -23,7 +23,8 @@ import re from weboob.capabilities.paste import BasePaste, PasteNotFound from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText -from weboob.tools.browser2.page import ItemElement, method, RawPage +from weboob.tools.browser2.page import method, RawPage +from weboob.tools.browser2.elements import ItemElement from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable diff --git a/modules/poivy/pages.py b/modules/poivy/pages.py index 7f29d0bf..ab85e294 100644 --- a/modules/poivy/pages.py +++ b/modules/poivy/pages.py @@ -18,7 +18,8 @@ # along with weboob. If not, see . 
from weboob.tools.exceptions import BrowserBanned -from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, ListElement, ItemElement, pagination +from weboob.tools.browser2.page import HTMLPage, LoggedPage, method, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanText, CleanDecimal, Field, Attr, DateTime, Link, Format from weboob.capabilities.bill import Subscription, Detail diff --git a/modules/regionsjob/pages.py b/modules/regionsjob/pages.py index b0ee3199..78519d35 100644 --- a/modules/regionsjob/pages.py +++ b/modules/regionsjob/pages.py @@ -17,7 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, SkipItem, ListElement +from weboob.tools.browser2.page import HTMLPage, method +from weboob.tools.browser2.elements import ItemElement, SkipItem, ListElement from weboob.tools.browser2.filters import Link, CleanText, Regexp, Format, Env, DateGuesser, CleanHTML, DateTime from weboob.tools.date import LinearDateGuesser from weboob.capabilities.job import BaseJobAdvert diff --git a/modules/sachsen/pages.py b/modules/sachsen/pages.py index 2767a19b..826a387f 100644 --- a/modules/sachsen/pages.py +++ b/modules/sachsen/pages.py @@ -17,7 +17,8 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . 
-from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement +from weboob.tools.browser2.page import HTMLPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import Env, CleanText, Regexp, Field, DateTime, Map, Attr from weboob.capabilities.gauge import Gauge, GaugeMeasure, GaugeSensor from weboob.capabilities.base import NotAvailable, NotLoaded diff --git a/modules/senscritique/pages.py b/modules/senscritique/pages.py index 5f66d41a..a165e1c1 100644 --- a/modules/senscritique/pages.py +++ b/modules/senscritique/pages.py @@ -22,7 +22,8 @@ from .calendar import SensCritiquenCalendarEvent from datetime import date, datetime, time, timedelta -from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, JsonPage +from weboob.tools.browser2.page import HTMLPage, method, JsonPage +from weboob.tools.browser2.elements import ItemElement, ListElement from weboob.tools.browser2.filters import Filter, Link, CleanText, Regexp, Attr, Join, Format diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py index 848bdd77..2164353b 100644 --- a/modules/twitter/pages.py +++ b/modules/twitter/pages.py @@ -23,7 +23,8 @@ from io import StringIO import lxml.html as html import urllib -from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination +from weboob.tools.browser2.page import HTMLPage, JsonPage, method, FormNotFound, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter from weboob.capabilities.messages import Thread, Message from weboob.capabilities.base import BaseObject diff --git a/modules/vlille/pages.py b/modules/vlille/pages.py index afab9dc0..2cd3c86f 100644 --- a/modules/vlille/pages.py +++ b/modules/vlille/pages.py @@ -18,7 +18,8 @@ # along with weboob. If not, see . 
-from weboob.tools.browser2.page import HTMLPage, XMLPage, method, ListElement, ItemElement, TableElement +from weboob.tools.browser2.page import HTMLPage, XMLPage, method +from weboob.tools.browser2.elements import ListElement, ItemElement, TableElement from weboob.tools.browser2.filters import CleanText, TableCell, Filter from weboob.capabilities.gauge import Gauge, GaugeMeasure, GaugeSensor diff --git a/modules/youjizz/pages/index.py b/modules/youjizz/pages/index.py index 280761b7..6f401ec6 100644 --- a/modules/youjizz/pages/index.py +++ b/modules/youjizz/pages/index.py @@ -19,7 +19,8 @@ from weboob.tools.browser2 import HTMLPage -from weboob.tools.browser2.page import ListElement, method, ItemElement, pagination +from weboob.tools.browser2.page import method, pagination +from weboob.tools.browser2.elements import ListElement, ItemElement from weboob.tools.browser2.filters import Link, CleanText, Duration, Regexp, CSS from weboob.capabilities.base import NotAvailable from weboob.capabilities.image import BaseImage diff --git a/modules/youjizz/pages/video.py b/modules/youjizz/pages/video.py index b64f0265..1b59b5ca 100644 --- a/modules/youjizz/pages/video.py +++ b/modules/youjizz/pages/video.py @@ -20,8 +20,8 @@ import re -from weboob.tools.browser2 import HTMLPage -from weboob.tools.browser2.page import method, ItemElement +from weboob.tools.browser2.page import method, HTMLPage +from weboob.tools.browser2.elements import ItemElement from weboob.tools.browser2.filters import CleanText, Env, Duration from weboob.capabilities.video import BaseVideo from weboob.tools.misc import to_unicode diff --git a/weboob/tools/browser2/elements.py b/weboob/tools/browser2/elements.py new file mode 100644 index 00000000..97d545c3 --- /dev/null +++ b/weboob/tools/browser2/elements.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2014 Romain Bignon +# +# This file is part of weboob. 
+# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + +import re +import sys +from copy import deepcopy +from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound +from weboob.tools.log import getLogger +from weboob.tools.browser2.page import NextPage + +class DataError(Exception): + """ + Returned data from pages are incoherent. + """ + + +class AbstractElement(object): + def __init__(self, page, parent=None, el=None): + self.page = page + self.parent = parent + if el is not None: + self.el = el + elif parent is not None: + self.el = parent.el + else: + self.el = page.doc + + if parent is not None: + self.env = deepcopy(parent.env) + else: + self.env = deepcopy(page.params) + + def use_selector(self, func): + if isinstance(func, _Filter): + value = func(self) + elif callable(func): + value = func() + else: + value = deepcopy(func) + + return value + + def parse(self, obj): + pass + + def cssselect(self, *args, **kwargs): + return self.el.cssselect(*args, **kwargs) + + def xpath(self, *args, **kwargs): + return self.el.xpath(*args, **kwargs) + + +class SkipItem(Exception): + """ + Raise this exception in an :class:`ItemElement` subclass to skip an item. + """ + + +class _ItemElementMeta(type): + """ + Private meta-class used to keep order of obj_* attributes in :class:`ItemElement`. 
+ """ + def __new__(mcs, name, bases, attrs): + _attrs = [] + for base in bases: + if hasattr(base, '_attrs'): + _attrs += base._attrs + + filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')] + # constants first, then filters, then methods + filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0)) + + new_class = super(_ItemElementMeta, mcs).__new__(mcs, name, bases, attrs) + new_class._attrs = _attrs + [f[0] for f in filters] + return new_class + + +class ItemElement(AbstractElement): + __metaclass__ = _ItemElementMeta + + _attrs = None + klass = None + condition = None + validate = None + + class Index(object): + pass + + def __init__(self, *args, **kwargs): + super(ItemElement, self).__init__(*args, **kwargs) + self.obj = None + + def build_object(self): + if self.klass is None: + return + return self.klass() + + def __call__(self, obj=None): + if obj is not None: + self.obj = obj + + for obj in self: + return obj + + def __iter__(self): + if self.condition is not None and not self.condition(): + return + + try: + if self.obj is None: + self.obj = self.build_object() + self.parse(self.el) + for attr in self._attrs: + self.handle_attr(attr, getattr(self, 'obj_%s' % attr)) + except SkipItem: + return + + if self.validate is not None and not self.validate(self.obj): + return + + yield self.obj + + def handle_attr(self, key, func): + value = self.use_selector(func) + setattr(self.obj, key, value) + + +class ListElement(AbstractElement): + item_xpath = None + flush_at_end = False + ignore_duplicate = False + + def __init__(self, *args, **kwargs): + super(ListElement, self).__init__(*args, **kwargs) + self.logger = getLogger(self.__class__.__name__.lower()) + self.objects = {} + + def __call__(self, *args, **kwargs): + for key, value in kwargs.iteritems(): + self.env[key] = value + + return self.__iter__() + + def 
__iter__(self): + self.parse(self.el) + + if self.item_xpath is not None: + for el in self.el.xpath(self.item_xpath): + for obj in self.handle_element(el): + if not self.flush_at_end: + yield obj + else: + for obj in self.handle_element(self.el): + if not self.flush_at_end: + yield obj + + if self.flush_at_end: + for obj in self.objects.itervalues(): + yield obj + + self.check_next_page() + + def check_next_page(self): + if not hasattr(self, 'next_page'): + return + + next_page = getattr(self, 'next_page') + try: + value = self.use_selector(next_page) + except (AttributeNotFound, XPathNotFound): + return + + if value is None: + return + + raise NextPage(value) + + + def store(self, obj): + if obj.id: + if obj.id in self.objects: + if self.ignore_duplicate: + self.logger.warning('There are two objects with the same ID! %s' % obj.id) + return + else: + raise DataError('There are two objects with the same ID! %s' % obj.id) + self.objects[obj.id] = obj + return obj + + def handle_element(self, el): + for attrname in dir(self): + attr = getattr(self, attrname) + if isinstance(attr, type) and issubclass(attr, AbstractElement) and attr != type(self): + for obj in attr(self.page, self, el): + obj = self.store(obj) + if obj: + yield obj + + +class TableElement(ListElement): + head_xpath = None + cleaner = CleanText + + def __init__(self, *args, **kwargs): + super(TableElement, self).__init__(*args, **kwargs) + + self._cols = {} + + columns = {} + for attrname in dir(self): + m = re.match('col_(.*)', attrname) + if m: + cols = getattr(self, attrname) + if not isinstance(cols, (list,tuple)): + cols = [cols] + columns[m.group(1)] = [s.lower() for s in cols] + + for colnum, el in enumerate(self.el.xpath(self.head_xpath)): + title = self.cleaner.clean(el).lower() + for name, titles in columns.iteritems(): + if title in titles: + self._cols[name] = colnum + + def get_colnum(self, name): + return self._cols.get(name, None) diff --git a/weboob/tools/browser2/page.py 
b/weboob/tools/browser2/page.py index 703e2069..eba4c2a5 100644 --- a/weboob/tools/browser2/page.py +++ b/weboob/tools/browser2/page.py @@ -24,15 +24,11 @@ try: except ImportError: from urllib import unquote import re -import sys from copy import deepcopy from io import BytesIO import requests -import lxml.html as html -import lxml.etree as etree -from weboob.tools.json import json from weboob.tools.ordereddict import OrderedDict from weboob.tools.regex_helper import normalize from weboob.tools.compat import basestring @@ -40,7 +36,6 @@ from weboob.tools.compat import basestring from weboob.tools.log import getLogger from .browser import DomainBrowser -from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound class UrlNotResolvable(Exception): @@ -49,12 +44,6 @@ class UrlNotResolvable(Exception): """ -class DataError(Exception): - """ - Returned data from pages are incoherent. - """ - - class URL(object): """ A description of an URL on the PagesBrowser website. @@ -538,6 +527,7 @@ class Form(OrderedDict): class JsonPage(BasePage): def __init__(self, browser, response, *args, **kwargs): super(JsonPage, self).__init__(browser, response, *args, **kwargs) + from weboob.tools.json import json self.doc = json.loads(response.text) @@ -550,6 +540,7 @@ class XMLPage(BasePage): def __init__(self, browser, response, *args, **kwargs): super(XMLPage, self).__init__(browser, response, *args, **kwargs) + import lxml.etree as etree parser = etree.XMLParser(encoding=self.ENCODING or response.encoding) self.doc = etree.parse(BytesIO(response.content), parser) @@ -575,6 +566,7 @@ class HTMLPage(BasePage): def __init__(self, browser, response, *args, **kwargs): super(HTMLPage, self).__init__(browser, response, *args, **kwargs) self.encoding = self.ENCODING or response.encoding + import lxml.html as html parser = html.HTMLParser(encoding=self.encoding) self.doc = html.parse(BytesIO(response.content), parser) @@ -613,228 +605,6 @@ def method(klass): return inner -class 
AbstractElement(object): - def __init__(self, page, parent=None, el=None): - self.page = page - self.parent = parent - if el is not None: - self.el = el - elif parent is not None: - self.el = parent.el - else: - self.el = page.doc - - if parent is not None: - self.env = deepcopy(parent.env) - else: - self.env = deepcopy(page.params) - - def use_selector(self, func): - if isinstance(func, _Filter): - value = func(self) - elif callable(func): - value = func() - else: - value = deepcopy(func) - - return value - - def parse(self, obj): - pass - - def cssselect(self, *args, **kwargs): - return self.el.cssselect(*args, **kwargs) - - def xpath(self, *args, **kwargs): - return self.el.xpath(*args, **kwargs) - - -class ListElement(AbstractElement): - item_xpath = None - flush_at_end = False - ignore_duplicate = False - - def __init__(self, *args, **kwargs): - super(ListElement, self).__init__(*args, **kwargs) - self.logger = getLogger(self.__class__.__name__.lower()) - self.objects = OrderedDict() - - def __call__(self, *args, **kwargs): - for key, value in kwargs.iteritems(): - self.env[key] = value - - return self.__iter__() - - def find_elements(self): - """ - Get the nodes that will have to be processed. - This method can be overridden if xpath filters are not - sufficient. 
- """ - if self.item_xpath is not None: - for el in self.el.xpath(self.item_xpath): - yield el - else: - yield self.el - - def __iter__(self): - self.parse(self.el) - - for el in self.find_elements(): - for obj in self.handle_element(el): - if not self.flush_at_end: - yield obj - - if self.flush_at_end: - for obj in self.flush(): - yield obj - - self.check_next_page() - - def flush(self): - for obj in self.objects.itervalues(): - yield obj - - def check_next_page(self): - if not hasattr(self, 'next_page'): - return - - next_page = getattr(self, 'next_page') - try: - value = self.use_selector(next_page) - except (AttributeNotFound, XPathNotFound): - return - - if value is None: - return - - raise NextPage(value) - - - def store(self, obj): - if obj.id: - if obj.id in self.objects: - if self.ignore_duplicate: - self.logger.warning('There are two objects with the same ID! %s' % obj.id) - return - else: - raise DataError('There are two objects with the same ID! %s' % obj.id) - self.objects[obj.id] = obj - return obj - - def handle_element(self, el): - for attrname in dir(self): - attr = getattr(self, attrname) - if isinstance(attr, type) and issubclass(attr, AbstractElement) and attr != type(self): - for obj in attr(self.page, self, el): - obj = self.store(obj) - if obj: - yield obj - - -class SkipItem(Exception): - """ - Raise this exception in an :class:`ItemElement` subclass to skip an item. - """ - - -class _ItemElementMeta(type): - """ - Private meta-class used to keep order of obj_* attributes in :class:`ItemElement`. 
- """ - def __new__(mcs, name, bases, attrs): - _attrs = [] - for base in bases: - if hasattr(base, '_attrs'): - _attrs += base._attrs - - filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')] - # constants first, then filters, then methods - filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0)) - - new_class = super(_ItemElementMeta, mcs).__new__(mcs, name, bases, attrs) - new_class._attrs = _attrs + [f[0] for f in filters] - return new_class - - -class ItemElement(AbstractElement): - __metaclass__ = _ItemElementMeta - - _attrs = None - klass = None - condition = None - validate = None - - class Index(object): - pass - - def __init__(self, *args, **kwargs): - super(ItemElement, self).__init__(*args, **kwargs) - self.obj = None - - def build_object(self): - if self.klass is None: - return - return self.klass() - - def __call__(self, obj=None): - if obj is not None: - self.obj = obj - - for obj in self: - return obj - - def __iter__(self): - if self.condition is not None and not self.condition(): - return - - try: - if self.obj is None: - self.obj = self.build_object() - self.parse(self.el) - for attr in self._attrs: - self.handle_attr(attr, getattr(self, 'obj_%s' % attr)) - except SkipItem: - return - - if self.validate is not None and not self.validate(self.obj): - return - - yield self.obj - - def handle_attr(self, key, func): - value = self.use_selector(func) - setattr(self.obj, key, value) - - -class TableElement(ListElement): - head_xpath = None - cleaner = CleanText - - def __init__(self, *args, **kwargs): - super(TableElement, self).__init__(*args, **kwargs) - - self._cols = {} - - columns = {} - for attrname in dir(self): - m = re.match('col_(.*)', attrname) - if m: - cols = getattr(self, attrname) - if not isinstance(cols, (list,tuple)): - cols = [cols] - columns[m.group(1)] = [s.lower() for s in cols] - 
- for colnum, el in enumerate(self.el.xpath(self.head_xpath)): - title = self.cleaner.clean(el).lower() - for name, titles in columns.iteritems(): - if title in titles: - self._cols[name] = colnum - - def get_colnum(self, name): - return self._cols.get(name, None) - - class LoggedPage(object): """ A page that only logged users can reach. If we did not get a redirection diff --git a/weboob/tools/capabilities/bank/transactions.py b/weboob/tools/capabilities/bank/transactions.py index a7cf9b48..6ed55a34 100644 --- a/weboob/tools/capabilities/bank/transactions.py +++ b/weboob/tools/capabilities/bank/transactions.py @@ -28,7 +28,7 @@ from weboob.tools.misc import to_unicode from weboob.tools.log import getLogger from weboob.tools.exceptions import ParseError -from weboob.tools.browser2.page import TableElement, ItemElement +from weboob.tools.browser2.elements import TableElement, ItemElement from weboob.tools.browser2.filters import Filter, CleanText, CleanDecimal, TableCell