keep order in obj_* attributes, and changes in API

This commit is contained in:
Romain Bignon 2014-03-10 22:51:49 +01:00
commit fd8136ebfe
4 changed files with 88 additions and 42 deletions

View file

@ -24,7 +24,7 @@ import re
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, FormNotFound, TableElement from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, FormNotFound, TableElement
from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, TableCell from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, TableCell, Attr
from weboob.tools.browser import BrowserIncorrectPassword from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable
from weboob.capabilities.bank import Account from weboob.capabilities.bank import Account
@ -78,11 +78,11 @@ class AccountsPage(LoggedPage, HTMLPage):
class item(ItemElement): class item(ItemElement):
klass = Account klass = Account
def __filter__(self, el): def condition(self):
if len(el.xpath('./td')) < 2: if len(self.el.xpath('./td')) < 2:
return False return False
first_td = el.xpath('./td')[0] first_td = self.el.xpath('./td')[0]
return ((first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g') return ((first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g')
and first_td.find('a') is not None) and first_td.find('a') is not None)
@ -90,6 +90,12 @@ class AccountsPage(LoggedPage, HTMLPage):
def filter(self, text): def filter(self, text):
return text.lstrip(' 0123456789').title() return text.lstrip(' 0123456789').title()
class Type(Filter):
    """
    Map an account label to an account type.

    The label is tested against every prefix of ``AccountsPage.TYPES``;
    the type associated with a matching prefix is returned, or ``None``
    when no prefix matches.
    """

    def filter(self, label):
        candidates = (actype
                      for prefix, actype in AccountsPage.TYPES.iteritems()
                      if label.startswith(prefix))
        # next() with a default keeps the "no match -> None" result.
        return next(candidates, None)
obj_id = Env('id') obj_id = Env('id')
obj_label = Label(CleanText('./td[1]/a')) obj_label = Label(CleanText('./td[1]/a'))
obj_balance = CleanDecimal('./td[2] | ./td[3]') obj_balance = CleanDecimal('./td[2] | ./td[3]')
@ -98,11 +104,7 @@ class AccountsPage(LoggedPage, HTMLPage):
obj_currency = FrenchTransaction.Currency('./td[2] | ./td[3]') obj_currency = FrenchTransaction.Currency('./td[2] | ./td[3]')
obj__link_id = Link('./td[1]/a') obj__link_id = Link('./td[1]/a')
obj__card_links = [] obj__card_links = []
obj_type = Type(Attr('label'))
def obj_type(self):
for pattern, actype in AccountsPage.TYPES.iteritems():
if self.obj.label.startswith(pattern):
return actype
def parse(self, el): def parse(self, el):
link = el.xpath('./td[1]/a')[0].get('href', '') link = el.xpath('./td[1]/a')[0].get('href', '')
@ -196,7 +198,7 @@ class OperationsPage(LoggedPage, HTMLPage):
class item(ItemElement): class item(ItemElement):
klass = Transaction klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 4 and len(el.xpath('./td[@class="i g" or @class="p g" or contains(@class, "_c1 c _c1")]')) > 0 condition = lambda self: len(self.el.xpath('./td')) >= 4 and len(self.el.xpath('./td[@class="i g" or @class="p g" or contains(@class, "_c1 c _c1")]')) > 0
class OwnRaw(Filter): class OwnRaw(Filter):
def __call__(self, item): def __call__(self, item):
@ -209,9 +211,9 @@ class OperationsPage(LoggedPage, HTMLPage):
return u' '.join(parts) return u' '.join(parts)
obj_raw = Transaction.Raw(OwnRaw())
obj_date = Transaction.Date(TableCell('date')) obj_date = Transaction.Date(TableCell('date'))
obj_vdate = Transaction.Date(TableCell('vdate', 'date')) obj_vdate = Transaction.Date(TableCell('vdate', 'date'))
obj_raw = Transaction.Raw(OwnRaw())
obj_amount = Transaction.Amount(TableCell('credit'), TableCell('debit')) obj_amount = Transaction.Amount(TableCell('credit'), TableCell('debit'))
def find_amount(self, title): def find_amount(self, title):
@ -238,7 +240,7 @@ class ComingPage(OperationsPage, LoggedPage):
class item(ItemElement): class item(ItemElement):
klass = Transaction klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 3 condition = lambda self: len(self.el.xpath('./td')) >= 3
obj_date = Transaction.Date('./td[1]') obj_date = Transaction.Date('./td[1]')
obj_raw = Transaction.Raw('./td[2]') obj_raw = Transaction.Raw('./td[2]')
@ -255,8 +257,8 @@ class CardPage(OperationsPage, LoggedPage):
class item(ItemElement): class item(ItemElement):
def __iter__(self): def __iter__(self):
card_link = self.el.get('href') card_link = self.el.get('href')
history_url = '%s/%s/fr/banque/%s' % (self.browser.BASEURL, self.browser.currentSubBank, card_link) history_url = '%s/%s/fr/banque/%s' % (self.page.browser.BASEURL, self.page.browser.currentSubBank, card_link)
page = self.browser.location(history_url) page = self.page.browser.location(history_url)
for op in page.get_history(): for op in page.get_history():
yield op yield op
@ -272,7 +274,7 @@ class CardPage(OperationsPage, LoggedPage):
class item(ItemElement): class item(ItemElement):
klass = Transaction klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 4 condition = lambda self: len(self.el.xpath('./td')) >= 4
obj_raw = Transaction.Raw('./td[last()-2] | ./td[last()-1]') obj_raw = Transaction.Raw('./td[last()-2] | ./td[last()-1]')
obj_type = Transaction.TYPE_CARD obj_type = Transaction.TYPE_CARD

View file

@ -23,7 +23,15 @@ from decimal import Decimal
import re import re
class Filter(object): class _Filter(object):
_creation_counter = 0
def __init__(self):
self._creation_counter = _Filter._creation_counter
_Filter._creation_counter += 1
class Filter(_Filter):
""" """
Class used to filter on a HTML element given as call parameter to return Class used to filter on a HTML element given as call parameter to return
matching elements. matching elements.
@ -38,6 +46,7 @@ class Filter(object):
""" """
def __init__(self, selector=None): def __init__(self, selector=None):
super(Filter, self).__init__()
self.selector = selector self.selector = selector
def __call__(self, item): def __call__(self, item):
@ -56,7 +65,8 @@ class Filter(object):
""" """
return value return value
class Env(Filter):
class Env(_Filter):
""" """
Filter to get environment value of the item. Filter to get environment value of the item.
@ -64,12 +74,13 @@ class Env(Filter):
method on ItemElement. method on ItemElement.
""" """
def __init__(self, name): def __init__(self, name):
super(Env, self).__init__()
self.name = name self.name = name
def __call__(self, item): def __call__(self, item):
return item.env[self.name] return item.env[self.name]
class TableCell(Filter): class TableCell(_Filter):
""" """
Used with TableElement, it get the cell value from its name. Used with TableElement, it get the cell value from its name.
@ -89,6 +100,7 @@ class TableCell(Filter):
""" """
def __init__(self, *names): def __init__(self, *names):
super(TableCell, self).__init__()
self.names = names self.names = names
def __call__(self, item): def __call__(self, item):
@ -136,3 +148,15 @@ class Link(Filter):
""" """
def filter(self, el): def filter(self, el):
return el[0].attrib.get('href', '') return el[0].attrib.get('href', '')
class Attr(_Filter):
    """
    Get the value of another ``obj_*`` attribute of the item.

    The ``obj_<name>`` attribute is looked up on the item given at call
    time and resolved through ``item.use_selector()``.
    """

    def __init__(self, name):
        super(Attr, self).__init__()
        self.name = name

    def __call__(self, item):
        attr_name = 'obj_%s' % self.name
        selector = getattr(item, attr_name)
        return item.use_selector(selector)

View file

@ -21,6 +21,7 @@ from __future__ import absolute_import
import requests import requests
import re import re
import sys
from copy import deepcopy from copy import deepcopy
from cStringIO import StringIO from cStringIO import StringIO
@ -30,7 +31,7 @@ from weboob.tools.parsers.lxmlparser import LxmlHtmlParser
from weboob.tools.log import getLogger from weboob.tools.log import getLogger
from .browser import DomainBrowser from .browser import DomainBrowser
from .filters import Filter, CleanText from .filters import _Filter, CleanText
class URL(object): class URL(object):
@ -114,7 +115,7 @@ class _PagesBrowserMeta(type):
new_class = super(_PagesBrowserMeta, cls).__new__(cls, name, bases, attrs) new_class = super(_PagesBrowserMeta, cls).__new__(cls, name, bases, attrs)
if new_class._urls is None: if new_class._urls is None:
new_class._urls = {} new_class._urls = OrderedDict()
else: else:
new_class._urls = deepcopy(new_class._urls) new_class._urls = deepcopy(new_class._urls)
new_class._urls.update(urls) new_class._urls.update(urls)
@ -387,15 +388,18 @@ class AbstractElement(object):
self.env = deepcopy(page.params) self.env = deepcopy(page.params)
def use_selector(self, func): def use_selector(self, func):
if isinstance(func, Filter): if isinstance(func, _Filter):
value = func(self) value = func(self)
elif callable(func): elif callable(func):
value = func() value = func()
else: else:
value = func value = deepcopy(func)
return value return value
def parse(self, obj):
    """
    Hook that subclasses may override; the default implementation does
    nothing.

    The ``__iter__`` methods of ListElement and ItemElement call it as
    ``self.parse(self.el)``, so despite its name the parameter receives
    the element being processed.
    """
    pass
def xpath(self, *args, **kwargs): def xpath(self, *args, **kwargs):
return self.el.xpath(*args, **kwargs) return self.el.xpath(*args, **kwargs)
@ -412,9 +416,6 @@ class ListElement(AbstractElement):
def __call__(self): def __call__(self):
return self.__iter__() return self.__iter__()
def parse(self, el):
pass
def __iter__(self): def __iter__(self):
self.parse(self.el) self.parse(self.el)
@ -464,12 +465,31 @@ class ListElement(AbstractElement):
for obj in attr(self.page, self, el): for obj in attr(self.page, self, el):
yield self.store(obj) yield self.store(obj)
class SkipItem(Exception): class SkipItem(Exception):
pass pass
class _ItemElementMeta(type):
"""
Private meta-class used to keep order of obj_* attributes in ItemElement.
"""
def __new__(cls, name, bases, attrs):
filters = [(re.sub('^obj_', '', attr_name), attrs[attr_name]) for attr_name, obj in attrs.items() if attr_name.startswith('obj_')]
# constants first, then filters, then methods
filters.sort(key=lambda x: x[1]._creation_counter if hasattr(x[1], '_creation_counter') else (sys.maxint if callable(x[1]) else 0))
new_class = super(_ItemElementMeta, cls).__new__(cls, name, bases, attrs)
new_class._attrs = [f[0] for f in filters]
return new_class
class ItemElement(AbstractElement): class ItemElement(AbstractElement):
__metaclass__ = _ItemElementMeta
_attrs = None
klass = None klass = None
__filter__ = None condition = None
class Index(object): class Index(object):
pass pass
@ -478,9 +498,6 @@ class ItemElement(AbstractElement):
super(ItemElement, self).__init__(*args, **kwargs) super(ItemElement, self).__init__(*args, **kwargs)
self.obj = None self.obj = None
def parse(self, obj):
pass
def build_object(self): def build_object(self):
return self.klass() return self.klass()
@ -492,22 +509,15 @@ class ItemElement(AbstractElement):
return obj return obj
def __iter__(self): def __iter__(self):
if self.__filter__ is not None: if self.condition is not None and not self.condition():
try: return
skip = not self.__filter__(self.el)
except TypeError:
skip = not self.__filter__.im_func(self.el)
if skip:
return
try: try:
if self.obj is None: if self.obj is None:
self.obj = self.build_object() self.obj = self.build_object()
self.parse(self.el) self.parse(self.el)
for attr in dir(self): for attr in self._attrs:
m = re.match('obj_(.*)', attr) self.handle_attr(attr, getattr(self, 'obj_%s' % attr))
if m:
self.handle_attr(m.group(1), getattr(self, attr))
except SkipItem: except SkipItem:
return return

View file

@ -23,10 +23,11 @@ import datetime
import re import re
from weboob.capabilities.bank import Transaction, Account from weboob.capabilities.bank import Transaction, Account
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable, NotLoaded
from weboob.tools.misc import to_unicode from weboob.tools.misc import to_unicode
from weboob.tools.log import getLogger from weboob.tools.log import getLogger
from weboob.tools.browser2.page import TableElement
from weboob.tools.browser2.filters import Filter, CleanText, CleanDecimal from weboob.tools.browser2.filters import Filter, CleanText, CleanDecimal
@ -166,10 +167,17 @@ class FrenchTransaction(Transaction):
return return
class TransactionsElement(TableElement):
    """
    Table element carrying the column definitions of transaction listings.
    """

    # Column name -> list of titles for that column (presumably the table
    # header labels matched by TableElement -- to be confirmed there).
    columns = dict(
        date=[u'Date'],
        vdate=[u'Valeur'],
        raw=[u'Opération', u'Libellé'],
        credit=[u'Crédit', 'Montant'],
        debit=[u'Débit'],
    )
class Date(CleanText): class Date(CleanText):
def __call__(self, item): def __call__(self, item):
date = super(FrenchTransaction.Date, self).__call__(item) date = super(FrenchTransaction.Date, self).__call__(item)
item.obj.rdate = date
return date return date
def filter(self, date): def filter(self, date):
@ -195,6 +203,8 @@ class FrenchTransaction(Transaction):
class Filter(CleanText): class Filter(CleanText):
def __call__(self, item): def __call__(self, item):
raw = super(Filter, self).__call__(item) raw = super(Filter, self).__call__(item)
if item.obj.rdate is NotLoaded:
item.obj.rdate = item.obj.date
item.obj.category = NotAvailable item.obj.category = NotAvailable
if ' ' in raw: if ' ' in raw:
item.obj.category, useless, item.obj.label = [part.strip() for part in raw.partition(' ')] item.obj.category, useless, item.obj.label = [part.strip() for part in raw.partition(' ')]