add exceptions, add _Filter.default_or_raise, and other fixes

This commit is contained in:
Romain Bignon 2014-03-25 22:28:09 +01:00
commit 698ed5fb06
2 changed files with 73 additions and 48 deletions

View file

@ -23,18 +23,55 @@ from dateutil.parser import parse as parse_date
import datetime import datetime
from decimal import Decimal, InvalidOperation from decimal import Decimal, InvalidOperation
import re import re
from weboob.capabilities.base import empty from weboob.capabilities.base import empty
_NO_DEFAULT = object() _NO_DEFAULT = object()
class ParseError(Exception):
    """
    Base class of every error raised while extracting data with filters.

    Raised directly by DateTime and Time when the text cannot be parsed.
    """


class FilterError(ParseError):
    """
    Base class of the errors raised when a filter cannot produce a value.
    """


class XPathNotFound(FilterError):
    """
    Raised by Attr when the selector does not match any element.
    """


class ColumnNotFound(FilterError):
    """
    Raised by TableCell when none of the requested columns exists.
    """


class AttributeNotFound(FilterError):
    """
    Raised by Attr when the matched element lacks the wanted attribute.
    """


class RegexpError(FilterError):
    """
    Raised by Regexp when the pattern does not match the text.
    """


class ItemNotFound(FilterError):
    """
    Raised by Map when the value is not a key of the mapping.
    """
class _Filter(object): class _Filter(object):
_creation_counter = 0 _creation_counter = 0
def __init__(self): def __init__(self, default=_NO_DEFAULT):
self.default = default
self._creation_counter = _Filter._creation_counter self._creation_counter = _Filter._creation_counter
_Filter._creation_counter += 1 _Filter._creation_counter += 1
def default_or_raise(self, exception):
    """
    Return the default value of this filter, or raise the given exception.

    :param exception: exception instance to raise when no default value
                      was given to the constructor
    :returns: ``self.default`` when it was supplied (any value, including
              None, counts as supplied)
    :raises: ``exception`` when ``self.default`` is the _NO_DEFAULT sentinel
    """
    # Identity comparison against the sentinel: None, 0 or '' are all
    # legitimate defaults and must not trigger the exception.
    if self.default is _NO_DEFAULT:
        raise exception
    return self.default
class Filter(_Filter): class Filter(_Filter):
""" """
@ -50,8 +87,8 @@ class Filter(_Filter):
Decimal('229.90') Decimal('229.90')
""" """
def __init__(self, selector=None): def __init__(self, selector=None, default=_NO_DEFAULT):
super(Filter, self).__init__() super(Filter, self).__init__(default=default)
self.selector = selector self.selector = selector
@classmethod @classmethod
@ -108,9 +145,8 @@ class TableCell(_Filter):
""" """
def __init__(self, *names, **kwargs): def __init__(self, *names, **kwargs):
super(TableCell, self).__init__() super(TableCell, self).__init__(**kwargs)
self.names = names self.names = names
self.default = kwargs.pop('default', _NO_DEFAULT)
def __call__(self, item): def __call__(self, item):
for name in self.names: for name in self.names:
@ -118,9 +154,7 @@ class TableCell(_Filter):
if idx is not None: if idx is not None:
return item.xpath('./td[%s]' % (idx + 1)) return item.xpath('./td[%s]' % (idx + 1))
if self.default is not _NO_DEFAULT: return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names)))
return self.default
raise KeyError('Unable to find column %s' % ' or '.join(self.names))
class CleanText(Filter): class CleanText(Filter):
@ -162,9 +196,8 @@ class CleanDecimal(CleanText):
Get a cleaned Decimal value from an element. Get a cleaned Decimal value from an element.
""" """
def __init__(self, selector, replace_dots=True, default=_NO_DEFAULT): def __init__(self, selector, replace_dots=True, default=_NO_DEFAULT):
super(CleanDecimal, self).__init__(selector) super(CleanDecimal, self).__init__(selector, default=default)
self.replace_dots = replace_dots self.replace_dots = replace_dots
self.default = default
def filter(self, text): def filter(self, text):
text = super(CleanDecimal, self).filter(text) text = super(CleanDecimal, self).filter(text)
@ -173,30 +206,21 @@ class CleanDecimal(CleanText):
try: try:
return Decimal(re.sub(ur'[^\d\-\.]', '', text)) return Decimal(re.sub(ur'[^\d\-\.]', '', text))
except InvalidOperation as e: except InvalidOperation as e:
if self.default is not _NO_DEFAULT: return self.default_or_raise(e)
return Decimal(self.default)
else:
raise InvalidOperation(e)
class Attr(Filter): class Attr(Filter):
def __init__(self, selector, attr, default=_NO_DEFAULT): def __init__(self, selector, attr, default=_NO_DEFAULT):
super(Attr, self).__init__(selector) super(Attr, self).__init__(selector, default=default)
self.attr = attr self.attr = attr
self.default = default
def filter(self, el): def filter(self, el):
try: try:
return el[0].attrib[self.attr] return el[0].attrib[self.attr]
except IndexError: except IndexError:
if self.default is not _NO_DEFAULT: return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector))
return self.default
raise ValueError('Unable to find link %s' % self.selector)
except KeyError: except KeyError:
if self.default is not _NO_DEFAULT: return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr)))
return self.default
else:
raise KeyError('Link %s does not has attribute %s' % (el[0], self.attr))
class Link(Attr): class Link(Attr):
@ -206,7 +230,7 @@ class Link(Attr):
If the <a> tag is not found, an exception IndexError is raised. If the <a> tag is not found, an exception IndexError is raised.
""" """
def __init__(self, selector, default=_NO_DEFAULT): def __init__(self, selector, default=_NO_DEFAULT):
super(Link, self).__init__(selector, 'href', default) super(Link, self).__init__(selector, 'href', default=default)
class Field(_Filter): class Field(_Filter):
@ -231,11 +255,10 @@ class Regexp(Filter):
u'1988-08-13' u'1988-08-13'
""" """
def __init__(self, selector, pattern, template=None, flags=0, default=_NO_DEFAULT): def __init__(self, selector, pattern, template=None, flags=0, default=_NO_DEFAULT):
super(Regexp, self).__init__(selector) super(Regexp, self).__init__(selector, default=default)
self.pattern = pattern self.pattern = pattern
self.regex = re.compile(pattern, flags) self.regex = re.compile(pattern, flags)
self.template = template self.template = template
self.default = default
def filter(self, txt): def filter(self, txt):
if isinstance(txt, (tuple,list)): if isinstance(txt, (tuple,list)):
@ -243,10 +266,7 @@ class Regexp(Filter):
mobj = self.regex.search(txt) mobj = self.regex.search(txt)
if not mobj: if not mobj:
if self.default is not _NO_DEFAULT: return self.default_or_raise(RegexpError('Unable to match %s in %r' % (self.pattern, txt)))
return self.default
else:
raise KeyError('Unable to match %s in %r' % (self.pattern, txt))
if self.template is None: if self.template is None:
return next(g for g in mobj.groups() if g is not None) return next(g for g in mobj.groups() if g is not None)
@ -256,25 +276,24 @@ class Regexp(Filter):
class Map(Filter): class Map(Filter):
def __init__(self, selector, map_dict, default=_NO_DEFAULT): def __init__(self, selector, map_dict, default=_NO_DEFAULT):
super(Map, self).__init__(selector) super(Map, self).__init__(selector, default=default)
self.map_dict = map_dict self.map_dict = map_dict
self.default = default
def filter(self, txt): def filter(self, txt):
try: try:
return self.map_dict[txt] return self.map_dict[txt]
except KeyError: except KeyError:
if self.default is not _NO_DEFAULT: return self.default_or_raise(ItemNotFound('Unable to handle %r on %r' % (txt, self.map_dict)))
return self.default
else:
raise KeyError('Unable to handle %r' % txt)
class DateTime(Filter): class DateTime(Filter):
def filter(self, txt): def filter(self, txt):
if empty(txt): if empty(txt):
return txt return txt
try:
return parse_date(txt) return parse_date(txt)
except ValueError as e:
return self.default_or_raise(ParseError('Unable to parse %r: %s' % (txt, e)))
class Date(DateTime): class Date(DateTime):
@ -290,8 +309,7 @@ class Time(Filter):
kwargs = {'hour': 'hh', 'minute': 'mm', 'second': 'ss'} kwargs = {'hour': 'hh', 'minute': 'mm', 'second': 'ss'}
def __init__(self, selector, default=_NO_DEFAULT): def __init__(self, selector, default=_NO_DEFAULT):
super(Time, self).__init__(selector) super(Time, self).__init__(selector, default=default)
self.default = default
def filter(self, txt): def filter(self, txt):
m = self.regexp.search(txt) m = self.regexp.search(txt)
@ -301,10 +319,7 @@ class Time(Filter):
kwargs[key] = int(m.groupdict()[index] or 0) kwargs[key] = int(m.groupdict()[index] or 0)
return self.klass(**kwargs) return self.klass(**kwargs)
if self.default is not _NO_DEFAULT: return self.default_or_raise(ParseError('Unable to find time in %r' % txt))
return self.default
else:
raise ValueError('Unable to find time in %r' % txt)
class Duration(Time): class Duration(Time):

View file

@ -19,6 +19,7 @@
from __future__ import absolute_import from __future__ import absolute_import
from urllib import unquote
import requests import requests
import re import re
import sys import sys
@ -33,13 +34,17 @@ from weboob.tools.regex_helper import normalize
from weboob.tools.log import getLogger from weboob.tools.log import getLogger
from .browser import DomainBrowser from .browser import DomainBrowser
from .filters import _Filter, CleanText from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
class UrlNotResolvable(Exception): class UrlNotResolvable(Exception):
pass pass
class DataError(Exception):
    """
    Raised when the data extracted from a page is inconsistent.

    ListElement.store raises it when two objects share the same ID.
    """
class URL(object): class URL(object):
""" """
A description of an URL on the PagesBrowser website. A description of an URL on the PagesBrowser website.
@ -68,12 +73,17 @@ class URL(object):
If arguments are provided, and only then, they are checked against the arguments If arguments are provided, and only then, they are checked against the arguments
that were used to build the current page URL. that were used to build the current page URL.
""" """
assert self.klass is not None, "You can use this method only if the is a BasePage class handler."
if len(kwargs): if len(kwargs):
params = self.match(self.browser.absurl(self.build(**kwargs), base=True)).groupdict() params = self.match(self.browser.absurl(self.build(**kwargs), base=True)).groupdict()
else: else:
params = None params = None
return self.browser.page and self.klass and isinstance(self.browser.page, self.klass) \
and (params is None or params == self.browser.page.params) # XXX use unquote on current params values because if there are spaces
# or special characters in them, they are encoded only in the page params, not in kwargs.
return self.browser.page and isinstance(self.browser.page, self.klass) \
and (params is None or params == dict([(k,unquote(v)) for k,v in self.browser.page.params.iteritems()]))
def stay_or_go(self, **kwargs): def stay_or_go(self, **kwargs):
""" """
@ -525,7 +535,7 @@ class ListElement(AbstractElement):
next_page = getattr(self, 'next_page') next_page = getattr(self, 'next_page')
try: try:
value = self.use_selector(next_page) value = self.use_selector(next_page)
except IndexError: except (AttributeNotFound, XPathNotFound):
return return
if value is None: if value is None:
@ -537,7 +547,7 @@ class ListElement(AbstractElement):
def store(self, obj): def store(self, obj):
if obj.id: if obj.id:
if obj.id in self.objects: if obj.id in self.objects:
raise ValueError('There are two objects with the same ID! %s' % obj.id) raise DataError('There are two objects with the same ID! %s' % obj.id)
self.objects[obj.id] = obj self.objects[obj.id] = obj
return obj return obj