add exceptions, add _Filter.default_or_raise, and other fixes
This commit is contained in:
parent
9ab8538b42
commit
698ed5fb06
2 changed files with 73 additions and 48 deletions
|
|
@ -23,18 +23,55 @@ from dateutil.parser import parse as parse_date
|
|||
import datetime
|
||||
from decimal import Decimal, InvalidOperation
|
||||
import re
|
||||
|
||||
from weboob.capabilities.base import empty
|
||||
|
||||
|
||||
_NO_DEFAULT = object()
|
||||
|
||||
|
||||
class ParseError(Exception):
    """
    Root of the exceptions defined for filters.

    It is also raised directly when a date or a time cannot be parsed.
    """


class FilterError(ParseError):
    """
    Common parent of the errors raised by a filter that cannot
    produce a value.
    """


class XPathNotFound(FilterError):
    """
    The selector of a filter did not match any element.
    """


class ColumnNotFound(FilterError):
    """
    None of the requested column names was found in the table.
    """


class AttributeNotFound(FilterError):
    """
    The matched element does not carry the requested attribute.
    """


class RegexpError(FilterError):
    """
    The regular expression did not match the given text.
    """


class ItemNotFound(FilterError):
    """
    The value to translate is not a key of the mapping.
    """
|
||||
|
||||
|
||||
class _Filter(object):
    """
    Base class of every filter.

    Each instance remembers the value of a class-wide counter at the time
    it was built (presumably to recover declaration order -- confirm with
    the callers), and holds an optional default value.
    """

    # Class-wide counter, incremented every time a filter is instantiated.
    _creation_counter = 0

    def __init__(self, default=_NO_DEFAULT):
        """
        :param default: value returned by :meth:`default_or_raise` instead
                        of raising. Any object, including None, is a valid
                        default; only the _NO_DEFAULT sentinel means
                        "no default".
        """
        self.default = default
        # Snapshot the class counter on the instance, then bump the class one.
        self._creation_counter = _Filter._creation_counter
        _Filter._creation_counter += 1

    def default_or_raise(self, exception):
        """
        Return the default value if one was given, else raise `exception`.

        :param exception: exception instance to raise when there is no default
        """
        if self.default is not _NO_DEFAULT:
            return self.default
        else:
            raise exception
|
||||
|
||||
|
||||
class Filter(_Filter):
|
||||
"""
|
||||
|
|
@ -50,8 +87,8 @@ class Filter(_Filter):
|
|||
Decimal('229.90')
|
||||
"""
|
||||
|
||||
def __init__(self, selector=None, default=_NO_DEFAULT):
    """
    :param selector: stored as-is on the instance as `self.selector`
    :param default: forwarded to the parent constructor, which keeps it
                    as the fallback value of the filter
    """
    super(Filter, self).__init__(default=default)
    self.selector = selector
|
||||
|
||||
@classmethod
|
||||
|
|
@ -108,9 +145,8 @@ class TableCell(_Filter):
|
|||
"""
|
||||
|
||||
def __init__(self, *names, **kwargs):
    """
    :param names: candidate column names, tried in order when called
    :param kwargs: forwarded untouched to the parent constructor, which
                   accepts `default`
    """
    # The parent constructor stores `default`, so it is not popped here.
    super(TableCell, self).__init__(**kwargs)
    self.names = names
|
||||
|
||||
def __call__(self, item):
|
||||
for name in self.names:
|
||||
|
|
@ -118,9 +154,7 @@ class TableCell(_Filter):
|
|||
if idx is not None:
|
||||
return item.xpath('./td[%s]' % (idx + 1))
|
||||
|
||||
if self.default is not _NO_DEFAULT:
|
||||
return self.default
|
||||
raise KeyError('Unable to find column %s' % ' or '.join(self.names))
|
||||
return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names)))
|
||||
|
||||
|
||||
class CleanText(Filter):
|
||||
|
|
@ -162,9 +196,8 @@ class CleanDecimal(CleanText):
|
|||
Get a cleaned Decimal value from an element.
|
||||
"""
|
||||
def __init__(self, selector, replace_dots=True, default=_NO_DEFAULT):
    """
    :param selector: forwarded to the parent constructor
    :param replace_dots: stored on the instance (its use is not visible
                         in this excerpt)
    :param default: forwarded to the parent constructor; returned as-is
                    when the text cannot be converted to a Decimal
    """
    super(CleanDecimal, self).__init__(selector, default=default)
    self.replace_dots = replace_dots
|
||||
|
||||
def filter(self, text):
|
||||
text = super(CleanDecimal, self).filter(text)
|
||||
|
|
@ -173,30 +206,21 @@ class CleanDecimal(CleanText):
|
|||
try:
|
||||
return Decimal(re.sub(ur'[^\d\-\.]', '', text))
|
||||
except InvalidOperation as e:
|
||||
if self.default is not _NO_DEFAULT:
|
||||
return Decimal(self.default)
|
||||
else:
|
||||
raise InvalidOperation(e)
|
||||
return self.default_or_raise(e)
|
||||
|
||||
|
||||
class Attr(Filter):
    """
    Get the value of an attribute on the first selected element.
    """

    def __init__(self, selector, attr, default=_NO_DEFAULT):
        """
        :param selector: forwarded to the parent constructor
        :param attr: name of the attribute to read
        :param default: value returned instead of raising when the element
                        or the attribute is missing
        """
        super(Attr, self).__init__(selector, default=default)
        self.attr = attr

    def filter(self, el):
        """
        Return the attribute `self.attr` of the first item of `el`.

        :raises XPathNotFound: `el` is empty and no default was given
        :raises AttributeNotFound: the first item has no such attribute
                                   and no default was given
        """
        try:
            return el[0].attrib[self.attr]
        except IndexError:
            # el[0] failed: nothing was selected.
            return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector))
        except KeyError:
            # The element exists but the attribute lookup failed.
            return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr)))
|
||||
|
||||
|
||||
class Link(Attr):
|
||||
|
|
@ -206,7 +230,7 @@ class Link(Attr):
|
|||
If the <a> tag is not found, an exception XPathNotFound is raised (unless a default value is given).
|
||||
"""
|
||||
def __init__(self, selector, default=_NO_DEFAULT):
    """
    :param selector: forwarded to the parent constructor
    :param default: value returned instead of raising when the link
                    or its href is missing
    """
    # A link is simply the 'href' attribute of the selected element.
    super(Link, self).__init__(selector, 'href', default=default)
|
||||
|
||||
|
||||
class Field(_Filter):
|
||||
|
|
@ -231,11 +255,10 @@ class Regexp(Filter):
|
|||
u'1988-08-13'
|
||||
"""
|
||||
def __init__(self, selector, pattern, template=None, flags=0, default=_NO_DEFAULT):
    """
    :param selector: forwarded to the parent constructor
    :param pattern: regular expression source; kept for error messages
    :param template: when None, the first non-None group of the match is
                     returned by the filter
    :param flags: flags given to re.compile
    :param default: value returned instead of raising when the pattern
                    does not match
    """
    super(Regexp, self).__init__(selector, default=default)
    self.pattern = pattern
    # Compile once here so that each call to filter() reuses the same object.
    self.regex = re.compile(pattern, flags)
    self.template = template
|
||||
|
||||
def filter(self, txt):
|
||||
if isinstance(txt, (tuple,list)):
|
||||
|
|
@ -243,10 +266,7 @@ class Regexp(Filter):
|
|||
|
||||
mobj = self.regex.search(txt)
|
||||
if not mobj:
|
||||
if self.default is not _NO_DEFAULT:
|
||||
return self.default
|
||||
else:
|
||||
raise KeyError('Unable to match %s in %r' % (self.pattern, txt))
|
||||
return self.default_or_raise(RegexpError('Unable to match %s in %r' % (self.pattern, txt)))
|
||||
|
||||
if self.template is None:
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
|
|
@ -256,25 +276,24 @@ class Regexp(Filter):
|
|||
|
||||
class Map(Filter):
    """
    Translate a value through a dictionary.
    """

    def __init__(self, selector, map_dict, default=_NO_DEFAULT):
        """
        :param selector: forwarded to the parent constructor
        :param map_dict: mapping used to translate the filtered value
        :param default: value returned instead of raising when the value
                        is not a key of `map_dict`
        """
        super(Map, self).__init__(selector, default=default)
        self.map_dict = map_dict

    def filter(self, txt):
        """
        Return `self.map_dict[txt]`.

        :raises ItemNotFound: `txt` is not a key of the mapping and no
                              default was given
        """
        try:
            return self.map_dict[txt]
        except KeyError:
            return self.default_or_raise(ItemNotFound('Unable to handle %r on %r' % (txt, self.map_dict)))
|
||||
|
||||
|
||||
class DateTime(Filter):
    """
    Parse a text with dateutil's parser.
    """

    def filter(self, txt):
        """
        Return the result of `parse_date(txt)`.

        Values considered empty by `empty()` are returned untouched.

        :raises ParseError: the parser raised ValueError and no default
                            was given
        """
        if empty(txt):
            return txt

        try:
            return parse_date(txt)
        except ValueError as e:
            return self.default_or_raise(ParseError('Unable to parse %r: %s' % (txt, e)))
|
||||
|
||||
|
||||
class Date(DateTime):
|
||||
|
|
@ -290,8 +309,7 @@ class Time(Filter):
|
|||
kwargs = {'hour': 'hh', 'minute': 'mm', 'second': 'ss'}
|
||||
|
||||
def __init__(self, selector, default=_NO_DEFAULT):
    """
    :param selector: forwarded to the parent constructor
    :param default: value returned instead of raising when no time is
                    found in the text
    """
    super(Time, self).__init__(selector, default=default)
|
||||
|
||||
def filter(self, txt):
|
||||
m = self.regexp.search(txt)
|
||||
|
|
@ -301,10 +319,7 @@ class Time(Filter):
|
|||
kwargs[key] = int(m.groupdict()[index] or 0)
|
||||
return self.klass(**kwargs)
|
||||
|
||||
if self.default is not _NO_DEFAULT:
|
||||
return self.default
|
||||
else:
|
||||
raise ValueError('Unable to find time in %r' % txt)
|
||||
return self.default_or_raise(ParseError('Unable to find time in %r' % txt))
|
||||
|
||||
|
||||
class Duration(Time):
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from urllib import unquote
|
||||
import requests
|
||||
import re
|
||||
import sys
|
||||
|
|
@ -33,13 +34,17 @@ from weboob.tools.regex_helper import normalize
|
|||
from weboob.tools.log import getLogger
|
||||
|
||||
from .browser import DomainBrowser
|
||||
from .filters import _Filter, CleanText
|
||||
from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
|
||||
|
||||
|
||||
class UrlNotResolvable(Exception):
    """
    Presumably raised when an URL cannot be resolved; the raising code is
    not visible in this excerpt -- confirm against the rest of the module.
    """
|
||||
|
||||
|
||||
class DataError(Exception):
    """
    The scraped data is inconsistent, e.g. two objects of a list share
    the same ID.
    """
|
||||
|
||||
|
||||
class URL(object):
|
||||
"""
|
||||
A description of an URL on the PagesBrowser website.
|
||||
|
|
@ -68,12 +73,17 @@ class URL(object):
|
|||
If arguments are provided, and only then, they are checked against the arguments
|
||||
that were used to build the current page URL.
|
||||
"""
|
||||
assert self.klass is not None, "You can use this method only if the is a BasePage class handler."
|
||||
|
||||
if len(kwargs):
|
||||
params = self.match(self.browser.absurl(self.build(**kwargs), base=True)).groupdict()
|
||||
else:
|
||||
params = None
|
||||
return self.browser.page and self.klass and isinstance(self.browser.page, self.klass) \
|
||||
and (params is None or params == self.browser.page.params)
|
||||
|
||||
# XXX use unquote on current params values because if there are spaces
|
||||
# or special characters in them, they are encoded only in the page params but not in kwargs.
|
||||
return self.browser.page and isinstance(self.browser.page, self.klass) \
|
||||
and (params is None or params == dict([(k,unquote(v)) for k,v in self.browser.page.params.iteritems()]))
|
||||
|
||||
def stay_or_go(self, **kwargs):
|
||||
"""
|
||||
|
|
@ -525,7 +535,7 @@ class ListElement(AbstractElement):
|
|||
next_page = getattr(self, 'next_page')
|
||||
try:
|
||||
value = self.use_selector(next_page)
|
||||
except IndexError:
|
||||
except (AttributeNotFound, XPathNotFound):
|
||||
return
|
||||
|
||||
if value is None:
|
||||
|
|
@ -537,7 +547,7 @@ class ListElement(AbstractElement):
|
|||
def store(self, obj):
    """
    Record `obj` in `self.objects` under its ID and return it.

    Objects whose ID is falsy are returned without being recorded.

    :raises DataError: an object with the same ID is already recorded
    """
    # NOTE(review): nesting reconstructed from a flattened diff view --
    # confirm that the assignment belongs under `if obj.id`.
    if obj.id:
        if obj.id in self.objects:
            raise DataError('There are two objects with the same ID! %s' % obj.id)
        self.objects[obj.id] = obj
    return obj
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue