add exceptions, add _Filter.default_or_raise, and other fixes
This commit is contained in:
parent
9ab8538b42
commit
698ed5fb06
2 changed files with 73 additions and 48 deletions
|
|
@ -23,18 +23,55 @@ from dateutil.parser import parse as parse_date
|
||||||
import datetime
|
import datetime
|
||||||
from decimal import Decimal, InvalidOperation
|
from decimal import Decimal, InvalidOperation
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from weboob.capabilities.base import empty
|
from weboob.capabilities.base import empty
|
||||||
|
|
||||||
|
|
||||||
_NO_DEFAULT = object()
|
_NO_DEFAULT = object()
|
||||||
|
|
||||||
|
|
||||||
|
class ParseError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class FilterError(ParseError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class XPathNotFound(FilterError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ColumnNotFound(FilterError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class AttributeNotFound(FilterError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class RegexpError(FilterError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ItemNotFound(FilterError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class _Filter(object):
|
class _Filter(object):
|
||||||
_creation_counter = 0
|
_creation_counter = 0
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, default=_NO_DEFAULT):
|
||||||
|
self.default = default
|
||||||
self._creation_counter = _Filter._creation_counter
|
self._creation_counter = _Filter._creation_counter
|
||||||
_Filter._creation_counter += 1
|
_Filter._creation_counter += 1
|
||||||
|
|
||||||
|
def default_or_raise(self, exception):
|
||||||
|
if self.default is not _NO_DEFAULT:
|
||||||
|
return self.default
|
||||||
|
else:
|
||||||
|
raise exception
|
||||||
|
|
||||||
|
|
||||||
class Filter(_Filter):
|
class Filter(_Filter):
|
||||||
"""
|
"""
|
||||||
|
|
@ -50,8 +87,8 @@ class Filter(_Filter):
|
||||||
Decimal('229.90')
|
Decimal('229.90')
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, selector=None):
|
def __init__(self, selector=None, default=_NO_DEFAULT):
|
||||||
super(Filter, self).__init__()
|
super(Filter, self).__init__(default=default)
|
||||||
self.selector = selector
|
self.selector = selector
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
|
@ -108,9 +145,8 @@ class TableCell(_Filter):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, *names, **kwargs):
|
def __init__(self, *names, **kwargs):
|
||||||
super(TableCell, self).__init__()
|
super(TableCell, self).__init__(**kwargs)
|
||||||
self.names = names
|
self.names = names
|
||||||
self.default = kwargs.pop('default', _NO_DEFAULT)
|
|
||||||
|
|
||||||
def __call__(self, item):
|
def __call__(self, item):
|
||||||
for name in self.names:
|
for name in self.names:
|
||||||
|
|
@ -118,9 +154,7 @@ class TableCell(_Filter):
|
||||||
if idx is not None:
|
if idx is not None:
|
||||||
return item.xpath('./td[%s]' % (idx + 1))
|
return item.xpath('./td[%s]' % (idx + 1))
|
||||||
|
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(ColumnNotFound('Unable to find column %s' % ' or '.join(self.names)))
|
||||||
return self.default
|
|
||||||
raise KeyError('Unable to find column %s' % ' or '.join(self.names))
|
|
||||||
|
|
||||||
|
|
||||||
class CleanText(Filter):
|
class CleanText(Filter):
|
||||||
|
|
@ -162,9 +196,8 @@ class CleanDecimal(CleanText):
|
||||||
Get a cleaned Decimal value from an element.
|
Get a cleaned Decimal value from an element.
|
||||||
"""
|
"""
|
||||||
def __init__(self, selector, replace_dots=True, default=_NO_DEFAULT):
|
def __init__(self, selector, replace_dots=True, default=_NO_DEFAULT):
|
||||||
super(CleanDecimal, self).__init__(selector)
|
super(CleanDecimal, self).__init__(selector, default=default)
|
||||||
self.replace_dots = replace_dots
|
self.replace_dots = replace_dots
|
||||||
self.default = default
|
|
||||||
|
|
||||||
def filter(self, text):
|
def filter(self, text):
|
||||||
text = super(CleanDecimal, self).filter(text)
|
text = super(CleanDecimal, self).filter(text)
|
||||||
|
|
@ -173,30 +206,21 @@ class CleanDecimal(CleanText):
|
||||||
try:
|
try:
|
||||||
return Decimal(re.sub(ur'[^\d\-\.]', '', text))
|
return Decimal(re.sub(ur'[^\d\-\.]', '', text))
|
||||||
except InvalidOperation as e:
|
except InvalidOperation as e:
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(e)
|
||||||
return Decimal(self.default)
|
|
||||||
else:
|
|
||||||
raise InvalidOperation(e)
|
|
||||||
|
|
||||||
|
|
||||||
class Attr(Filter):
|
class Attr(Filter):
|
||||||
def __init__(self, selector, attr, default=_NO_DEFAULT):
|
def __init__(self, selector, attr, default=_NO_DEFAULT):
|
||||||
super(Attr, self).__init__(selector)
|
super(Attr, self).__init__(selector, default=default)
|
||||||
self.attr = attr
|
self.attr = attr
|
||||||
self.default = default
|
|
||||||
|
|
||||||
def filter(self, el):
|
def filter(self, el):
|
||||||
try:
|
try:
|
||||||
return el[0].attrib[self.attr]
|
return el[0].attrib[self.attr]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(XPathNotFound('Unable to find link %s' % self.selector))
|
||||||
return self.default
|
|
||||||
raise ValueError('Unable to find link %s' % self.selector)
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(AttributeNotFound('Link %s does not has attribute %s' % (el[0], self.attr)))
|
||||||
return self.default
|
|
||||||
else:
|
|
||||||
raise KeyError('Link %s does not has attribute %s' % (el[0], self.attr))
|
|
||||||
|
|
||||||
|
|
||||||
class Link(Attr):
|
class Link(Attr):
|
||||||
|
|
@ -206,7 +230,7 @@ class Link(Attr):
|
||||||
If the <a> tag is not found, an exception IndexError is raised.
|
If the <a> tag is not found, an exception XPathNotFound is raised (unless a default value is given).
|
||||||
"""
|
"""
|
||||||
def __init__(self, selector, default=_NO_DEFAULT):
|
def __init__(self, selector, default=_NO_DEFAULT):
|
||||||
super(Link, self).__init__(selector, 'href', default)
|
super(Link, self).__init__(selector, 'href', default=default)
|
||||||
|
|
||||||
|
|
||||||
class Field(_Filter):
|
class Field(_Filter):
|
||||||
|
|
@ -231,11 +255,10 @@ class Regexp(Filter):
|
||||||
u'1988-08-13'
|
u'1988-08-13'
|
||||||
"""
|
"""
|
||||||
def __init__(self, selector, pattern, template=None, flags=0, default=_NO_DEFAULT):
|
def __init__(self, selector, pattern, template=None, flags=0, default=_NO_DEFAULT):
|
||||||
super(Regexp, self).__init__(selector)
|
super(Regexp, self).__init__(selector, default=default)
|
||||||
self.pattern = pattern
|
self.pattern = pattern
|
||||||
self.regex = re.compile(pattern, flags)
|
self.regex = re.compile(pattern, flags)
|
||||||
self.template = template
|
self.template = template
|
||||||
self.default = default
|
|
||||||
|
|
||||||
def filter(self, txt):
|
def filter(self, txt):
|
||||||
if isinstance(txt, (tuple,list)):
|
if isinstance(txt, (tuple,list)):
|
||||||
|
|
@ -243,10 +266,7 @@ class Regexp(Filter):
|
||||||
|
|
||||||
mobj = self.regex.search(txt)
|
mobj = self.regex.search(txt)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(RegexpError('Unable to match %s in %r' % (self.pattern, txt)))
|
||||||
return self.default
|
|
||||||
else:
|
|
||||||
raise KeyError('Unable to match %s in %r' % (self.pattern, txt))
|
|
||||||
|
|
||||||
if self.template is None:
|
if self.template is None:
|
||||||
return next(g for g in mobj.groups() if g is not None)
|
return next(g for g in mobj.groups() if g is not None)
|
||||||
|
|
@ -256,25 +276,24 @@ class Regexp(Filter):
|
||||||
|
|
||||||
class Map(Filter):
|
class Map(Filter):
|
||||||
def __init__(self, selector, map_dict, default=_NO_DEFAULT):
|
def __init__(self, selector, map_dict, default=_NO_DEFAULT):
|
||||||
super(Map, self).__init__(selector)
|
super(Map, self).__init__(selector, default=default)
|
||||||
self.map_dict = map_dict
|
self.map_dict = map_dict
|
||||||
self.default = default
|
|
||||||
|
|
||||||
def filter(self, txt):
|
def filter(self, txt):
|
||||||
try:
|
try:
|
||||||
return self.map_dict[txt]
|
return self.map_dict[txt]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(ItemNotFound('Unable to handle %r on %r' % (txt, self.map_dict)))
|
||||||
return self.default
|
|
||||||
else:
|
|
||||||
raise KeyError('Unable to handle %r' % txt)
|
|
||||||
|
|
||||||
|
|
||||||
class DateTime(Filter):
|
class DateTime(Filter):
|
||||||
def filter(self, txt):
|
def filter(self, txt):
|
||||||
if empty(txt):
|
if empty(txt):
|
||||||
return txt
|
return txt
|
||||||
|
try:
|
||||||
return parse_date(txt)
|
return parse_date(txt)
|
||||||
|
except ValueError as e:
|
||||||
|
return self.default_or_raise(ParseError('Unable to parse %r: %s' % (txt, e)))
|
||||||
|
|
||||||
|
|
||||||
class Date(DateTime):
|
class Date(DateTime):
|
||||||
|
|
@ -290,8 +309,7 @@ class Time(Filter):
|
||||||
kwargs = {'hour': 'hh', 'minute': 'mm', 'second': 'ss'}
|
kwargs = {'hour': 'hh', 'minute': 'mm', 'second': 'ss'}
|
||||||
|
|
||||||
def __init__(self, selector, default=_NO_DEFAULT):
|
def __init__(self, selector, default=_NO_DEFAULT):
|
||||||
super(Time, self).__init__(selector)
|
super(Time, self).__init__(selector, default=default)
|
||||||
self.default = default
|
|
||||||
|
|
||||||
def filter(self, txt):
|
def filter(self, txt):
|
||||||
m = self.regexp.search(txt)
|
m = self.regexp.search(txt)
|
||||||
|
|
@ -301,10 +319,7 @@ class Time(Filter):
|
||||||
kwargs[key] = int(m.groupdict()[index] or 0)
|
kwargs[key] = int(m.groupdict()[index] or 0)
|
||||||
return self.klass(**kwargs)
|
return self.klass(**kwargs)
|
||||||
|
|
||||||
if self.default is not _NO_DEFAULT:
|
return self.default_or_raise(ParseError('Unable to find time in %r' % txt))
|
||||||
return self.default
|
|
||||||
else:
|
|
||||||
raise ValueError('Unable to find time in %r' % txt)
|
|
||||||
|
|
||||||
|
|
||||||
class Duration(Time):
|
class Duration(Time):
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,7 @@
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
from urllib import unquote
|
||||||
import requests
|
import requests
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
@ -33,13 +34,17 @@ from weboob.tools.regex_helper import normalize
|
||||||
from weboob.tools.log import getLogger
|
from weboob.tools.log import getLogger
|
||||||
|
|
||||||
from .browser import DomainBrowser
|
from .browser import DomainBrowser
|
||||||
from .filters import _Filter, CleanText
|
from .filters import _Filter, CleanText, AttributeNotFound, XPathNotFound
|
||||||
|
|
||||||
|
|
||||||
class UrlNotResolvable(Exception):
|
class UrlNotResolvable(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class DataError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class URL(object):
|
class URL(object):
|
||||||
"""
|
"""
|
||||||
A description of an URL on the PagesBrowser website.
|
A description of an URL on the PagesBrowser website.
|
||||||
|
|
@ -68,12 +73,17 @@ class URL(object):
|
||||||
If arguments are provided, and only then, they are checked against the arguments
|
If arguments are provided, and only then, they are checked against the arguments
|
||||||
that were used to build the current page URL.
|
that were used to build the current page URL.
|
||||||
"""
|
"""
|
||||||
|
assert self.klass is not None, "You can use this method only if the is a BasePage class handler."
|
||||||
|
|
||||||
if len(kwargs):
|
if len(kwargs):
|
||||||
params = self.match(self.browser.absurl(self.build(**kwargs), base=True)).groupdict()
|
params = self.match(self.browser.absurl(self.build(**kwargs), base=True)).groupdict()
|
||||||
else:
|
else:
|
||||||
params = None
|
params = None
|
||||||
return self.browser.page and self.klass and isinstance(self.browser.page, self.klass) \
|
|
||||||
and (params is None or params == self.browser.page.params)
|
# XXX use unquote on current params values because if there are spaces
|
||||||
|
# or special characters in them, they are encoded only in the current page params but not in kwargs.
|
||||||
|
return self.browser.page and isinstance(self.browser.page, self.klass) \
|
||||||
|
and (params is None or params == dict([(k,unquote(v)) for k,v in self.browser.page.params.iteritems()]))
|
||||||
|
|
||||||
def stay_or_go(self, **kwargs):
|
def stay_or_go(self, **kwargs):
|
||||||
"""
|
"""
|
||||||
|
|
@ -525,7 +535,7 @@ class ListElement(AbstractElement):
|
||||||
next_page = getattr(self, 'next_page')
|
next_page = getattr(self, 'next_page')
|
||||||
try:
|
try:
|
||||||
value = self.use_selector(next_page)
|
value = self.use_selector(next_page)
|
||||||
except IndexError:
|
except (AttributeNotFound, XPathNotFound):
|
||||||
return
|
return
|
||||||
|
|
||||||
if value is None:
|
if value is None:
|
||||||
|
|
@ -537,7 +547,7 @@ class ListElement(AbstractElement):
|
||||||
def store(self, obj):
|
def store(self, obj):
|
||||||
if obj.id:
|
if obj.id:
|
||||||
if obj.id in self.objects:
|
if obj.id in self.objects:
|
||||||
raise ValueError('There are two objects with the same ID! %s' % obj.id)
|
raise DataError('There are two objects with the same ID! %s' % obj.id)
|
||||||
self.objects[obj.id] = obj
|
self.objects[obj.id] = obj
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue