add some javascript dedicated filters
This commit is contained in:
parent
e37df1417f
commit
7f7d762aa5
2 changed files with 131 additions and 0 deletions
|
|
@ -20,3 +20,4 @@
|
||||||
from .standard import * # NOQA
|
from .standard import * # NOQA
|
||||||
from .html import * # NOQA
|
from .html import * # NOQA
|
||||||
from .json import * # NOQA
|
from .json import * # NOQA
|
||||||
|
from .javascript import * # NOQA
|
||||||
|
|
|
||||||
130
weboob/tools/browser2/filters/javascript.py
Normal file
130
weboob/tools/browser2/filters/javascript.py
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2014 Simon Murail
|
||||||
|
#
|
||||||
|
# This file is part of weboob.
|
||||||
|
#
|
||||||
|
# weboob is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# weboob is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from weboob.tools.browser2.filters.standard import _NO_DEFAULT, Filter, Regexp, RegexpError
|
||||||
|
from weboob.tools.exceptions import ParseError
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ['JSPayload', 'JSVar']
|
||||||
|
|
||||||
|
|
||||||
|
def _quoted(q):
|
||||||
|
return r'{0}(?:[^{0}]|\{0})*{0}'.format(q)
|
||||||
|
|
||||||
|
|
||||||
|
class JSPayload(Filter):
|
||||||
|
r"""
|
||||||
|
Get Javascript code from tag's text, cleaned from all comments.
|
||||||
|
|
||||||
|
It filters code in a such a way that corner cases are handled, such as
|
||||||
|
comments in string literals and comments in comments.
|
||||||
|
|
||||||
|
The following snippet is borrowed from <http://ostermiller.org/findcomment.html>:
|
||||||
|
|
||||||
|
>>> JSPayload.filter('''someString = "An example comment: /* example */";
|
||||||
|
...
|
||||||
|
... // The comment around this code has been commented out.
|
||||||
|
... // /*
|
||||||
|
... some_code();
|
||||||
|
... // */''')
|
||||||
|
'someString = "An example comment: /* example */";\n\nsome_code();\n'
|
||||||
|
|
||||||
|
"""
|
||||||
|
_single_line_comment = '[ \t\v\f]*//.*\r?(?:\n|$)'
|
||||||
|
_multi_line_comment = '/\*(?:.|[\r\n])*?\*/'
|
||||||
|
_splitter = re.compile('(?:(%s|%s)|%s|%s)' % (_quoted('"'),
|
||||||
|
_quoted("'"),
|
||||||
|
_single_line_comment,
|
||||||
|
_multi_line_comment))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def filter(cls, value):
|
||||||
|
return ''.join(filter(bool, cls._splitter.split(value)))
|
||||||
|
|
||||||
|
|
||||||
|
class JSVar(Regexp):
|
||||||
|
r"""
|
||||||
|
Get the init value of first found assignment value of a variable.
|
||||||
|
|
||||||
|
It only understands literal values, but should parse them well. Values
|
||||||
|
are converted in python values, quotes and slashes in strings are stripped.
|
||||||
|
|
||||||
|
>>> JSVar(var='test').filter("var test = .1")
|
||||||
|
0.1
|
||||||
|
>>> JSVar(var='test').filter("test = 42")
|
||||||
|
42
|
||||||
|
>>> JSVar(var='test').filter('test = "Some \\"string\\" value"')
|
||||||
|
'Some "string" value'
|
||||||
|
>>> JSVar(var='test').filter("var test = false")
|
||||||
|
False
|
||||||
|
"""
|
||||||
|
pattern_template = r"""(?x)
|
||||||
|
(?:var\s+)? # optional var keyword
|
||||||
|
\b%%s # var name
|
||||||
|
\s*=\s* # equal sign
|
||||||
|
(?:(?P<float>[-+]?\s* # float ?
|
||||||
|
(?:(?:\d+\.\d*|\d*\.\d+)(?:[eE]\d+)?
|
||||||
|
|\d+[eE]\d+))
|
||||||
|
|(?P<int>[-+]?\s*(?:0[bBxXoO])?\d+) # int ?
|
||||||
|
|(?P<str>(?:%s|%s)) # str ?
|
||||||
|
|(?P<bool>true|false) # bool ?
|
||||||
|
|(?P<None>null)) # None ?
|
||||||
|
""" % (_quoted('"'), _quoted("'"))
|
||||||
|
|
||||||
|
_re_spaces = re.compile(r'\s+')
|
||||||
|
|
||||||
|
def to_python(self, m):
|
||||||
|
values = m.groupdict()
|
||||||
|
for t, v in values.iteritems():
|
||||||
|
if v is not None:
|
||||||
|
break
|
||||||
|
if self.of_type and t != self.of_type.__name__:
|
||||||
|
raise ParseError('Variable %r of type %s not found' % (self.var, self.of_type))
|
||||||
|
if t in ('int', 'float'):
|
||||||
|
v = self._re_spaces.sub('', v).lower()
|
||||||
|
if t == 'int':
|
||||||
|
base = {'x': 16, 'o': 8, 'b': 2}.get(v[1], 10) if v[0] == '0' else 10
|
||||||
|
return int(v, base=base)
|
||||||
|
return float(v)
|
||||||
|
if t == 'str':
|
||||||
|
return v[1:-1].decode('string_escape')
|
||||||
|
if t == 'bool':
|
||||||
|
return v == 'true'
|
||||||
|
if t == 'None':
|
||||||
|
return
|
||||||
|
if self.default:
|
||||||
|
return self.default
|
||||||
|
raise ParseError('Unable to parse variable %r value' % self.var)
|
||||||
|
|
||||||
|
def __init__(self, selector=None, var=None, of_type=None, default=_NO_DEFAULT):
|
||||||
|
assert var is not None, 'Please specify a var parameter'
|
||||||
|
self.var = var
|
||||||
|
self.of_type = of_type
|
||||||
|
pattern = self.pattern_template % var
|
||||||
|
super(JSVar, self).__init__(selector, pattern=pattern, template=self.to_python, default=default)
|
||||||
|
|
||||||
|
def filter(self, txt):
|
||||||
|
try:
|
||||||
|
return super(JSVar, self).filter(txt)
|
||||||
|
except RegexpError:
|
||||||
|
raise ParseError('Variable %r not found' % self.var)
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue