add a 'raw' parser
This commit is contained in:
parent
47f159a3a4
commit
218159992e
2 changed files with 14 additions and 1 deletions
|
|
@ -59,6 +59,12 @@ def load_json():
|
||||||
from .jsonparser import JsonParser
|
from .jsonparser import JsonParser
|
||||||
return JsonParser
|
return JsonParser
|
||||||
|
|
||||||
|
def load_raw():
    """
    Import the raw parser lazily and return its class.

    :return: the RawParser class (the class itself, not an instance)
    """
    # This parser doesn't read HTML, so it must not be added to the
    # default value of get_parser's preference_order.
    from .iparser import RawParser as raw_parser_class
    return raw_parser_class
|
||||||
|
|
||||||
|
|
||||||
def get_parser(preference_order=('lxml', 'lxmlsoup')):
|
def get_parser(preference_order=('lxml', 'lxmlsoup')):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['IParser']
|
__all__ = ['IParser', 'RawParser']
|
||||||
|
|
||||||
|
|
||||||
class IParser(object):
|
class IParser(object):
|
||||||
|
|
@ -53,3 +53,10 @@ class IParser(object):
|
||||||
"""
|
"""
|
||||||
p = re.compile(r'<.*?>')
|
p = re.compile(r'<.*?>')
|
||||||
return p.sub(' ', data).strip()
|
return p.sub(' ', data).strip()
|
||||||
|
|
||||||
|
class RawParser(IParser):
    """
    Pass-through parser: the "parsed" document is the raw content read
    from the input, and serializing an element returns it unchanged.
    """

    def parse(self, data, encoding=None):
        """
        Read the input object and return its content without processing.

        :param data: object exposing a read() method
        :param encoding: not used by this implementation
        :return: whatever data.read() returns
        """
        content = data.read()
        return content

    def tostring(self, elem):
        """
        Return the given element as is.

        :param elem: element to serialize
        :return: the same object that was passed in
        """
        return elem
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue