add a 'raw' parser
This commit is contained in:
parent
47f159a3a4
commit
218159992e
2 changed files with 14 additions and 1 deletion
|
|
@ -59,6 +59,12 @@ def load_json():
|
|||
from .jsonparser import JsonParser
|
||||
return JsonParser
|
||||
|
||||
def load_raw():
    """Return the RawParser class, importing its module on first use."""
    # RawParser does not read HTML, so it must stay out of the default
    # preference_order of get_parser() below.
    from . import iparser
    return iparser.RawParser
|
||||
|
||||
|
||||
def get_parser(preference_order=('lxml', 'lxmlsoup')):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@
|
|||
import re
|
||||
|
||||
|
||||
__all__ = ['IParser']
|
||||
__all__ = ['IParser', 'RawParser']
|
||||
|
||||
|
||||
class IParser(object):
|
||||
|
|
@ -53,3 +53,10 @@ class IParser(object):
|
|||
"""
|
||||
p = re.compile(r'<.*?>')
|
||||
return p.sub(' ', data).strip()
|
||||
|
||||
class RawParser(IParser):
    """Parser that hands back the input content without interpreting it."""

    def parse(self, data, encoding=None):
        """Read everything from *data* and return it as-is.

        *data* must expose a ``read()`` method. *encoding* is accepted
        but not used by this parser.
        """
        content = data.read()
        return content

    def tostring(self, elem):
        """Return *elem* unchanged."""
        return elem
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue