add a 'raw' parser

This commit is contained in:
Romain Bignon 2013-01-05 18:44:23 +01:00
commit 218159992e
2 changed files with 14 additions and 1 deletion

View file

@ -59,6 +59,12 @@ def load_json():
from .jsonparser import JsonParser
return JsonParser
def load_raw():
    """Import the raw parser lazily and return its class (not an instance)."""
    # RawParser does not read HTML, so it must stay out of the default
    # preference_order tuple of get_parser() defined below.
    from .iparser import RawParser as raw_parser_cls
    return raw_parser_cls
def get_parser(preference_order=('lxml', 'lxmlsoup')):
"""

View file

@ -21,7 +21,7 @@
import re
__all__ = ['IParser']
__all__ = ['IParser', 'RawParser']
class IParser(object):
@ -53,3 +53,10 @@ class IParser(object):
"""
p = re.compile(r'<.*?>')
return p.sub(' ', data).strip()
class RawParser(IParser):
    """Pass-through parser: hands back the content it reads, untouched."""

    def parse(self, data, encoding=None):
        """Read everything from *data* and return it as-is.

        :param data: object exposing a ``read()`` method
        :param encoding: accepted for interface compatibility; not used here
        :return: whatever ``data.read()`` returns
        """
        content = data.read()
        return content

    def tostring(self, elem):
        """Return *elem* unchanged."""
        return elem