add new lxmlsoup parser

This commit is contained in:
Christophe Benz 2010-05-20 01:33:54 +02:00
commit a9c8c93965
2 changed files with 43 additions and 1 deletions

View file

@ -30,6 +30,10 @@ def load_lxml():
from .lxmlparser import LxmlHtmlParser from .lxmlparser import LxmlHtmlParser
return LxmlHtmlParser return LxmlHtmlParser
def load_lxmlsoup():
    """
    Import the lxmlsoup parser lazily and return the LxmlSoupParser class.

    The import happens inside the function, so the underlying module is
    only loaded when this loader is actually called.
    """
    from .lxmlsoupparser import LxmlSoupParser as parser_class
    return parser_class
def load_html5lib():
    """
    Import the html5lib parser lazily and return the Html5libParser class.

    The import happens inside the function, so the underlying module is
    only loaded when this loader is actually called.
    """
    from .html5libparser import Html5libParser
    return Html5libParser
@ -42,7 +46,7 @@ def load_builtin():
from .htmlparser import HTMLParser from .htmlparser import HTMLParser
return HTMLParser return HTMLParser
def get_parser(preference_order=('lxml', 'html5lib', 'elementtidy', 'builtin')): def get_parser(preference_order=('lxml', 'lxmlsoup', 'html5lib', 'elementtidy', 'builtin')):
""" """
Get a parser from a preference order list. Get a parser from a preference order list.
This allows Weboob to run on systems without lxml, which is the default parser. This allows Weboob to run on systems without lxml, which is the default parser.

View file

@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010 Christophe Benz
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import lxml.html
import lxml.html.soupparser
from .iparser import IParser
# Export the class this module actually defines; the previous value named
# 'LxmlHtmlParser', which does not exist here and made a star-import fail.
__all__ = ['LxmlSoupParser']
class LxmlSoupParser(IParser):
    """
    Parser built on lxml's elementsoup module (``lxml.html.soupparser``).

    Note that it is not available on every system.
    """

    def parse(self, data, encoding=None):
        """
        Parse ``data`` with ``lxml.html.soupparser.parse`` and return its result.

        ``encoding`` is accepted in the signature but is not forwarded to
        the underlying parser by this implementation.
        """
        soup_parse = lxml.html.soupparser.parse
        return soup_parse(data)

    def tostring(self, element):
        """
        Serialize ``element`` with ``lxml.html.tostring``.

        ``encoding=unicode`` is passed to the serializer.
        """
        serialized = lxml.html.tostring(element, encoding=unicode)
        return serialized