fix parsing of HTML entities with HTMLParser
This commit is contained in:
parent
2f297530a8
commit
3027c1ece2
1 changed file with 7 additions and 0 deletions
|
|
@ -38,6 +38,7 @@ try:
|
||||||
HTMLTreeBuilder = TidyHTMLTreeBuilder.TidyHTMLTreeBuilder
|
HTMLTreeBuilder = TidyHTMLTreeBuilder.TidyHTMLTreeBuilder
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from HTMLParser import HTMLParser
|
from HTMLParser import HTMLParser
|
||||||
|
import htmlentitydefs
|
||||||
|
|
||||||
class HTMLTreeBuilder(HTMLParser):
|
class HTMLTreeBuilder(HTMLParser):
|
||||||
def __init__(self, encoding=None):
|
def __init__(self, encoding=None):
|
||||||
|
|
@ -58,6 +59,12 @@ except ImportError:
|
||||||
self._target.start(tag, dict(attrs))
|
self._target.start(tag, dict(attrs))
|
||||||
self._target.end(tag)
|
self._target.end(tag)
|
||||||
|
|
||||||
|
def handle_charref(self, name):
    """Forward a numeric character reference to the target as text.

    ``name`` is the reference body without the surrounding ``&#`` and
    ``;``. Decimal bodies (``"62"``) and hexadecimal bodies introduced
    by ``x`` or ``X`` (``"x3E"``) are both decoded to the corresponding
    character.

    A body that is not a valid number, or whose code point ``unichr``
    rejects, is passed through as the literal text ``&#name;`` instead
    of raising.
    """
    try:
        if name[:1] in ("x", "X"):
            # Hexadecimal form: drop the marker and parse base 16.
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name)
        text = unichr(codepoint)
    except (ValueError, OverflowError):
        # Malformed or out-of-range reference: keep the markup as
        # literal text so one bad reference does not abort the parse.
        text = u"&#%s;" % name
    self._target.data(text)
|
||||||
|
|
||||||
|
def handle_entityref(self, name):
    """Forward a named entity reference to the target as text.

    ``name`` is the entity name without the surrounding ``&`` and ``;``
    (``"gt"`` for ``&gt;``). Names present in
    ``htmlentitydefs.name2codepoint`` are replaced by the character
    they denote.

    A name missing from that table is passed through as the literal
    text ``&name;`` instead of raising ``KeyError``.
    """
    try:
        text = unichr(htmlentitydefs.name2codepoint[name])
    except KeyError:
        # Unknown entity: keep the markup as literal text so the rest
        # of the document can still be parsed.
        text = u"&%s;" % name
    self._target.data(text)
|
||||||
|
|
||||||
def handle_data(self, data):
|
def handle_data(self, data):
|
||||||
self._target.data(data)
|
self._target.data(data)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue