fix parsing of HTML entities with HTMLParser
This commit is contained in:
parent
2f297530a8
commit
3027c1ece2
1 changed file with 7 additions and 0 deletions
|
|
@@ -38,6 +38,7 @@ try:
|
|||
HTMLTreeBuilder = TidyHTMLTreeBuilder.TidyHTMLTreeBuilder
|
||||
except ImportError:
|
||||
from HTMLParser import HTMLParser
|
||||
import htmlentitydefs
|
||||
|
||||
class HTMLTreeBuilder(HTMLParser):
|
||||
def __init__(self, encoding=None):
|
||||
|
|
@@ -58,6 +59,12 @@ except ImportError:
|
|||
self._target.start(tag, dict(attrs))
|
||||
self._target.end(tag)
|
||||
|
||||
def handle_charref(self, name):
    """Forward a numeric character reference to the tree target as text.

    ``name`` is the reference without its ``&#`` prefix and ``;``
    suffix, e.g. ``"65"`` for ``&#65;`` or ``"x41"`` for ``&#x41;``.
    Both decimal and hexadecimal forms are decoded. A reference that
    cannot be turned into a character (malformed number, code point
    out of range) is passed through as its literal source text instead
    of aborting the parse.
    """
    try:
        # Hexadecimal references start with "x" or "X".
        if name[:1] in ("x", "X"):
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name)
        text = unichr(codepoint)
    except (ValueError, OverflowError):
        # Keep the document parseable: emit the reference unchanged.
        text = "&#%s;" % name
    self._target.data(text)
|
||||
|
||||
def handle_entityref(self, name):
    """Forward a named entity reference to the tree target as text.

    ``name`` is the entity name without ``&`` and ``;`` (e.g. ``"amp"``).
    Names found in ``htmlentitydefs.name2codepoint`` are converted to
    the corresponding character. Unknown names are passed through as
    the literal ``&name;`` text rather than raising ``KeyError`` and
    aborting the parse.
    """
    try:
        text = unichr(htmlentitydefs.name2codepoint[name])
    except KeyError:
        # Not in the entity table: keep the source text.
        text = "&%s;" % name
    self._target.data(text)
|
||||
|
||||
def handle_data(self, data):
    """Pass a run of character data straight through to the tree target."""
    emit = self._target.data
    emit(data)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue