From 3027c1ece280bc665f03781203d6b37b1c1bd82c Mon Sep 17 00:00:00 2001
From: Romain Bignon
Date: Mon, 12 Apr 2010 14:21:38 +0200
Subject: [PATCH] fix parsing of HTML entities with HTMLParser

Convert numeric character references (decimal and hexadecimal) and named
entity references to their Unicode characters. Malformed or unknown
references are passed through verbatim instead of aborting the parse.
---
Review note: handle_charref/handle_entityref were revised after review; the
blob id on the "index" line and the commit id above predate that revision.

 weboob/tools/parser.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/weboob/tools/parser.py b/weboob/tools/parser.py
index ee972f1a..92e52070 100644
--- a/weboob/tools/parser.py
+++ b/weboob/tools/parser.py
@@ -38,6 +38,7 @@ try:
     HTMLTreeBuilder = TidyHTMLTreeBuilder.TidyHTMLTreeBuilder
 except ImportError:
     from HTMLParser import HTMLParser
+    import htmlentitydefs
 
     class HTMLTreeBuilder(HTMLParser):
         def __init__(self, encoding=None):
@@ -58,6 +59,32 @@ except ImportError:
             self._target.start(tag, dict(attrs))
             self._target.end(tag)
 
+        def handle_charref(self, name):
+            """Emit the character for a numeric reference (&#233; / &#xE9;).
+
+            HTMLParser passes the text between '&#' and ';', so hexadecimal
+            references arrive with a leading 'x' or 'X'.
+            """
+            try:
+                if name[:1] in ('x', 'X'):
+                    codepoint = int(name[1:], 16)
+                else:
+                    codepoint = int(name)
+                char = unichr(codepoint)
+            except (ValueError, OverflowError):
+                # Malformed or out-of-range reference: keep the source text.
+                char = u'&#%s;' % name
+            self._target.data(char)
+
+        def handle_entityref(self, name):
+            """Emit the character for a named entity reference (&eacute;)."""
+            try:
+                char = unichr(htmlentitydefs.name2codepoint[name])
+            except KeyError:
+                # Unknown entity name: keep the source text instead of raising.
+                char = u'&%s;' % name
+            self._target.data(char)
+
         def handle_data(self, data):
             self._target.data(data)
 