diff --git a/weboob/backends/aum/adopte.py b/weboob/backends/aum/adopte.py
index 62ef64bd..4bc8e3c5 100644
--- a/weboob/backends/aum/adopte.py
+++ b/weboob/backends/aum/adopte.py
@@ -39,12 +39,13 @@ class AdopteParser(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self, tree=treebuilders.getTreeBuilder("dom"))
 
-    def parse(self, data):
-        return HTMLParser.parse(self, data, encoding='iso-8859-1')
+    def parse(self, data, encoding):
+        return HTMLParser.parse(self, data, encoding=encoding)
 
 class AdopteUnMec(Browser):
     DOMAIN = 'www.adopteunmec.com'
     PROTOCOL = 'http'
+    ENCODING = 'iso-8859-1'
     PAGES = {'http://www.adopteunmec.com/': LoginPage,
              'http://www.adopteunmec.com/index.html': LoginPage,
              'http://www.adopteunmec.com/index.php': LoginPage,
diff --git a/weboob/tools/browser.py b/weboob/tools/browser.py
index 5828fd67..5c5bffaf 100644
--- a/weboob/tools/browser.py
+++ b/weboob/tools/browser.py
@@ -84,6 +84,7 @@ class Browser(mechanize.Browser):
 
     DOMAIN = None
     PROTOCOL = 'http'
+    ENCODING = 'utf-8'
     PAGES = {}
     USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.4) Gecko/2008111318 Ubuntu/8.10 (intrepid) Firefox/3.0.3'
 
@@ -110,7 +111,7 @@ class Browser(mechanize.Browser):
     # ------ Browser methods ---------------------------------------
 
     def __init__(self, username=None, password=None, firefox_cookies=None, parser=StandardParser):
-        mechanize.Browser.__init__(self, history=NoHistory())
+        mechanize.Browser.__init__(self)#, history=NoHistory())
         self.addheaders = [
                 ['User-agent', self.USER_AGENT]
             ]
@@ -235,12 +236,12 @@ class Browser(mechanize.Browser):
         debug('[%s] Gone on %s' % (self.username, result.geturl()))
         self.last_update = time.time()
 
-        document = self.__parser.parse(result)
+        document = self.__parser.parse(result, self.ENCODING)
         self.page = pageCls(self, document, result.geturl())
         self.page.loaded()
 
         if self.password and not self.is_logged():
-            print '!! Relogin !!'
+            debug('!! Relogin !!')
             self.login()
             return
 
diff --git a/weboob/tools/parser.py b/weboob/tools/parser.py
index 62a080e8..0221e014 100644
--- a/weboob/tools/parser.py
+++ b/weboob/tools/parser.py
@@ -26,15 +26,12 @@ except ImportError:
     try:
         from elementtidy import TidyHTMLTreeBuilder
         TidyHTMLTreeBuilder.ElementTree = ElementTree # force cElementTree if using it.
-        class HTMLTreeBuilder(TidyHTMLTreeBuilder.TidyHTMLTreeBuilder):
-            def __init__(self):
-                TidyHTMLTreeBuilder.TidyHTMLTreeBuilder.__init__(self, 'utf-8')
-
+        HTMLTreeBuilder = TidyHTMLTreeBuilder.TidyHTMLTreeBuilder
     except ImportError:
         from HTMLParser import HTMLParser
 
         class HTMLTreeBuilder(HTMLParser):
-            def __init__(self, html=0, target=None):
+            def __init__(self, encoding=None):
                 HTMLParser.__init__(self)
                 if target is None:
                     target = ElementTree.TreeBuilder()
@@ -61,8 +58,8 @@ except ImportError:
                 self._target.end(tag)
 
 class StandardParser(object):
-    def parse(self, data):
-        parser = HTMLTreeBuilder()
+    def parse(self, data, encoding=None):
+        parser = HTMLTreeBuilder(encoding)
         tree = ElementTree.parse(data, parser)
 
         for elem in tree.getiterator():