From 67d776552fce8b2b4b6d9b2d46dff776c5ab65f4 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Fri, 9 Mar 2012 01:43:45 +0100 Subject: [PATCH] Make kickass code less crappy This does not actually fix anything; the issue was that the page was gzipped. However, since the code was not checking anything, it took me some time to realize it. We now have at least one "BrokenPageError" check. --- modules/kickass/browser.py | 2 +- modules/kickass/{pages/torrents.py => pages.py} | 17 ++++++++++------- modules/kickass/pages/__init__.py | 0 3 files changed, 11 insertions(+), 8 deletions(-) rename modules/kickass/{pages/torrents.py => pages.py} (89%) delete mode 100644 modules/kickass/pages/__init__.py diff --git a/modules/kickass/browser.py b/modules/kickass/browser.py index 8894a438..5b2aaefd 100644 --- a/modules/kickass/browser.py +++ b/modules/kickass/browser.py @@ -20,7 +20,7 @@ from weboob.tools.browser import BaseBrowser -from .pages.torrents import TorrentsPage, TorrentPage +from .pages import TorrentsPage, TorrentPage __all__ = ['KickassBrowser'] diff --git a/modules/kickass/pages/torrents.py b/modules/kickass/pages.py similarity index 89% rename from modules/kickass/pages/torrents.py rename to modules/kickass/pages.py index becacd03..34660c63 100644 --- a/modules/kickass/pages/torrents.py +++ b/modules/kickass/pages.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier +# Copyright(C) 2010-2012 Julien Veyssier, Laurent Bachelier # # This file is part of weboob. # @@ -79,7 +79,7 @@ class TorrentPage(BasePage): for div in self.document.getiterator('div'): if div.attrib.get('id', '') == 'desc': try: - description = div.text_content() + description = div.text_content().strip() except UnicodeDecodeError: description = 'Description with invalid UTF-8.' elif div.attrib.get('class', '') == 'seedBlock': @@ -93,12 +93,13 @@ class TorrentPage(BasePage): else: leech = 0 - for h in self.document.getiterator('h1'): - if h.attrib.get('class', '') == 'torrentName': - title = h.getchildren()[0].getchildren()[0].text + title = self.parser.select(self.document.getroot(), + 'h1.torrentName span', 1) + title = title.text for a in self.document.getiterator('a'): - if ('Download' in a.attrib.get('title', '')) and ('torrent file' in a.attrib.get('title', '')): + if ('Download' in a.attrib.get('title', '')) \ + and ('torrent file' in a.attrib.get('title', '')): url = a.attrib.get('href', '') size = 0 @@ -106,7 +107,9 @@ class TorrentPage(BasePage): for span in self.document.getiterator('span'): # sometimes there are others span, this is not so sure but the size of the children list # is enough to know if this is the right span - if (span.attrib.get('class', '') == 'folder' or span.attrib.get('class', '') == 'folderopen') and len(span.getchildren()) > 2: + if (span.attrib.get('class', '') == 'folder' \ + or span.attrib.get('class', '') == 'folderopen') \ + and len(span.getchildren()) > 2: size = span.getchildren()[1].tail u = span.getchildren()[2].text size = float(size.split(': ')[1].replace(',', '.')) diff --git a/modules/kickass/pages/__init__.py b/modules/kickass/pages/__init__.py deleted file mode 100644 index e69de29b..00000000