Fix the KickAss test, and enhance it (closes #651)

This made it possible to fix a crash when parsing descriptions with invalid Unicode (KickAss is quite horrible, it seems). A new attribute, filename, has been added: it is the recommended filename of the .torrent file, and should simplify downloading files once weboorrents supports it. The manual handling of text/tail was unnecessary: lxml supports text_content() for HTML, which is much simpler.
2011-04-27 20:28:56 +02:00 · 2011-04-27 20:28:56 +02:00 · 878621825b
commit 878621825b
parent b64b039cdd
3 changed files with 45 additions and 19 deletions
--- a/weboob/backends/kickass/pages/torrents.py
+++ b/weboob/backends/kickass/pages/torrents.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2010-2011 Julien Veyssier
+# Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier
 #
 # This file is part of weboob.
 #
@ -18,6 +18,12 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


+try:
+    from urlparse import parse_qs
+except ImportError:
+    from cgi import parse_qs
+from urlparse import urlsplit
+
 from weboob.capabilities.torrent import Torrent
 from weboob.tools.browser import BasePage
 from weboob.tools.misc import get_bytes_size
@ -34,10 +40,7 @@ class TorrentsPage(BasePage):
                if not title:
                    title = ''
                for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren():
-                    if red.text:
-                        title += red.text
-                    if red.tail:
-                        title += red.tail
+                    title += red.text_content()
                idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \
                    .replace('.html', '')

@ -57,6 +60,7 @@ class TorrentsPage(BasePage):
                yield Torrent(idt,
                              title,
                              url=url,
+                              filename=parse_qs(urlsplit(url).query).get('title', [None])[0],
                              size=get_bytes_size(size, u),
                              seeders=int(seed),
                              leechers=int(leech))
@ -70,17 +74,17 @@ class TorrentPage(BasePage):
        url = 'No Url found'
        for div in self.document.getiterator('div'):
            if div.attrib.get('id', '') == 'desc':
-                description = div.text.strip()
-                for ch in div.getchildren():
-                    if ch.tail != None:
-                        description += ' '+ch.tail.strip()
+                try:
+                    description = div.text_content()
+                except UnicodeDecodeError:
+                    description = 'Description with invalid UTF-8.'
            elif div.attrib.get('class', '') == 'seedBlock':
-                if div.getchildren()[1].text != None:
+                if div.getchildren()[1].text is not None:
                    seed = int(div.getchildren()[1].text)
                else:
                    seed = 0
            elif div.attrib.get('class', '') == 'leechBlock':
-                if div.getchildren()[1].text != None:
+                if div.getchildren()[1].text is not None:
                    leech = int(div.getchildren()[1].text)
                else:
                    leech = 0
@ -107,9 +111,9 @@ class TorrentPage(BasePage):
            if td.attrib.get('class', '') == 'torFileName':
                files.append(td.text)

-        torrent = Torrent(id, title)
        torrent = Torrent(id, title)
        torrent.url = url
+        torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
--- a/weboob/backends/kickass/test.py
+++ b/weboob/backends/kickass/test.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2010-2011 Julien Veyssier
+# Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier
 #
 # This file is part of weboob.
 #
@ -18,12 +18,33 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

 from weboob.tools.test import BackendTest
+from weboob.capabilities.base import NotLoaded
+
+import urllib
+from random import choice

 class KickassTest(BackendTest):
    BACKEND = 'kickass'

    def test_torrent(self):
-        l = list(self.backend.iter_torrents('debian'))
-        if len(l) > 0:
-            assert l[0].url.endswith('.torrent')
-            self.backend.get_torrent_file(l[0].id)
+        torrents = list(self.backend.iter_torrents('debian'))
+        for torrent in torrents:
+            path, qs = urllib.splitquery(torrent.url)
+            assert path.endswith('.torrent')
+            if qs:
+                assert torrent.filename
+            assert torrent.id
+            assert torrent.name
+            assert torrent.description is NotLoaded
+            full_torrent = self.backend.get_torrent(torrent.id)
+            # do not assert torrent.name is full_torrent.name
+            # (or even that one contains another), it isn't always true!
+            assert full_torrent.name
+            assert full_torrent.url
+            assert full_torrent.description is not NotLoaded
+
+        # get the file of a random torrent
+        # from the list (getting them all would be too long)
+        if len(torrents):
+            torrent = choice(torrents)
+            self.backend.get_torrent_file(torrent.id)
--- a/weboob/capabilities/torrent.py
+++ b/weboob/capabilities/torrent.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2010-2011 Romain Bignon, Laurent Bachelier
 #
 # This file is part of weboob.
 #
@ -28,7 +28,7 @@ __all__ = ['ICapTorrent', 'Torrent']
 class Torrent(CapBaseObject):
    def __init__(self, id, name, date=NotLoaded, size=NotLoaded, url=NotLoaded,
                       seeders=NotLoaded, leechers=NotLoaded, files=NotLoaded,
-                       description=NotLoaded):
+                       description=NotLoaded, filename=NotLoaded):
        CapBaseObject.__init__(self, id)
        self.add_field('name', basestring, name)
        self.add_field('size', (int,long,float), size)
@ -38,6 +38,7 @@ class Torrent(CapBaseObject):
        self.add_field('leechers', int, leechers)
        self.add_field('files', list, files)
        self.add_field('description', basestring, description)
+        self.add_field('filename', basestring, filename) # suggested name of the .torrent file

 class ICapTorrent(IBaseCap):
    def iter_torrents(self, pattern):