Fix the KickAss test, and enhance it (closes #651)

This made it possible to fix a crash when parsing descriptions with invalid
unicode (KickAss is quite horrible, it seems).

A new attribute, filename, has been added. It is the recommended
filename of the .torrent file, and should simplify downloading files once
it is supported by weboorents.

Using text/tail was unnecessary: lxml supports text_content() for HTML,
which is much simpler.
This commit is contained in:
Laurent Bachelier 2011-04-27 20:28:56 +02:00 committed by Romain Bignon
commit 878621825b
3 changed files with 45 additions and 19 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Julien Veyssier # Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -18,6 +18,12 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
try:
from urlparse import parse_qs
except ImportError:
from cgi import parse_qs
from urlparse import urlsplit
from weboob.capabilities.torrent import Torrent from weboob.capabilities.torrent import Torrent
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.misc import get_bytes_size from weboob.tools.misc import get_bytes_size
@ -34,10 +40,7 @@ class TorrentsPage(BasePage):
if not title: if not title:
title = '' title = ''
for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren(): for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren():
if red.text: title += red.text_content()
title += red.text
if red.tail:
title += red.tail
idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \ idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \
.replace('.html', '') .replace('.html', '')
@ -57,6 +60,7 @@ class TorrentsPage(BasePage):
yield Torrent(idt, yield Torrent(idt,
title, title,
url=url, url=url,
filename=parse_qs(urlsplit(url).query).get('title', [None])[0],
size=get_bytes_size(size, u), size=get_bytes_size(size, u),
seeders=int(seed), seeders=int(seed),
leechers=int(leech)) leechers=int(leech))
@ -70,17 +74,17 @@ class TorrentPage(BasePage):
url = 'No Url found' url = 'No Url found'
for div in self.document.getiterator('div'): for div in self.document.getiterator('div'):
if div.attrib.get('id', '') == 'desc': if div.attrib.get('id', '') == 'desc':
description = div.text.strip() try:
for ch in div.getchildren(): description = div.text_content()
if ch.tail != None: except UnicodeDecodeError:
description += ' '+ch.tail.strip() description = 'Description with invalid UTF-8.'
elif div.attrib.get('class', '') == 'seedBlock': elif div.attrib.get('class', '') == 'seedBlock':
if div.getchildren()[1].text != None: if div.getchildren()[1].text is not None:
seed = int(div.getchildren()[1].text) seed = int(div.getchildren()[1].text)
else: else:
seed = 0 seed = 0
elif div.attrib.get('class', '') == 'leechBlock': elif div.attrib.get('class', '') == 'leechBlock':
if div.getchildren()[1].text != None: if div.getchildren()[1].text is not None:
leech = int(div.getchildren()[1].text) leech = int(div.getchildren()[1].text)
else: else:
leech = 0 leech = 0
@ -107,9 +111,9 @@ class TorrentPage(BasePage):
if td.attrib.get('class', '') == 'torFileName': if td.attrib.get('class', '') == 'torFileName':
files.append(td.text) files.append(td.text)
torrent = Torrent(id, title)
torrent = Torrent(id, title) torrent = Torrent(id, title)
torrent.url = url torrent.url = url
torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
torrent.size = get_bytes_size(size, u) torrent.size = get_bytes_size(size, u)
torrent.seeders = int(seed) torrent.seeders = int(seed)
torrent.leechers = int(leech) torrent.leechers = int(leech)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Julien Veyssier # Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -18,12 +18,33 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
from weboob.capabilities.base import NotLoaded
import urllib
from random import choice
class KickassTest(BackendTest): class KickassTest(BackendTest):
BACKEND = 'kickass' BACKEND = 'kickass'
def test_torrent(self): def test_torrent(self):
l = list(self.backend.iter_torrents('debian')) torrents = list(self.backend.iter_torrents('debian'))
if len(l) > 0: for torrent in torrents:
assert l[0].url.endswith('.torrent') path, qs = urllib.splitquery(torrent.url)
self.backend.get_torrent_file(l[0].id) assert path.endswith('.torrent')
if qs:
assert torrent.filename
assert torrent.id
assert torrent.name
assert torrent.description is NotLoaded
full_torrent = self.backend.get_torrent(torrent.id)
# do not assert torrent.name is full_torrent.name
# (or even that one contains another), it isn't always true!
assert full_torrent.name
assert full_torrent.url
assert full_torrent.description is not NotLoaded
# get the file of a random torrent
# from the list (getting them all would be too long)
if len(torrents):
torrent = choice(torrents)
self.backend.get_torrent_file(torrent.id)

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon # Copyright(C) 2010-2011 Romain Bignon, Laurent Bachelier
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -28,7 +28,7 @@ __all__ = ['ICapTorrent', 'Torrent']
class Torrent(CapBaseObject): class Torrent(CapBaseObject):
def __init__(self, id, name, date=NotLoaded, size=NotLoaded, url=NotLoaded, def __init__(self, id, name, date=NotLoaded, size=NotLoaded, url=NotLoaded,
seeders=NotLoaded, leechers=NotLoaded, files=NotLoaded, seeders=NotLoaded, leechers=NotLoaded, files=NotLoaded,
description=NotLoaded): description=NotLoaded, filename=NotLoaded):
CapBaseObject.__init__(self, id) CapBaseObject.__init__(self, id)
self.add_field('name', basestring, name) self.add_field('name', basestring, name)
self.add_field('size', (int,long,float), size) self.add_field('size', (int,long,float), size)
@ -38,6 +38,7 @@ class Torrent(CapBaseObject):
self.add_field('leechers', int, leechers) self.add_field('leechers', int, leechers)
self.add_field('files', list, files) self.add_field('files', list, files)
self.add_field('description', basestring, description) self.add_field('description', basestring, description)
self.add_field('filename', basestring, filename) # suggested name of the .torrent file
class ICapTorrent(IBaseCap): class ICapTorrent(IBaseCap):
def iter_torrents(self, pattern): def iter_torrents(self, pattern):