Fix the KickAss test, and enhance it (closes #651)
This made it possible to fix a crash when parsing descriptions with invalid Unicode (KickAss is quite horrible, it seems). A new attribute, filename, has been added. It is the recommended filename of the .torrent file, and should simplify downloading files once it is supported by weboorrents. The manual use of text/tail was unnecessary: lxml supports text_content() for HTML, which is much simpler.
This commit is contained in:
parent
b64b039cdd
commit
878621825b
3 changed files with 45 additions and 19 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright(C) 2010-2011 Julien Veyssier
|
# Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier
|
||||||
#
|
#
|
||||||
# This file is part of weboob.
|
# This file is part of weboob.
|
||||||
#
|
#
|
||||||
|
|
@ -18,6 +18,12 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
from urlparse import parse_qs
|
||||||
|
except ImportError:
|
||||||
|
from cgi import parse_qs
|
||||||
|
from urlparse import urlsplit
|
||||||
|
|
||||||
from weboob.capabilities.torrent import Torrent
|
from weboob.capabilities.torrent import Torrent
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
from weboob.tools.misc import get_bytes_size
|
from weboob.tools.misc import get_bytes_size
|
||||||
|
|
@ -34,10 +40,7 @@ class TorrentsPage(BasePage):
|
||||||
if not title:
|
if not title:
|
||||||
title = ''
|
title = ''
|
||||||
for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren():
|
for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren():
|
||||||
if red.text:
|
title += red.text_content()
|
||||||
title += red.text
|
|
||||||
if red.tail:
|
|
||||||
title += red.tail
|
|
||||||
idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \
|
idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \
|
||||||
.replace('.html', '')
|
.replace('.html', '')
|
||||||
|
|
||||||
|
|
@ -57,6 +60,7 @@ class TorrentsPage(BasePage):
|
||||||
yield Torrent(idt,
|
yield Torrent(idt,
|
||||||
title,
|
title,
|
||||||
url=url,
|
url=url,
|
||||||
|
filename=parse_qs(urlsplit(url).query).get('title', [None])[0],
|
||||||
size=get_bytes_size(size, u),
|
size=get_bytes_size(size, u),
|
||||||
seeders=int(seed),
|
seeders=int(seed),
|
||||||
leechers=int(leech))
|
leechers=int(leech))
|
||||||
|
|
@ -70,17 +74,17 @@ class TorrentPage(BasePage):
|
||||||
url = 'No Url found'
|
url = 'No Url found'
|
||||||
for div in self.document.getiterator('div'):
|
for div in self.document.getiterator('div'):
|
||||||
if div.attrib.get('id', '') == 'desc':
|
if div.attrib.get('id', '') == 'desc':
|
||||||
description = div.text.strip()
|
try:
|
||||||
for ch in div.getchildren():
|
description = div.text_content()
|
||||||
if ch.tail != None:
|
except UnicodeDecodeError:
|
||||||
description += ' '+ch.tail.strip()
|
description = 'Description with invalid UTF-8.'
|
||||||
elif div.attrib.get('class', '') == 'seedBlock':
|
elif div.attrib.get('class', '') == 'seedBlock':
|
||||||
if div.getchildren()[1].text != None:
|
if div.getchildren()[1].text is not None:
|
||||||
seed = int(div.getchildren()[1].text)
|
seed = int(div.getchildren()[1].text)
|
||||||
else:
|
else:
|
||||||
seed = 0
|
seed = 0
|
||||||
elif div.attrib.get('class', '') == 'leechBlock':
|
elif div.attrib.get('class', '') == 'leechBlock':
|
||||||
if div.getchildren()[1].text != None:
|
if div.getchildren()[1].text is not None:
|
||||||
leech = int(div.getchildren()[1].text)
|
leech = int(div.getchildren()[1].text)
|
||||||
else:
|
else:
|
||||||
leech = 0
|
leech = 0
|
||||||
|
|
@ -107,9 +111,9 @@ class TorrentPage(BasePage):
|
||||||
if td.attrib.get('class', '') == 'torFileName':
|
if td.attrib.get('class', '') == 'torFileName':
|
||||||
files.append(td.text)
|
files.append(td.text)
|
||||||
|
|
||||||
torrent = Torrent(id, title)
|
|
||||||
torrent = Torrent(id, title)
|
torrent = Torrent(id, title)
|
||||||
torrent.url = url
|
torrent.url = url
|
||||||
|
torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
|
||||||
torrent.size = get_bytes_size(size, u)
|
torrent.size = get_bytes_size(size, u)
|
||||||
torrent.seeders = int(seed)
|
torrent.seeders = int(seed)
|
||||||
torrent.leechers = int(leech)
|
torrent.leechers = int(leech)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright(C) 2010-2011 Julien Veyssier
|
# Copyright(C) 2010-2011 Julien Veyssier, Laurent Bachelier
|
||||||
#
|
#
|
||||||
# This file is part of weboob.
|
# This file is part of weboob.
|
||||||
#
|
#
|
||||||
|
|
@ -18,12 +18,33 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.tools.test import BackendTest
|
from weboob.tools.test import BackendTest
|
||||||
|
from weboob.capabilities.base import NotLoaded
|
||||||
|
|
||||||
|
import urllib
|
||||||
|
from random import choice
|
||||||
|
|
||||||
class KickassTest(BackendTest):
|
class KickassTest(BackendTest):
|
||||||
BACKEND = 'kickass'
|
BACKEND = 'kickass'
|
||||||
|
|
||||||
def test_torrent(self):
|
def test_torrent(self):
|
||||||
l = list(self.backend.iter_torrents('debian'))
|
torrents = list(self.backend.iter_torrents('debian'))
|
||||||
if len(l) > 0:
|
for torrent in torrents:
|
||||||
assert l[0].url.endswith('.torrent')
|
path, qs = urllib.splitquery(torrent.url)
|
||||||
self.backend.get_torrent_file(l[0].id)
|
assert path.endswith('.torrent')
|
||||||
|
if qs:
|
||||||
|
assert torrent.filename
|
||||||
|
assert torrent.id
|
||||||
|
assert torrent.name
|
||||||
|
assert torrent.description is NotLoaded
|
||||||
|
full_torrent = self.backend.get_torrent(torrent.id)
|
||||||
|
# do not assert torrent.name is full_torrent.name
|
||||||
|
# (or even that one contains another), it isn't always true!
|
||||||
|
assert full_torrent.name
|
||||||
|
assert full_torrent.url
|
||||||
|
assert full_torrent.description is not NotLoaded
|
||||||
|
|
||||||
|
# get the file of a random torrent
|
||||||
|
# from the list (getting them all would be too long)
|
||||||
|
if len(torrents):
|
||||||
|
torrent = choice(torrents)
|
||||||
|
self.backend.get_torrent_file(torrent.id)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
# Copyright(C) 2010-2011 Romain Bignon
|
# Copyright(C) 2010-2011 Romain Bignon, Laurent Bachelier
|
||||||
#
|
#
|
||||||
# This file is part of weboob.
|
# This file is part of weboob.
|
||||||
#
|
#
|
||||||
|
|
@ -28,7 +28,7 @@ __all__ = ['ICapTorrent', 'Torrent']
|
||||||
class Torrent(CapBaseObject):
|
class Torrent(CapBaseObject):
|
||||||
def __init__(self, id, name, date=NotLoaded, size=NotLoaded, url=NotLoaded,
|
def __init__(self, id, name, date=NotLoaded, size=NotLoaded, url=NotLoaded,
|
||||||
seeders=NotLoaded, leechers=NotLoaded, files=NotLoaded,
|
seeders=NotLoaded, leechers=NotLoaded, files=NotLoaded,
|
||||||
description=NotLoaded):
|
description=NotLoaded, filename=NotLoaded):
|
||||||
CapBaseObject.__init__(self, id)
|
CapBaseObject.__init__(self, id)
|
||||||
self.add_field('name', basestring, name)
|
self.add_field('name', basestring, name)
|
||||||
self.add_field('size', (int,long,float), size)
|
self.add_field('size', (int,long,float), size)
|
||||||
|
|
@ -38,6 +38,7 @@ class Torrent(CapBaseObject):
|
||||||
self.add_field('leechers', int, leechers)
|
self.add_field('leechers', int, leechers)
|
||||||
self.add_field('files', list, files)
|
self.add_field('files', list, files)
|
||||||
self.add_field('description', basestring, description)
|
self.add_field('description', basestring, description)
|
||||||
|
self.add_field('filename', basestring, filename) # suggested name of the .torrent file
|
||||||
|
|
||||||
class ICapTorrent(IBaseCap):
|
class ICapTorrent(IBaseCap):
|
||||||
def iter_torrents(self, pattern):
|
def iter_torrents(self, pattern):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue