btmon sped up, all ICapTorrent backends clarified and fillobj cleaned up

This commit is contained in:
Julien Veyssier 2013-03-15 18:20:54 +01:00
commit b907c8b266
10 changed files with 101 additions and 39 deletions

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.torrent import ICapTorrent from weboob.capabilities.torrent import ICapTorrent, Torrent
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from .browser import BtmonBrowser from .browser import BtmonBrowser
@ -50,3 +50,16 @@ class BtmonBackend(BaseBackend, ICapTorrent):
def iter_torrents(self, pattern): def iter_torrents(self, pattern):
return self.browser.iter_torrents(quote_plus(pattern.encode('utf-8'))) return self.browser.iter_torrents(quote_plus(pattern.encode('utf-8')))
def fill_torrent(self, torrent, fields):
if 'description' in fields:
tor = self.get_torrent(torrent.id)
torrent.description = tor.description
torrent.magnet = tor.magnet
torrent.files = tor.files
torrent.url = tor.url
return torrent
OBJECTS = {
Torrent:fill_torrent
}

View file

@ -21,7 +21,7 @@
import string import string
from weboob.capabilities.torrent import Torrent from weboob.capabilities.torrent import Torrent
from weboob.capabilities.base import NotAvailable from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.misc import get_bytes_size from weboob.tools.misc import get_bytes_size
@ -32,22 +32,50 @@ __all__ = ['TorrentsPage','TorrentPage']
class TorrentsPage(BasePage): class TorrentsPage(BasePage):
def iter_torrents(self): def iter_torrents(self):
for div in self.parser.select(self.document.getroot(),'div.list_tor'): for div in self.parser.select(self.document.getroot(),'div.list_tor'):
name = NotAvailable
size = NotAvailable
seeders = NotAvailable
leechers = NotAvailable
right_div = self.parser.select(div,'div.list_tor_right',1)
try:
seeders = int(self.parser.select(right_div,'b.green',1).text)
except ValueError:
seeders = 0
try:
leechers = int(self.parser.select(right_div,'b.red',1).text)
except ValueError:
leechers = 0
sizep = self.parser.select(right_div,'p')[0]
sizespan = self.parser.select(sizep,'span')[0]
nsize = float(sizespan.text_content().split(':')[1].split()[0])
usize = sizespan.text_content().split()[-1].upper()
size = get_bytes_size(nsize,usize)
a = self.parser.select(div,'a.list_tor_title',1) a = self.parser.select(div,'a.list_tor_title',1)
href = a.attrib.get('href','') href = a.attrib.get('href','')
self.browser.location('http://%s%s'%(self.browser.DOMAIN,href)) name = unicode(a.text_content())
assert self.browser.is_on_page(TorrentPage) id = unicode(href.strip('/').split('.html')[0])
yield self.browser.page.get_torrent() torrent = Torrent(id,name)
torrent.url = NotLoaded
torrent.filename = id
torrent.magnet = NotLoaded
torrent.size = size
torrent.seeders = seeders
torrent.leechers = leechers
torrent.description = NotLoaded
torrent.files = NotLoaded
yield torrent
class TorrentPage(BasePage): class TorrentPage(BasePage):
def get_torrent(self): def get_torrent(self):
seed = 0 seed = 0
leech = 0 leech = 0
description = NotAvailable.__unicode__() description = NotAvailable
url = NotAvailable url = NotAvailable
magnet = NotAvailable magnet = NotAvailable
title = NotAvailable title = NotAvailable
id = self.browser.geturl().split('.html')[0].split('/')[-1] id = unicode(self.browser.geturl().split('.html')[0].split('/')[-1])
div = self.parser.select(self.document.getroot(),'div#middle_content',1) div = self.parser.select(self.document.getroot(),'div#middle_content',1)
title = u'%s'%self.parser.select(self.document.getroot(),'div#middle_content > h1',1).text title = u'%s'%self.parser.select(self.document.getroot(),'div#middle_content > h1',1).text
@ -56,8 +84,8 @@ class TorrentPage(BasePage):
seed = slblock_values[0].text seed = slblock_values[0].text
leech = slblock_values[1].text leech = slblock_values[1].text
href_t = self.parser.select(div,'a.down',1).attrib.get('href','') href_t = self.parser.select(div,'a.down',1).attrib.get('href','')
url = 'http://%s%s'%(self.browser.DOMAIN,href_t) url = u'http://%s%s'%(self.browser.DOMAIN,href_t)
magnet = self.parser.select(div,'a.magnet',1).attrib.get('href','') magnet = unicode(self.parser.select(div,'a.magnet',1).attrib.get('href',''))
divtabs = self.parser.select(div,'div#tabs',1) divtabs = self.parser.select(div,'div#tabs',1)
files_div = self.parser.select(divtabs,'div.body > div.doubleblock > div.leftblock') files_div = self.parser.select(divtabs,'div.body > div.doubleblock > div.leftblock')
@ -74,7 +102,7 @@ class TorrentPage(BasePage):
u = size_text.split(',')[1].strip().translate(None,string.digits).strip('.').strip().upper() u = size_text.split(',')[1].strip().translate(None,string.digits).strip('.').strip().upper()
div_desc = self.parser.select(divtabs,'div#descriptionContent') div_desc = self.parser.select(divtabs,'div#descriptionContent')
if len(div_desc) > 0: if len(div_desc) > 0:
description = div_desc[0].text_content() description = unicode(div_desc[0].text_content())
torrent = Torrent(id, title) torrent = Torrent(id, title)
torrent.url = url torrent.url = url

View file

@ -29,11 +29,10 @@ class BtmonTest(BackendTest):
def test_torrent(self): def test_torrent(self):
torrents = list(self.backend.iter_torrents('spiderman')) torrents = list(self.backend.iter_torrents('spiderman'))
for torrent in torrents: for torrent in torrents:
path, qs = urllib.splitquery(torrent.url)
assert path.endswith('.torrent')
assert torrent.id assert torrent.id
assert torrent.name assert torrent.name
assert torrent.description is NotLoaded assert torrent.description is NotLoaded
assert torrent.files is NotLoaded
# get the file of a random torrent # get the file of a random torrent
# from the list (getting them all would be too long) # from the list (getting them all would be too long)

View file

@ -52,8 +52,13 @@ class IsohuntBackend(BaseBackend, ICapTorrent):
return self.browser.iter_torrents(pattern.replace(' ','+')) return self.browser.iter_torrents(pattern.replace(' ','+'))
def fill_torrent(self, torrent, fields): def fill_torrent(self, torrent, fields):
if 'description' in fields or fields == None or 'files' in fields: if 'description' in fields or 'files' in fields:
return self.get_torrent(torrent.id) tor = self.get_torrent(torrent.id)
torrent.description = tor.description
torrent.magnet = tor.magnet
torrent.files = tor.files
torrent.url = tor.url
return torrent
OBJECTS = { OBJECTS = {
Torrent:fill_torrent Torrent:fill_torrent

View file

@ -19,10 +19,16 @@
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
from random import choice
class IsohuntTest(BackendTest): class IsohuntTest(BackendTest):
BACKEND = 'isohunt' BACKEND = 'isohunt'
def test_torrent(self): def test_torrent(self):
l = list(self.backend.iter_torrents('debian')) l = list(self.backend.iter_torrents('debian'))
if len(l) > 0: if len(l):
self.backend.get_torrent_file(l[0].id) torrent = choice(l)
full_torrent = self.backend.get_torrent(torrent.id)
assert torrent.name
assert full_torrent.name
assert self.backend.get_torrent_file(torrent.id)

View file

@ -63,8 +63,13 @@ class KickassBackend(BaseBackend, ICapTorrent):
return self.browser.iter_torrents(quote_plus(pattern.encode('utf-8'))) return self.browser.iter_torrents(quote_plus(pattern.encode('utf-8')))
def fill_torrent(self, torrent, fields): def fill_torrent(self, torrent, fields):
if 'description' in fields or fields == None: if 'description' in fields or 'files' in fields:
return self.get_torrent(torrent.id) tor = self.get_torrent(torrent.id)
torrent.description = tor.description
torrent.magnet = tor.magnet
torrent.files = tor.files
torrent.url = tor.url
return torrent
OBJECTS = { OBJECTS = {
Torrent:fill_torrent Torrent:fill_torrent

View file

@ -44,7 +44,9 @@ class TorrentsPage(BasePage):
continue continue
title = tr.getchildren()[0].getchildren()[1].getchildren()[1].text title = tr.getchildren()[0].getchildren()[1].getchildren()[1].text
if not title: if not title:
title = '' title = u''
else:
title = unicode(title)
for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren(): for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren():
title += red.text_content() title += red.text_content()
idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \ idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \
@ -54,11 +56,11 @@ class TorrentsPage(BasePage):
for a in self.parser.select(tr,'div.iaconbox a'): for a in self.parser.select(tr,'div.iaconbox a'):
href = a.attrib.get('href', '') href = a.attrib.get('href', '')
if href.startswith('magnet'): if href.startswith('magnet'):
magnet = href magnet = unicode(href)
elif href.startswith('http'): elif href.startswith('http'):
url = href url = unicode(href)
elif href.startswith('//'): elif href.startswith('//'):
url = 'http:%s'%href url = u'http:%s'%href
size = tr.getchildren()[1].text size = tr.getchildren()[1].text
u = tr.getchildren()[1].getchildren()[0].text u = tr.getchildren()[1].getchildren()[0].text
@ -72,7 +74,7 @@ class TorrentsPage(BasePage):
torrent.magnet = magnet torrent.magnet = magnet
torrent.description = NotLoaded torrent.description = NotLoaded
torrent.files = NotLoaded torrent.files = NotLoaded
torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0] torrent.filename = unicode(parse_qs(urlsplit(url).query).get('title', [None])[0])
torrent.size = get_bytes_size(size, u) torrent.size = get_bytes_size(size, u)
torrent.seeders = int(seed) torrent.seeders = int(seed)
torrent.leechers = int(leech) torrent.leechers = int(leech)
@ -90,7 +92,7 @@ class TorrentPage(BasePage):
for div in self.document.getiterator('div'): for div in self.document.getiterator('div'):
if div.attrib.get('id', '') == 'desc': if div.attrib.get('id', '') == 'desc':
try: try:
description = div.text_content().strip() description = unicode(div.text_content().strip())
except UnicodeDecodeError: except UnicodeDecodeError:
description = 'Description with invalid UTF-8.' description = 'Description with invalid UTF-8.'
elif div.attrib.get('class', '') == 'seedBlock': elif div.attrib.get('class', '') == 'seedBlock':
@ -106,17 +108,17 @@ class TorrentPage(BasePage):
title = self.parser.select(self.document.getroot(), title = self.parser.select(self.document.getroot(),
'h1.torrentName span', 1) 'h1.torrentName span', 1)
title = title.text title = unicode(title.text)
for a in self.parser.select(self.document.getroot(), for a in self.parser.select(self.document.getroot(),
'div.downloadButtonGroup a'): 'div.downloadButtonGroup a'):
href = a.attrib.get('href', '') href = a.attrib.get('href', '')
if href.startswith('magnet'): if href.startswith('magnet'):
magnet = href magnet = unicode(href)
elif href.startswith('//'): elif href.startswith('//'):
url = 'http:%s'%href url = u'http:%s'%href
elif href.startswith('http'): elif href.startswith('http'):
url = href url = unicode(href)
size = 0 size = 0
u = '' u = ''

View file

@ -54,8 +54,13 @@ class PiratebayBackend(BaseBackend, ICapTorrent):
return self.browser.iter_torrents(pattern.replace(' ', '+')) return self.browser.iter_torrents(pattern.replace(' ', '+'))
def fill_torrent(self, torrent, fields): def fill_torrent(self, torrent, fields):
if 'description' in fields or fields == None: if 'description' in fields or 'files' in fields:
return self.get_torrent(torrent.id) tor = self.get_torrent(torrent.id)
torrent.description = tor.description
torrent.magnet = tor.magnet
torrent.files = tor.files
torrent.url = tor.url
return torrent
OBJECTS = { OBJECTS = {
Torrent:fill_torrent Torrent:fill_torrent

View file

@ -20,7 +20,7 @@
from weboob.tools.browser import BasePage,BrokenPageError from weboob.tools.browser import BasePage,BrokenPageError
from weboob.capabilities.torrent import Torrent from weboob.capabilities.torrent import Torrent
from weboob.capabilities.base import NotAvailable from weboob.capabilities.base import NotAvailable, NotLoaded
__all__ = ['TorrentsPage'] __all__ = ['TorrentsPage']
@ -68,15 +68,16 @@ class TorrentsPage(BasePage):
torrent.size = self.unit(float(size), u) torrent.size = self.unit(float(size), u)
torrent.seeders = int(seed) torrent.seeders = int(seed)
torrent.leechers = int(leech) torrent.leechers = int(leech)
torrent.description = NotAvailable torrent.description = NotLoaded
torrent.files = NotAvailable torrent.files = NotLoaded
torrent.magnet = NotLoaded
yield torrent yield torrent
class TorrentPage(BasePage): class TorrentPage(BasePage):
def get_torrent(self, id): def get_torrent(self, id):
url = None url = NotAvailable
magnet = None magnet = NotAvailable
for div in self.document.getiterator('div'): for div in self.document.getiterator('div'):
if div.attrib.get('id', '') == 'title': if div.attrib.get('id', '') == 'title':
title = unicode(div.text.strip()) title = unicode(div.text.strip())
@ -107,7 +108,7 @@ class TorrentPage(BasePage):
leech = ch.text leech = ch.text
prev_child_txt = ch.text prev_child_txt = ch.text
elif div.attrib.get('class', '') == 'nfo': elif div.attrib.get('class', '') == 'nfo':
description = unicode(div.getchildren()[0].text.strip()) description = unicode(div.getchildren()[0].text_content().strip())
torrent = Torrent(id, title) torrent = Torrent(id, title)
torrent.url = url or NotAvailable torrent.url = url or NotAvailable
torrent.magnet = magnet torrent.magnet = magnet
@ -115,6 +116,6 @@ class TorrentPage(BasePage):
torrent.seeders = int(seed) torrent.seeders = int(seed)
torrent.leechers = int(leech) torrent.leechers = int(leech)
torrent.description = description torrent.description = description
torrent.files = ['NYI'] torrent.files = NotAvailable
return torrent return torrent

View file

@ -47,10 +47,8 @@ class Subtitle(QFrame):
else: else:
self.ui.langLabel.parent().hide() self.ui.langLabel.parent().hide()
if not empty(subtitle.description): if not empty(subtitle.description):
print 'plop'
self.ui.descriptionPlain.setPlainText(u'%s'%subtitle.description) self.ui.descriptionPlain.setPlainText(u'%s'%subtitle.description)
else: else:
print 'ploppppp'
self.ui.descriptionPlain.parent().hide() self.ui.descriptionPlain.parent().hide()
if not empty(subtitle.url): if not empty(subtitle.url):
self.ui.urlEdit.setText(u'%s'%subtitle.url) self.ui.urlEdit.setText(u'%s'%subtitle.url)