From 8be0f5e3dd39871bc565782ccc88f03a26fde1d1 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Tue, 21 Jun 2011 12:08:52 +0200 Subject: [PATCH] fix parsing of search results --- weboob/backends/piratebay/pages/torrents.py | 48 ++++++++++----------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/weboob/backends/piratebay/pages/torrents.py b/weboob/backends/piratebay/pages/torrents.py index 651c6c0d..60bdc26b 100644 --- a/weboob/backends/piratebay/pages/torrents.py +++ b/weboob/backends/piratebay/pages/torrents.py @@ -38,36 +38,32 @@ class TorrentsPage(BasePage): return float(n*m[u]) def iter_torrents(self): + table = self.parser.select(self.document.getroot(), 'table#searchResult', 1) + for tr in table.getiterator('tr'): + if tr.get('class','') != "header": + td = tr.getchildren()[1] + div = td.getchildren()[0] + link = div.find('a').attrib['href'] + title = div.find('a').text + idt = link.split('/')[2] - for table in self.document.getiterator('table'): - if table.attrib.get('id','') != 'searchResult': - raise Exception('You''re in serious troubles!') - else: - for tr in table.getiterator('tr'): - if tr.get('class','') != "header": - td = tr.getchildren()[1] - div = td.getchildren()[0] - link = div.find('a').attrib['href'] - title = div.find('a').text - idt = link.split('/')[2] + a = td.getchildren()[1] + url = a.attrib['href'] - a = td.getchildren()[1] - url = a.attrib['href'] + size = td.find('font').text.split(',')[1].strip() + u = size.split(' ')[1].split(u'\xa0')[1].replace('i','') + size = size.split(' ')[1].split(u'\xa0')[0] - size = td.find('font').text.split(',')[1].strip() - u = size.split(' ')[1].split(u'\xa0')[1].replace('i','') - size = size.split(' ')[1].split(u'\xa0')[0] + seed = tr.getchildren()[2].text + leech = tr.getchildren()[3].text - seed = tr.getchildren()[2].text - leech = tr.getchildren()[3].text - - torrent = Torrent(idt, - title, - url=url, - size=self.unit(float(size),u), - seeders=int(seed), - leechers=int(leech)) - yield torrent + torrent = Torrent(idt, + title, + url=url, + size=self.unit(float(size),u), + seeders=int(seed), + leechers=int(leech)) + yield torrent class TorrentPage(BasePage): def get_torrent(self, id):