From 0fef303764a84388847f0df86eaaf007df869af1 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Sat, 27 Nov 2010 00:22:44 +0100 Subject: [PATCH] [piratebay] ok without descriptions --- weboob/backends/piratebay/browser.py | 1 - weboob/backends/piratebay/pages/torrents.py | 90 ++++----------------- 2 files changed, 17 insertions(+), 74 deletions(-) diff --git a/weboob/backends/piratebay/browser.py b/weboob/backends/piratebay/browser.py index ddd408f5..3182b12f 100644 --- a/weboob/backends/piratebay/browser.py +++ b/weboob/backends/piratebay/browser.py @@ -66,7 +66,6 @@ class PiratebayBrowser(BaseBrowser): return self.page.iter_torrents() def get_torrent(self, id): - print 'goto:'+id self.location('https://thepiratebay.org/torrent/%s/' % id) assert self.is_on_page(TorrentPage) diff --git a/weboob/backends/piratebay/pages/torrents.py b/weboob/backends/piratebay/pages/torrents.py index f95e546d..d75168cb 100644 --- a/weboob/backends/piratebay/pages/torrents.py +++ b/weboob/backends/piratebay/pages/torrents.py @@ -58,9 +58,7 @@ class TorrentsPage(BasePage): size = td.find('font').text.split(',')[1] size = size.split(' ')[2] u = size[-3:].replace('i','') - print "u:"+u size = size[:-3] - print 'size:'+size seed = tr.getchildren()[2].text leech = tr.getchildren()[3].text @@ -75,77 +73,23 @@ class TorrentsPage(BasePage): class TorrentPage(BasePage): def get_torrent(self, id): - table = self.document.getroot().cssselect('div.thin') - if not table: - warning('No div.thin found') - return None - - h2 = table[0].find('h2') - title = h2.text or '' - if h2.find('a') != None: - title += h2.find('a').text + h2.find('a').tail - + for div in self.document.getiterator('div'): + if div.attrib.get('id','') == 'title': + title = div.text + elif div.attrib.get('class','') == 'download': + url = div.getchildren()[0].attrib.get('href','') + elif div.attrib.get('id','') == 'details': + size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0]) + seed = div.getchildren()[0].getchildren()[24].text + leech = div.getchildren()[0].getchildren()[26].text + elif div.attrib.get('class','') == 'nfo': + description = div.getchildren()[0].text torrent = Torrent(id, title) - table = self.document.getroot().cssselect('table.torrent_table') - if not table: - warning('No table found') - return None - - for tr in table[0].findall('tr'): - if tr.attrib.get('class', '').startswith('group_torrent'): - tds = tr.findall('td') - - if not len(tds) == 5: - continue - - url = tds[0].find('span').find('a').attrib['href'] - id = self.TORRENTID_REGEXP.match(url) - - if not id: - warning('ID not found') - continue - - id = id.group(1) - - if id != torrent.id: - continue - - torrent.url = self.format_url(url) - torrent.size = self.unit(*tds[1].text.split()) - torrent.seeders = int(tds[3].text) - torrent.leechers = int(tds[4].text) - break - - if not torrent.url: - warning('Torrent %d not found in list' % torrent.id) - return None - - div = self.document.getroot().cssselect('div.main_column') - if not div: - warning('WTF') - return None - - for box in div[0].cssselect('div.box'): - title = None - body = None - - title_t = box.cssselect('div.head') - if title_t: - title = title_t[0].find('strong').text.strip() - body_t = box.cssselect('div.body') - if body_t: - body = html2text(self.browser.parser.tostring(body_t[0])).strip() - - if title and body: - if torrent.description is NotLoaded: - torrent.description = u'' - torrent.description += u'%s\n\n%s\n' % (title, body) - - div = self.document.getroot().cssselect('div#files_%s' % torrent.id) - if div: - torrent.files = [] - for tr in div[0].find('table'): - if tr.attrib.get('class', None) != 'colhead_dark': - torrent.files.append(tr.find('td').text) + torrent.url = url + torrent.size = size + torrent.seeders = int(seed) + torrent.leechers = int(leech) + torrent.description = description + torrent.files = ['NYI'] return torrent