From 0cc2458513709cfa81faa1c280cee28853836bb9 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 26 Nov 2010 17:15:13 +0100 Subject: [PATCH] iter_torrent wokay, unit fail --- weboob/backends/piratebay/browser.py | 6 ++- weboob/backends/piratebay/pages/torrents.py | 45 +++++++++++---------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/weboob/backends/piratebay/browser.py b/weboob/backends/piratebay/browser.py index 52105b3f..839a287f 100644 --- a/weboob/backends/piratebay/browser.py +++ b/weboob/backends/piratebay/browser.py @@ -26,6 +26,8 @@ __all__ = ['PiratebayBrowser'] class PiratebayBrowser(BaseBrowser): + PROTOCOL = 'https' + USER_AGENT = BaseBrowser.USER_AGENTS['wget'] PAGES = {'https://thepiratebay.org' : IndexPage, 'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage, #'https://thepiratebay.org/torrent/.*' : TorrentPage @@ -34,7 +36,7 @@ class PiratebayBrowser(BaseBrowser): def __init__(self, *args, **kwargs): #self.DOMAIN = domain #self.PROTOCOL = protocol - self.PAGES = {} + #self.PAGES = {} #for key, value in PiratebayBrowser.PAGES.iteritems(): # self.PAGES[key % domain] = value @@ -57,7 +59,7 @@ class PiratebayBrowser(BaseBrowser): def iter_torrents(self, pattern): #self.location(self.buildurl('/torrents.php', searchstr=pattern)) - self.location('https://thepiratebay.org/search/%s/0/7/0' % pattern) + self.location('https://thepiratebay.org/search/%s/0/7/0' % pattern, '') assert self.is_on_page(TorrentsPage) return self.page.iter_torrents() diff --git a/weboob/backends/piratebay/pages/torrents.py b/weboob/backends/piratebay/pages/torrents.py index f52e227e..99cffcc0 100644 --- a/weboob/backends/piratebay/pages/torrents.py +++ b/weboob/backends/piratebay/pages/torrents.py @@ -50,30 +50,33 @@ class TorrentsPage(BasePage): raise Exception('You''re in serious troubles!') else: for tr in table.getiterator('tr'): - td = tr.getchildren()[1] - div = td.getchildren()[0] - link = div.find('a').attrib('href') - title = div.find('a').text - idt = link.split('/')[2] + if tr.get('class','') != "header": + td = tr.getchildren()[1] + div = td.getchildren()[0] + link = div.find('a').attrib['href'] + title = div.find('a').text + idt = link.split('/')[2] - a = td.getchildren()[1] - url = a.attrib('href') + a = td.getchildren()[1] + url = a.attrib['href'] - size = td.find('font').text.split(',')[1] - size = size.split(' ')[2] - u = size[-3:].replace('i','') - size = size[:-3] - - seed = tr.getchildren()[2].text - leech = tr.getchildren()[3].text + size = td.find('font').text.split(',')[1] + size = size.split(' ')[2] + u = size[-3:].replace('i','') + print "u:"+u + size = size[:-3] + print 'size:'+size + + seed = tr.getchildren()[2].text + leech = tr.getchildren()[3].text - torrent = Torrent(idt, - title, - url=url, - size=size, - seeders=seeders, - leechers=leechers) - yield torrent + torrent = Torrent(idt, + title, + url=url, + size=self.unit(size.replace('.',','),u), + seeders=int(seed), + leechers=int(leech)) + yield torrent def get_torrent(self, id): table = self.document.getroot().cssselect('div.thin')