iter_torrents okay, unit fail

This commit is contained in:
Julien Veyssier 2010-11-26 17:15:13 +01:00
commit 0cc2458513
2 changed files with 31 additions and 26 deletions

View file

@ -26,6 +26,8 @@ __all__ = ['PiratebayBrowser']
class PiratebayBrowser(BaseBrowser):
PROTOCOL = 'https'
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
PAGES = {'https://thepiratebay.org' : IndexPage,
'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
#'https://thepiratebay.org/torrent/.*' : TorrentPage
@ -34,7 +36,7 @@ class PiratebayBrowser(BaseBrowser):
def __init__(self, *args, **kwargs):
#self.DOMAIN = domain
#self.PROTOCOL = protocol
self.PAGES = {}
#self.PAGES = {}
#for key, value in PiratebayBrowser.PAGES.iteritems():
# self.PAGES[key % domain] = value
@ -57,7 +59,7 @@ class PiratebayBrowser(BaseBrowser):
def iter_torrents(self, pattern):
#self.location(self.buildurl('/torrents.php', searchstr=pattern))
self.location('https://thepiratebay.org/search/%s/0/7/0' % pattern)
self.location('https://thepiratebay.org/search/%s/0/7/0' % pattern, '')
assert self.is_on_page(TorrentsPage)
return self.page.iter_torrents()

View file

@ -50,30 +50,33 @@ class TorrentsPage(BasePage):
raise Exception('You''re in serious troubles!')
else:
for tr in table.getiterator('tr'):
td = tr.getchildren()[1]
div = td.getchildren()[0]
link = div.find('a').attrib('href')
title = div.find('a').text
idt = link.split('/')[2]
if tr.get('class','') != "header":
td = tr.getchildren()[1]
div = td.getchildren()[0]
link = div.find('a').attrib['href']
title = div.find('a').text
idt = link.split('/')[2]
a = td.getchildren()[1]
url = a.attrib('href')
a = td.getchildren()[1]
url = a.attrib['href']
size = td.find('font').text.split(',')[1]
size = size.split(' ')[2]
u = size[-3:].replace('i','')
size = size[:-3]
seed = tr.getchildren()[2].text
leech = tr.getchildren()[3].text
size = td.find('font').text.split(',')[1]
size = size.split(' ')[2]
u = size[-3:].replace('i','')
print "u:"+u
size = size[:-3]
print 'size:'+size
seed = tr.getchildren()[2].text
leech = tr.getchildren()[3].text
torrent = Torrent(idt,
title,
url=url,
size=size,
seeders=seeders,
leechers=leechers)
yield torrent
torrent = Torrent(idt,
title,
url=url,
size=self.unit(size.replace('.',','),u),
seeders=int(seed),
leechers=int(leech))
yield torrent
def get_torrent(self, id):
table = self.document.getroot().cssselect('div.thin')