[piratebay] ok without descriptions
This commit is contained in:
parent
856e55d214
commit
0fef303764
2 changed files with 17 additions and 74 deletions
|
|
@@ -66,7 +66,6 @@ class PiratebayBrowser(BaseBrowser):
|
||||||
return self.page.iter_torrents()
|
return self.page.iter_torrents()
|
||||||
|
|
||||||
def get_torrent(self, id):
|
def get_torrent(self, id):
|
||||||
print 'goto:'+id
|
|
||||||
self.location('https://thepiratebay.org/torrent/%s/' % id)
|
self.location('https://thepiratebay.org/torrent/%s/' % id)
|
||||||
|
|
||||||
assert self.is_on_page(TorrentPage)
|
assert self.is_on_page(TorrentPage)
|
||||||
|
|
|
||||||
|
|
@@ -58,9 +58,7 @@ class TorrentsPage(BasePage):
|
||||||
size = td.find('font').text.split(',')[1]
|
size = td.find('font').text.split(',')[1]
|
||||||
size = size.split(' ')[2]
|
size = size.split(' ')[2]
|
||||||
u = size[-3:].replace('i','')
|
u = size[-3:].replace('i','')
|
||||||
print "u:"+u
|
|
||||||
size = size[:-3]
|
size = size[:-3]
|
||||||
print 'size:'+size
|
|
||||||
|
|
||||||
seed = tr.getchildren()[2].text
|
seed = tr.getchildren()[2].text
|
||||||
leech = tr.getchildren()[3].text
|
leech = tr.getchildren()[3].text
|
||||||
|
|
@@ -75,77 +73,23 @@ class TorrentsPage(BasePage):
|
||||||
|
|
||||||
class TorrentPage(BasePage):
|
class TorrentPage(BasePage):
|
||||||
def get_torrent(self, id):
|
def get_torrent(self, id):
|
||||||
table = self.document.getroot().cssselect('div.thin')
|
for div in self.document.getiterator('div'):
|
||||||
if not table:
|
if div.attrib.get('id','') == 'title':
|
||||||
warning('No div.thin found')
|
title = div.text
|
||||||
return None
|
elif div.attrib.get('class','') == 'download':
|
||||||
|
url = div.getchildren()[0].attrib.get('href','')
|
||||||
h2 = table[0].find('h2')
|
elif div.attrib.get('id','') == 'details':
|
||||||
title = h2.text or ''
|
size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
|
||||||
if h2.find('a') != None:
|
seed = div.getchildren()[0].getchildren()[24].text
|
||||||
title += h2.find('a').text + h2.find('a').tail
|
leech = div.getchildren()[0].getchildren()[26].text
|
||||||
|
elif div.attrib.get('class','') == 'nfo':
|
||||||
|
description = div.getchildren()[0].text
|
||||||
torrent = Torrent(id, title)
|
torrent = Torrent(id, title)
|
||||||
table = self.document.getroot().cssselect('table.torrent_table')
|
torrent.url = url
|
||||||
if not table:
|
torrent.size = size
|
||||||
warning('No table found')
|
torrent.seeders = int(seed)
|
||||||
return None
|
torrent.leechers = int(leech)
|
||||||
|
torrent.description = description
|
||||||
for tr in table[0].findall('tr'):
|
torrent.files = ['NYI']
|
||||||
if tr.attrib.get('class', '').startswith('group_torrent'):
|
|
||||||
tds = tr.findall('td')
|
|
||||||
|
|
||||||
if not len(tds) == 5:
|
|
||||||
continue
|
|
||||||
|
|
||||||
url = tds[0].find('span').find('a').attrib['href']
|
|
||||||
id = self.TORRENTID_REGEXP.match(url)
|
|
||||||
|
|
||||||
if not id:
|
|
||||||
warning('ID not found')
|
|
||||||
continue
|
|
||||||
|
|
||||||
id = id.group(1)
|
|
||||||
|
|
||||||
if id != torrent.id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
torrent.url = self.format_url(url)
|
|
||||||
torrent.size = self.unit(*tds[1].text.split())
|
|
||||||
torrent.seeders = int(tds[3].text)
|
|
||||||
torrent.leechers = int(tds[4].text)
|
|
||||||
break
|
|
||||||
|
|
||||||
if not torrent.url:
|
|
||||||
warning('Torrent %d not found in list' % torrent.id)
|
|
||||||
return None
|
|
||||||
|
|
||||||
div = self.document.getroot().cssselect('div.main_column')
|
|
||||||
if not div:
|
|
||||||
warning('WTF')
|
|
||||||
return None
|
|
||||||
|
|
||||||
for box in div[0].cssselect('div.box'):
|
|
||||||
title = None
|
|
||||||
body = None
|
|
||||||
|
|
||||||
title_t = box.cssselect('div.head')
|
|
||||||
if title_t:
|
|
||||||
title = title_t[0].find('strong').text.strip()
|
|
||||||
body_t = box.cssselect('div.body')
|
|
||||||
if body_t:
|
|
||||||
body = html2text(self.browser.parser.tostring(body_t[0])).strip()
|
|
||||||
|
|
||||||
if title and body:
|
|
||||||
if torrent.description is NotLoaded:
|
|
||||||
torrent.description = u''
|
|
||||||
torrent.description += u'%s\n\n%s\n' % (title, body)
|
|
||||||
|
|
||||||
div = self.document.getroot().cssselect('div#files_%s' % torrent.id)
|
|
||||||
if div:
|
|
||||||
torrent.files = []
|
|
||||||
for tr in div[0].find('table'):
|
|
||||||
if tr.attrib.get('class', None) != 'colhead_dark':
|
|
||||||
torrent.files.append(tr.find('td').text)
|
|
||||||
|
|
||||||
return torrent
|
return torrent
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue