fix row identification problem in isohunt

The isohunt website does not return the same page structure on every request, so result rows could be misidentified.
This commit is contained in:
Julien Veyssier 2010-12-14 13:51:34 +01:00
commit fdf09c4ad0

View file

@ -28,30 +28,32 @@ class TorrentsPage(BasePage):
def iter_torrents(self):
    """Yield one Torrent for each search-result row of the page.

    A result row is a ``<tr>`` element whose ``class`` attribute equals
    ``hlRow`` and which also carries an ``onmouseout`` attribute.  Every
    other ``<tr>`` is skipped.

    For each result row the following is read from its cells:

    * cell 2: its first child supplies the ``href`` from which the torrent
      id is taken, its second child supplies the title;
    * cell 3: the size text;
    * cell 4 / cell 5: the seeder / leecher counts.

    Raises whatever the indexing and numeric conversions raise
    (IndexError, ValueError, TypeError) if a result row does not have the
    expected layout.
    """
    for tr in self.document.getiterator('tr'):
        if tr.attrib.get('class', '') != 'hlRow':
            continue
        # Sometimes the first tr also has the hlRow class; the presence of
        # 'onmouseout' is used to distinguish real result rows from it.
        # The ``in`` operator is used because has_key() is deprecated and
        # does not exist on Python 3 mappings.
        if 'onmouseout' not in tr.attrib:
            continue

        cells = list(tr)
        name_cell = list(cells[2])

        # The title may be split around child elements of the title node
        # (named "bold" upstream), so the node's own text is joined with
        # each child's text and tail.
        atitle = name_cell[1]
        parts = [atitle.text or '']
        for bold in atitle:
            parts.append(bold.text or '')
            parts.append(bold.tail or '')
        title = ''.join(parts)

        # The torrent id is the third '/'-separated component of the href.
        idt = name_cell[0].attrib.get('href', '')
        idt = idt.split('/')[2]

        # Presumably the size text looks like "<number> <2-letter unit>"
        # (e.g. "700.5 MB"): last two characters are the unit, everything
        # before the separating character is the number -- TODO confirm.
        size = cells[3].text
        u = size[-2:]
        size = float(size[:-3])

        seed = cells[4].text
        leech = cells[5].text
        url = 'https://isohunt.com/download/%s/mon_joli_torrent.torrent' % idt
        yield Torrent(idt,
                      title,
                      url=url,
                      size=get_bytes_size(size, u),
                      seeders=int(seed),
                      leechers=int(leech))
class TorrentPage(BasePage): class TorrentPage(BasePage):