Fix row identification problem in isohunt,
because their website doesn't produce the same page on every request
This commit is contained in:
parent
638dc1f466
commit
fdf09c4ad0
1 changed file with 26 additions and 24 deletions
|
|
@@ -28,30 +28,32 @@ class TorrentsPage(BasePage):
|
||||||
def iter_torrents(self):
|
def iter_torrents(self):
|
||||||
for tr in self.document.getiterator('tr'):
|
for tr in self.document.getiterator('tr'):
|
||||||
if tr.attrib.get('class', '') == 'hlRow':
|
if tr.attrib.get('class', '') == 'hlRow':
|
||||||
# TODO à corriger
|
# sometimes the first tr also has the attribute hlRow
|
||||||
atitle = tr.getchildren()[2].getchildren()[1]
|
# I use that to distinguish it from the others
|
||||||
title = atitle.text
|
if tr.attrib.has_key('onmouseout'):
|
||||||
if not title:
|
atitle = tr.getchildren()[2].getchildren()[1]
|
||||||
title = ''
|
title = atitle.text
|
||||||
for bold in atitle.getchildren():
|
if not title:
|
||||||
if bold.text:
|
title = ''
|
||||||
title += bold.text
|
for bold in atitle.getchildren():
|
||||||
if bold.tail:
|
if bold.text:
|
||||||
title += bold.tail
|
title += bold.text
|
||||||
idt = tr.getchildren()[2].getchildren()[0].attrib.get('href','')
|
if bold.tail:
|
||||||
idt = idt.split('/')[2]
|
title += bold.tail
|
||||||
size = tr.getchildren()[3].text
|
idt = tr.getchildren()[2].getchildren()[0].attrib.get('href','')
|
||||||
u = size[-2:]
|
idt = idt.split('/')[2]
|
||||||
size = float(size[:-3])
|
size = tr.getchildren()[3].text
|
||||||
seed = tr.getchildren()[4].text
|
u = size[-2:]
|
||||||
leech = tr.getchildren()[5].text
|
size = float(size[:-3])
|
||||||
url = 'https://isohunt.com/download/%s/mon_joli_torrent.torrent' % idt
|
seed = tr.getchildren()[4].text
|
||||||
yield Torrent(idt,
|
leech = tr.getchildren()[5].text
|
||||||
title,
|
url = 'https://isohunt.com/download/%s/mon_joli_torrent.torrent' % idt
|
||||||
url=url,
|
yield Torrent(idt,
|
||||||
size=get_bytes_size(size, u),
|
title,
|
||||||
seeders=int(seed),
|
url=url,
|
||||||
leechers=int(leech))
|
size=get_bytes_size(size, u),
|
||||||
|
seeders=int(seed),
|
||||||
|
leechers=int(leech))
|
||||||
|
|
||||||
|
|
||||||
class TorrentPage(BasePage):
|
class TorrentPage(BasePage):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue