[piratebay] ok without descriptions

2010-11-27 00:22:44 +01:00 · 2010-11-27 00:22:44 +01:00 · 0fef303764
commit 0fef303764
parent 856e55d214
2 changed files with 17 additions and 74 deletions
--- a/weboob/backends/piratebay/browser.py
+++ b/weboob/backends/piratebay/browser.py
@@ -66,7 +66,6 @@ class PiratebayBrowser(BaseBrowser):
        return self.page.iter_torrents()

    def get_torrent(self, id):
-        print 'goto:'+id
        self.location('https://thepiratebay.org/torrent/%s/' % id)

        assert self.is_on_page(TorrentPage)
--- a/weboob/backends/piratebay/pages/torrents.py
+++ b/weboob/backends/piratebay/pages/torrents.py
@@ -58,9 +58,7 @@ class TorrentsPage(BasePage):
                        size = td.find('font').text.split(',')[1]
                        size = size.split(' ')[2]
                        u = size[-3:].replace('i','')
-                        print "u:"+u
                        size = size[:-3]
-                        print 'size:'+size
                        
                        seed = tr.getchildren()[2].text
                        leech = tr.getchildren()[3].text
@@ -75,77 +73,23 @@ class TorrentsPage(BasePage):

 class TorrentPage(BasePage):
    def get_torrent(self, id):
-        table = self.document.getroot().cssselect('div.thin')
-        if not table:
-            warning('No div.thin found')
-            return None
-
-        h2 = table[0].find('h2')
-        title = h2.text or ''
-        if h2.find('a') != None:
-            title += h2.find('a').text + h2.find('a').tail
-
+        for div in self.document.getiterator('div'):
+            if div.attrib.get('id','') == 'title':
+                title = div.text
+            elif div.attrib.get('class','') == 'download':
+                url = div.getchildren()[0].attrib.get('href','')
+            elif div.attrib.get('id','') == 'details':
+                size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
+                seed = div.getchildren()[0].getchildren()[24].text
+                leech = div.getchildren()[0].getchildren()[26].text
+            elif div.attrib.get('class','') == 'nfo':
+                description = div.getchildren()[0].text
        torrent = Torrent(id, title)
-        table = self.document.getroot().cssselect('table.torrent_table')
-        if not table:
-            warning('No table found')
-            return None
-
-        for tr in table[0].findall('tr'):
-            if tr.attrib.get('class', '').startswith('group_torrent'):
-                tds = tr.findall('td')
-
-                if not len(tds) == 5:
-                    continue
-
-                url = tds[0].find('span').find('a').attrib['href']
-                id = self.TORRENTID_REGEXP.match(url)
-
-                if not id:
-                    warning('ID not found')
-                    continue
-
-                id = id.group(1)
-
-                if id != torrent.id:
-                    continue
-
-                torrent.url = self.format_url(url)
-                torrent.size = self.unit(*tds[1].text.split())
-                torrent.seeders = int(tds[3].text)
-                torrent.leechers = int(tds[4].text)
-                break
-
-        if not torrent.url:
-            warning('Torrent %d not found in list' % torrent.id)
-            return None
-
-        div = self.document.getroot().cssselect('div.main_column')
-        if not div:
-            warning('WTF')
-            return None
-
-        for box in div[0].cssselect('div.box'):
-            title = None
-            body = None
-
-            title_t = box.cssselect('div.head')
-            if title_t:
-                title = title_t[0].find('strong').text.strip()
-            body_t = box.cssselect('div.body')
-            if body_t:
-                body = html2text(self.browser.parser.tostring(body_t[0])).strip()
-
-            if title and body:
-                if torrent.description is NotLoaded:
-                    torrent.description = u''
-                torrent.description += u'%s\n\n%s\n' % (title, body)
-
-        div = self.document.getroot().cssselect('div#files_%s' % torrent.id)
-        if div:
-            torrent.files = []
-            for tr in div[0].find('table'):
-                if tr.attrib.get('class', None) != 'colhead_dark':
-                    torrent.files.append(tr.find('td').text)
+        torrent.url = url
+        torrent.size = size
+        torrent.seeders = int(seed)
+        torrent.leechers = int(leech)
+        torrent.description = description
+        torrent.files = ['NYI']

        return torrent