implement Gazelle.get_torrent() method

2010-05-02 18:55:56 +02:00 · 2010-05-02 18:55:56 +02:00 · 2ee1c761ea
commit 2ee1c761ea
parent 1c85848195
4 changed files with 93 additions and 7 deletions
--- a/weboob/backends/gazelle/browser.py
+++ b/weboob/backends/gazelle/browser.py
@@ -66,4 +66,7 @@ class GazelleBrowser(BaseBrowser):
        return self.page.iter_torrents()

    def get_torrent(self, id):
-        pass
+        self.location('/torrents.php?torrentid=%s' % id)
+
+        assert self.is_on_page(TorrentsPage)
+        return self.page.get_torrent(id)
--- a/weboob/backends/gazelle/pages/torrents.py
+++ b/weboob/backends/gazelle/pages/torrents.py
@@ -19,7 +19,9 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 """

 import re
+from logging import warning

+from weboob.tools.misc import html2text
 from weboob.tools.browser import BasePage
 from weboob.capabilities.torrent import Torrent

@@ -37,11 +39,18 @@ class TorrentsPage(BasePage):
            }
        return float(n.replace(',', '')) * m.get(u, 1)

+    def format_url(self, url):
+        """Return ``url`` prefixed with the browser's protocol and domain."""
+        site = self.browser
+        return '%s://%s/%s' % (site.PROTOCOL, site.DOMAIN, url)
+
    def iter_torrents(self):
-        table = self.document.getroot().cssselect('table#torrent_table')
+        table = self.document.getroot().cssselect('table.torrent_table')
        if not table:
            table = self.document.getroot().cssselect('table#browse_torrent_table')
-        if table:
+        if not table:
+            warning('No table found')
+        else:
            table = table[0]
            current_group = None
            for tr in table.findall('tr'):
@@ -57,7 +66,7 @@ class TorrentsPage(BasePage):
                        if current_group:
                            current_group += ' - '
                        current_group += a.text
-                elif tr.attrib.get('class', '').startswith('group_torrent ') or \
+                elif tr.attrib.get('class', '').startswith('group_torrent') or \
                     tr.attrib.get('class', '').startswith('torrent'):
                    tds = tr.findall('td')

@@ -87,8 +96,77 @@ class TorrentsPage(BasePage):

                    torrent = Torrent(id,
                                      title,
-                                      url=url,
+                                      url=self.format_url(url),
                                      size=size,
                                      seeders=seeders,
                                      leechers=leechers)
                    yield torrent
+                else:
+                    print tr.attrib
+
+    def get_torrent(self, id):
+        """Build the Torrent whose id is ``id`` from the current page.
+
+        Returns None, after logging a warning, when the expected markup is
+        missing or when no row of the torrent table carries the requested id.
+        """
+        thin = self.document.getroot().cssselect('div.thin')
+        if not thin:
+            warning('No div.thin found')
+            return None
+
+        h2 = thin[0].find('h2')
+        title = h2.text or ''
+        link = h2.find('a')
+        if link is not None:
+            # text and tail are None when empty; fall back to '' before joining.
+            title += (link.text or '') + (link.tail or '')
+
+        torrent = Torrent(id, title)
+        table = self.document.getroot().cssselect('table.torrent_table')
+        if not table:
+            warning('No table found')
+            return None
+
+        for tr in table[0].findall('tr'):
+            row_class = tr.attrib.get('class', '')
+            tds = tr.findall('td')
+            if not row_class.startswith('group_torrent') or len(tds) != 5:
+                continue
+
+            url = tds[0].find('span').find('a').attrib['href']
+            match = self.TORRENTID_REGEXP.match(url)
+            if not match:
+                warning('ID not found')
+                continue
+            if match.group(1) != torrent.id:
+                continue
+
+            torrent.url = self.format_url(url)
+            torrent.size = self.unit(*tds[1].text.split())
+            torrent.seeders = int(tds[3].text)
+            torrent.leechers = int(tds[4].text)
+            break
+
+        if not torrent.url:
+            # %s rather than %d: ids are matched against a regexp group (a string).
+            warning('Torrent %s not found in list' % torrent.id)
+            return None
+
+        div = self.document.getroot().cssselect('div.main_column')
+        if not div:
+            warning('WTF')
+            return None
+
+        # Each box with both a head and a body contributes a description section.
+        for box in div[0].cssselect('div.box'):
+            heads = box.cssselect('div.head')
+            bodies = box.cssselect('div.body')
+            if not heads or not bodies:
+                continue
+            head = heads[0].find('strong').text
+            body = html2text(self.browser.parser.tostring(bodies[0]))
+            if head and body:
+                torrent.description += '%s\n\n%s\n' % (head, body)
+
+        return torrent
--- a/weboob/capabilities/torrent.py
+++ b/weboob/capabilities/torrent.py
@@ -24,15 +24,16 @@ from .cap import ICap
 __all__ = ['ICapTorrent']

 class Torrent(object):
-    def __init__(self, id, name, date=None, size=0.0, url=u'', seeders=0, leechers=0, files=[]):
+    def __init__(self, id, name, date=None, size=0.0, url=u'', seeders=0, leechers=0, files=[], description=u''):
        self.id = id
        self.name = name
        self.date = date
        self.size = size
-        self.ul = url
+        self.url = url
        self.seeders = seeders
        self.leechers = leechers
        self.files = files
+        self.description = description

 class ICapTorrent(ICap):
    def iter_torrents(self, pattern):
--- a/weboob/frontends/weboorrents/application.py
+++ b/weboob/frontends/weboorrents/application.py
@@ -58,6 +58,10 @@ class Weboorrents(ConsoleApplication):
            rows.append(('ID', torrent.id))
            rows.append(('Name', torrent.name))
            rows.append(('Size', torrent.size))
+            rows.append(('URL', torrent.url))
+            rows.append(('Seeders', torrent.seeders))
+            rows.append(('Leechers', torrent.leechers))
+            rows.append(('Description', torrent.description))
            return {backend.name: rows}

    @ConsoleApplication.command('Search torrents')