Add btdigg backend

A resubmit of the previous one, this time with an understandable name. The original submission text is copied below. This is a simple backend for btdigg.org. This site is especially interesting because it is not an indexer where uploaders add their torrents; it crawls the DHT and listens to all infohashes being exchanged by the nodes. Because of this, btdigg.org provides no description and no torrent files, only magnets. Moreover, there are no seeder or leecher counts (although the number of peers in each swarm is available). Note that there is no icon. Signed-off-by: Matthieu Rakotojaona <matthieu.rakotojaona@gmail.com> Signed-off-by: Romain Bignon <romain@symlink.me>
2014-01-26 19:10:06 +01:00 · 2014-01-26 19:10:06 +01:00 · b55d83e6ab
commit b55d83e6ab
parent 7c26f58b39
7 changed files with 238 additions and 0 deletions
--- a/modules/bitedick/__init__.py
+++ b/modules/bitedick/__init__.py
@ -0,0 +1,3 @@
+from .backend import BTDiggBackend
+
+__all__ = ['BTDiggBackend']
--- a/modules/bitedick/backend.py
+++ b/modules/bitedick/backend.py
@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+from weboob.capabilities.torrent import ICapTorrent
+from weboob.tools.backend import BaseBackend
+
+from .browser import BTDiggBrowser
+
+
+__all__ = ['BTDiggBackend']
+
+class BTDiggBackend(BaseBackend, ICapTorrent):
+    NAME = 'btdigg'
+    MAINTAINER = u'Matthieu Rakotojaona'
+    EMAIL = 'matthieu.rakotojaona@gmail.com'
+    VERSION = '0.i'
+    DESCRIPTION = 'The BitTorrent DHT search engine.'
+    LICENSE = 'CC0'
+    BROWSER = BTDiggBrowser
+
+    def create_default_browser(self):
+        return self.create_browser()
+
+    def get_torrent(self, id):
+        return self.browser.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        return self.browser.get_torrent_file(id)
+
+    def iter_torrents(self, pattern):
+        return self.browser.iter_torrents(pattern.replace(' ', '+'))
+
+    #def fill_torrent(self, torrent, fields):
+    #    if 'description' in fields or fields == None:
+    #        return self.get_torrent(torrent.id)
+
+    #OBJECTS = {
+    #    Torrent:fill_torrent
+    #}
--- a/modules/bitedick/browser.py
+++ b/modules/bitedick/browser.py
@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages.index import IndexPage
+from .pages.torrents import TorrentsPage, TorrentPage
+
+
+__all__ = ['BTDiggBrowser']
+
+
+class BTDiggBrowser(BaseBrowser):
+    DOMAIN = 'btdigg.org'
+    PROTOCOL = 'https'
+    ENCODING = 'utf-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
+    PAGES = {'https://btdigg.org/': IndexPage,
+             'https://btdigg.org/search?.*q=[^?]*': TorrentsPage,
+             'https://btdigg.org/search?.*info_hash=[^?]*': TorrentPage,
+             }
+
+    def home(self):
+        return self.location('https://btdigg.org')
+
+    def iter_torrents(self, pattern):
+        self.location('https://btdigg.org/search?q=%s' % urllib.quote_plus(pattern.encode('utf-8')))
+
+        assert self.is_on_page(TorrentsPage)
+        return self.page.iter_torrents()
+
+    def get_torrent(self, id):
+        self.location('https://btdigg.org/search?info_hash=%s' % id)
+
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        self.location('https://btdigg.org/search?info_hash=%s' % id)
+
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent_file(id)
--- a/modules/bitedick/pages/__init__.py
+++ b/modules/bitedick/pages/__init__.py
--- a/modules/bitedick/pages/index.py
+++ b/modules/bitedick/pages/index.py
@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from weboob.tools.browser import BasePage
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    pass
--- a/modules/bitedick/pages/torrents.py
+++ b/modules/bitedick/pages/torrents.py
@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+
+from datetime import datetime, timedelta
+from urlparse import urlparse, parse_qs
+
+from weboob.tools.browser import BasePage,BrokenPageError
+from weboob.capabilities.torrent import Torrent, MagnetOnly
+from weboob.capabilities.base import NotAvailable
+
+__all__ = ['TorrentsPage', 'TorrentPage']
+
+def fullsize(n, u):
+    m = {'B': 1,
+         'KB': 1024,
+         'MB': 1024 * 1024,
+         'GB': 1024 * 1024 * 1024,
+         'TB': 1024 * 1024 * 1024 * 1024,
+        }
+    return float(n * m[u])
+
+class TorrentsPage(BasePage):
+
+    def iter_torrents(self):
+        try:
+            table = self.document.getroot().cssselect('table.torrent_name_tbl')
+        except BrokenPageError:
+            return
+        for i in range(0, len(table), 2):
+            # Title
+            title = table[i].cssselect('td.torrent_name a')[0]
+            name = unicode(title.text)
+            url = unicode(title.attrib['href'])
+
+            # Other elems
+            elems = table[i+1].cssselect('td')
+
+            magnet = unicode(elems[0].cssselect('a')[0].attrib['href'])
+
+            query = urlparse(magnet).query # xt=urn:btih:<...>&dn=<...>
+            btih = parse_qs(query)['xt'][0] # urn:btih:<...>
+            ih = btih.split(':')[-1]
+
+            value, unit = elems[2].cssselect('span.attr_val')[0].text.split()
+
+            valueago, valueunit, _ = elems[5].cssselect('span.attr_val')[0].text.split()
+            delta = timedelta(**{valueunit: float(valueago)})
+            date = datetime.now() - delta
+
+            url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
+
+            torrent = Torrent(ih, name)
+            torrent.url = url
+            torrent.size = fullsize(float(value), unit)
+            torrent.magnet = magnet
+            torrent.seeders = NotAvailable
+            torrent.leechers = NotAvailable
+            torrent.description = NotAvailable
+            torrent.files = NotAvailable
+            torrent.date = date
+            yield torrent
+
+class TorrentPage(BasePage):
+    def get_torrent(self, id):
+        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')
+
+        # magnet
+        download = trs[2].cssselect('td a')[0]
+        if download.attrib['href'].startswith('magnet:'):
+            magnet = unicode(download.attrib['href'])
+
+            query = urlparse(magnet).query # xt=urn:btih:<...>&dn=<...>
+            btih = parse_qs(query)['xt'][0] # urn:btih:<...>
+            ih = btih.split(':')[-1]
+
+        name = unicode(trs[3].cssselect('td')[1].text)
+
+        value, unit  = trs[5].cssselect('td')[1].text.split()
+
+        valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
+        delta = timedelta(**{valueunit: float(valueago)})
+        date = datetime.now() - delta
+
+
+        files = []
+        for tr in trs[15:]:
+            files.append(unicode(tr.cssselect('td')[1].text))
+
+        torrent = Torrent(ih, name)
+        torrent.url = unicode(self.url)
+        torrent.size = fullsize(float(value), unit)
+        torrent.magnet = magnet
+        torrent.seeders = NotAvailable
+        torrent.leechers = NotAvailable
+        torrent.description = NotAvailable
+        torrent.files = files
+        torrent.filename = NotAvailable
+        torrent.date = date
+
+        return torrent
+
+    def get_torrent_file(self, id):
+        raise MagnetOnly(self.get_torrent(id).magnet)
--- a/modules/bitedick/test.py
+++ b/modules/bitedick/test.py
@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+from weboob.tools.test import BackendTest
+from weboob.capabilities.torrent import MagnetOnly
+
+from random import choice
+
+__all__ = ['BTDiggTest']
+
+class BTDiggTest(BackendTest):
+    BACKEND = 'btdigg'
+
+    def test_iter_torrents(self):
+        # try something popular so we sometimes get a magnet-only torrent
+        l = list(self.backend.iter_torrents('ubuntu linux'))
+        self.assertTrue(len(l) == 10)
+        for torrent in l:
+            assert torrent.name
+            assert torrent.url
+            assert torrent.size
+            assert torrent.magnet
+            assert torrent.date
+
+            self.assertEquals(40, len(torrent.id))
+
+    def test_get_random_torrentfile(self):
+        torrent = choice(list(self.backend.iter_torrents('ubuntu linux')))
+        full_torrent = self.backend.get_torrent(torrent.id)
+        try:
+            self.backend.get_torrent_file(torrent.id)
+        except MagnetOnly as e:
+            assert e.magnet.startswith("magnet:")
+            assert e.magnet == full_torrent.magnet
+
+    def test_get_special_torrent(self):
+        torrent = self.backend.get_torrent("c2e018a16bf28520687e400580be08934d00373a")
+        assert torrent.name == u'Ubuntu Linux Toolbox - 1000+ Commands for Ubuntu and Debian Power Users~tqw~_darksiderg'
+        assert len(torrent.files) == 3
+        assert torrent.size == float(3376414.72)
+        assert torrent.url == "https://btdigg.org/search?info_hash=c2e018a16bf28520687e400580be08934d00373a"
+        dt = torrent.date
+        assert dt.year == 2011
+        assert dt.month == 2