Add btdigg backend

A resubmission of the previous patch, this time with an understandable name.
The original submission text is copied below.

This is a simple backend for btdigg.org. This site is especially
interesting because it is not an indexer where uploaders add their
torrents; it crawls the DHT and listens to all infohashes being
exchanged by the nodes.

Because of this, btdigg.org provides no description and no torrent
files, only magnets. Moreover, there are no seeder or leecher counts
(although the number of peers in each swarm is available).

Note that there is no icon.

Signed-off-by: Matthieu Rakotojaona <matthieu.rakotojaona@gmail.com>
Signed-off-by: Romain Bignon <romain@symlink.me>
This commit is contained in:
Matthieu Rakotojaona 2014-01-26 19:10:06 +01:00 committed by Romain Bignon
commit b55d83e6ab
7 changed files with 238 additions and 0 deletions

View file

@ -0,0 +1,3 @@
from .backend import BTDiggBackend
__all__ = ['BTDiggBackend']

View file

@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
from weboob.capabilities.torrent import ICapTorrent
from weboob.tools.backend import BaseBackend
from .browser import BTDiggBrowser
__all__ = ['BTDiggBackend']
class BTDiggBackend(BaseBackend, ICapTorrent):
    """Torrent backend for btdigg.org.

    Every capability method is a thin delegation to ``self.browser``,
    whose class is declared through ``BROWSER``.
    """

    NAME = 'btdigg'
    MAINTAINER = u'Matthieu Rakotojaona'
    EMAIL = 'matthieu.rakotojaona@gmail.com'
    VERSION = '0.i'
    DESCRIPTION = 'The BitTorrent DHT search engine.'
    LICENSE = 'CC0'
    BROWSER = BTDiggBrowser

    def create_default_browser(self):
        """Return the browser built by ``create_browser()`` without arguments."""
        return self.create_browser()

    def get_torrent(self, id):
        """Return the torrent identified by *id*, as produced by the browser."""
        return self.browser.get_torrent(id)

    def get_torrent_file(self, id):
        """Delegate the torrent-file request for *id* to the browser."""
        return self.browser.get_torrent_file(id)

    def iter_torrents(self, pattern):
        """Iterate over the torrents matching the search *pattern*.

        The pattern is handed over untouched: the browser already applies
        ``urllib.quote_plus`` when it builds the search URL. Replacing the
        spaces with '+' here as well made the browser escape those '+'
        signs to '%2B', so the site received a literal "a+b" query instead
        of "a b".
        """
        return self.browser.iter_torrents(pattern)

    # Disabled draft of the fill-object hook, kept for reference:
    #def fill_torrent(self, torrent, fields):
    #    if 'description' in fields or fields is None:
    #        return self.get_torrent(torrent.id)

    #OBJECTS = {
    #    Torrent:fill_torrent
    #}

View file

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
import urllib
from weboob.tools.browser import BaseBrowser
from .pages.index import IndexPage
from .pages.torrents import TorrentsPage, TorrentPage
__all__ = ['BTDiggBrowser']
class BTDiggBrowser(BaseBrowser):
    """Browser for the btdigg.org search and torrent-detail pages."""

    DOMAIN = 'btdigg.org'
    PROTOCOL = 'https'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
    # URL patterns mapped to the page class handling them. The keys are
    # regular expressions (they rely on '.*' and '[^?]*'), so the literal
    # '.' of the host name and the literal '?' that starts the query string
    # are escaped. Unescaped, 'search?' meant "optional 'h'" and only
    # matched the real URLs by accident.
    PAGES = {
        r'https://btdigg\.org/': IndexPage,
        r'https://btdigg\.org/search\?.*q=[^?]*': TorrentsPage,
        r'https://btdigg\.org/search\?.*info_hash=[^?]*': TorrentPage,
    }

    def home(self):
        """Open the site root and return the result of ``location()``."""
        return self.location('https://btdigg.org')

    def iter_torrents(self, pattern):
        """Run a search for *pattern* and iterate over the resulting torrents.

        *pattern* is encoded to UTF-8 and escaped with ``urllib.quote_plus``
        before being placed in the ``q`` query parameter.
        """
        query = urllib.quote_plus(pattern.encode('utf-8'))
        self.location('https://btdigg.org/search?q=%s' % query)

        assert self.is_on_page(TorrentsPage)
        return self.page.iter_torrents()

    def get_torrent(self, id):
        """Open the detail page of info-hash *id* and return its torrent."""
        self._go_to_torrent(id)
        return self.page.get_torrent(id)

    def get_torrent_file(self, id):
        """Open the detail page of info-hash *id* and ask it for the torrent file."""
        self._go_to_torrent(id)
        return self.page.get_torrent_file(id)

    def _go_to_torrent(self, id):
        """Navigate to the detail page of info-hash *id* and check the page type."""
        self.location('https://btdigg.org/search?info_hash=%s' % id)
        assert self.is_on_page(TorrentPage)

View file

View file

@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
from weboob.tools.browser import BasePage
__all__ = ['IndexPage']
class IndexPage(BasePage):
    """Empty ``BasePage`` subclass: it adds no attributes and no methods."""

View file

@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from urlparse import urlparse, parse_qs
from weboob.tools.browser import BasePage,BrokenPageError
from weboob.capabilities.torrent import Torrent, MagnetOnly
from weboob.capabilities.base import NotAvailable
__all__ = ['TorrentsPage', 'TorrentPage']
def fullsize(n, u):
    """Convert the quantity *n*, expressed in unit *u*, to bytes.

    *u* must be one of 'B', 'KB', 'MB', 'GB' or 'TB' (powers of 1024);
    any other value raises ``KeyError``. The result is always a float.
    """
    multipliers = dict(
        B=1,
        KB=1024,
        MB=1024 ** 2,
        GB=1024 ** 3,
        TB=1024 ** 4,
    )
    factor = multipliers[u]
    return float(n * factor)
class TorrentsPage(BasePage):
    """Search-results page: builds one ``Torrent`` per listed result."""

    def iter_torrents(self):
        """Yield a ``Torrent`` for every result found in the document.

        The tables matching ``table.torrent_name_tbl`` are consumed two at
        a time: the first of each pair holds the title link, the second the
        magnet link and the attribute cells. A trailing table without a
        partner is ignored instead of raising ``IndexError``.

        Seeders, leechers, description and files are set to
        ``NotAvailable``; the torrent id is the last ':'-separated field of
        the magnet's ``xt`` parameter.
        """
        try:
            tables = self.document.getroot().cssselect('table.torrent_name_tbl')
        except BrokenPageError:
            return

        for header, details in zip(tables[0::2], tables[1::2]):
            # Title
            title = header.cssselect('td.torrent_name a')[0]
            name = unicode(title.text)

            # Other elems
            cells = details.cssselect('td')

            magnet = unicode(cells[0].cssselect('a')[0].attrib['href'])
            query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
            btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
            ih = btih.split(':')[-1]

            value, unit = cells[2].cssselect('span.attr_val')[0].text.split()

            # The age cell is split into exactly three words; the third one
            # is discarded.
            # NOTE(review): the unit word is used directly as a timedelta
            # keyword, which only accepts plural names up to 'weeks' --
            # confirm the site never emits a singular or month/year unit.
            valueago, valueunit, _ = cells[5].cssselect('span.attr_val')[0].text.split()
            delta = timedelta(**{valueunit: float(valueago)})
            date = datetime.now() - delta

            torrent = Torrent(ih, name)
            torrent.url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
            torrent.size = fullsize(float(value), unit)
            torrent.magnet = magnet
            torrent.seeders = NotAvailable
            torrent.leechers = NotAvailable
            torrent.description = NotAvailable
            torrent.files = NotAvailable
            torrent.date = date
            yield torrent
class TorrentPage(BasePage):
    """Detail page of a single torrent, reached through its info-hash."""

    def get_torrent(self, id):
        """Build a ``Torrent`` from the rows of ``table.torrent_info_tbl``.

        Fields are read from fixed row positions: the download link from
        row 2, the name from row 3, the size from row 5, the age from row 6
        and one file name per row from row 15 onwards. The torrent id is
        taken from the magnet's ``xt`` parameter, not from *id*.

        Raises ``BrokenPageError`` when the download link is not a magnet
        URI. Previously that case left ``magnet`` and ``ih`` unbound and
        failed further down with an ``UnboundLocalError``.
        """
        rows = self.document.getroot().cssselect('table.torrent_info_tbl tr')

        # magnet
        download = rows[2].cssselect('td a')[0]
        href = download.attrib['href']
        if not href.startswith('magnet:'):
            raise BrokenPageError('Unable to find the magnet link of torrent %s' % id)

        magnet = unicode(href)
        query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
        btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
        ih = btih.split(':')[-1]

        name = unicode(rows[3].cssselect('td')[1].text)

        value, unit = rows[5].cssselect('td')[1].text.split()

        # The age cell is split into exactly three words; the third one is
        # discarded.
        # NOTE(review): the unit word is used directly as a timedelta
        # keyword, which only accepts plural names up to 'weeks' -- confirm
        # the site never emits a singular or month/year unit.
        valueago, valueunit, _ = rows[6].cssselect('td')[1].text.split()
        delta = timedelta(**{valueunit: float(valueago)})
        date = datetime.now() - delta

        files = [unicode(row.cssselect('td')[1].text) for row in rows[15:]]

        torrent = Torrent(ih, name)
        torrent.url = unicode(self.url)
        torrent.size = fullsize(float(value), unit)
        torrent.magnet = magnet
        torrent.seeders = NotAvailable
        torrent.leechers = NotAvailable
        torrent.description = NotAvailable
        torrent.files = files
        torrent.filename = NotAvailable
        torrent.date = date
        return torrent

    def get_torrent_file(self, id):
        """Always raise ``MagnetOnly`` carrying the magnet link of this torrent."""
        raise MagnetOnly(self.get_torrent(id).magnet)

43
modules/bitedick/test.py Normal file
View file

@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
from weboob.tools.test import BackendTest
from weboob.capabilities.torrent import MagnetOnly
from random import choice
__all__ = ['BTDiggTest']
class BTDiggTest(BackendTest):
    """Live tests of the 'btdigg' backend (they query the real site)."""

    BACKEND = 'btdigg'

    def test_iter_torrents(self):
        """A popular search returns ten results with the basic fields set."""
        # try something popular so we sometimes get a magnet-only torrent
        torrents = list(self.backend.iter_torrents('ubuntu linux'))
        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(10, len(torrents))
        for torrent in torrents:
            assert torrent.name
            assert torrent.url
            assert torrent.size
            assert torrent.magnet
            assert torrent.date
            # the id is checked to be 40 characters long
            self.assertEqual(40, len(torrent.id))

    def test_get_random_torrentfile(self):
        """Asking for a torrent file raises MagnetOnly with the torrent's magnet."""
        torrent = choice(list(self.backend.iter_torrents('ubuntu linux')))
        full_torrent = self.backend.get_torrent(torrent.id)
        try:
            self.backend.get_torrent_file(torrent.id)
        except MagnetOnly as e:
            assert e.magnet.startswith("magnet:")
            assert e.magnet == full_torrent.magnet
        else:
            # The page class always raises MagnetOnly, so reaching this
            # point means the checks above were silently skipped.
            self.fail('get_torrent_file() did not raise MagnetOnly')

    def test_get_special_torrent(self):
        """A known info-hash yields the expected name, files, size, url and date."""
        torrent = self.backend.get_torrent("c2e018a16bf28520687e400580be08934d00373a")
        assert torrent.name == u'Ubuntu Linux Toolbox - 1000+ Commands for Ubuntu and Debian Power Users~tqw~_darksiderg'
        assert len(torrent.files) == 3
        assert torrent.size == 3376414.72
        assert torrent.url == "https://btdigg.org/search?info_hash=c2e018a16bf28520687e400580be08934d00373a"
        dt = torrent.date
        assert dt.year == 2011
        assert dt.month == 2