Add btdigg backend

A resubmit of the previous one, this time with an understandable name.
The original submission text is copied below.

This is a simple backend for btdigg.org. This site is especially
interesting because it is not an indexer where uploaders add their
torrents; it crawls the DHT and listens to all infohashes being
exchanged by the nodes.

Because of this, btdigg.org provides no descriptions and no torrent
files, only magnets. Moreover, it reports no seeder or leecher counts
(although the number of peers in each swarm is available).

Note that there is no icon.

Signed-off-by: Matthieu Rakotojaona <matthieu.rakotojaona@gmail.com>
Signed-off-by: Romain Bignon <romain@symlink.me>
This commit is contained in:
Matthieu Rakotojaona 2014-01-26 19:10:06 +01:00 committed by Romain Bignon
commit b55d83e6ab
7 changed files with 238 additions and 0 deletions

View file

View file

@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-
from weboob.tools.browser import BasePage
__all__ = ['IndexPage']
class IndexPage(BasePage):
    """Page class that adds no parsing logic on top of BasePage.

    NOTE(review): judging by the name this is bound to the site's landing
    page -- confirm against the browser's page mapping.
    """

View file

@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from urlparse import urlparse, parse_qs
from weboob.tools.browser import BasePage,BrokenPageError
from weboob.capabilities.torrent import Torrent, MagnetOnly
from weboob.capabilities.base import NotAvailable
__all__ = ['TorrentsPage', 'TorrentPage']
# Binary (1024-based) multipliers, keyed by upper-case unit name.
_SIZE_MULTIPLIERS = {
    'B': 1,
    'KB': 1024,
    'MB': 1024 ** 2,
    'GB': 1024 ** 3,
    'TB': 1024 ** 4,
    'PB': 1024 ** 5,
}


def fullsize(n, u):
    """Convert a size *n* expressed in unit *u* into a number of bytes.

    :param n: numeric amount, e.g. ``1.5``
    :param u: unit name (``B``, ``KB``, ``MB``, ``GB``, ``TB`` or ``PB``);
              matched case-insensitively, surrounding whitespace is ignored
    :returns: the size in bytes, as a float
    :raises KeyError: if the unit is not one of the known names
    """
    # Normalise the unit so that scraped text such as 'kb' or ' GB' does not
    # blow up on a purely cosmetic difference.
    return float(n * _SIZE_MULTIPLIERS[u.strip().upper()])
class TorrentsPage(BasePage):
    """Page listing several torrents as ``table.torrent_name_tbl`` tables.

    The tables are consumed two at a time: the first of each pair holds the
    title link, the second holds the magnet link, size and age cells.
    """

    @staticmethod
    def _age_to_timedelta(amount, unit):
        """Turn an age such as ``('3', 'days')`` into a timedelta.

        timedelta() only accepts plural keyword names and knows nothing about
        months or years, so the unit is normalised first: it is lower-cased,
        a missing trailing 's' is added, and months/years are approximated
        as 30/365 days.

        :raises ValueError: if *amount* is not a number
        :raises TypeError: (from timedelta) if the unit is still unknown
        """
        quantity = float(amount)
        unit = unit.lower()
        if not unit.endswith('s'):
            unit += 's'
        approx_days = {'months': 30, 'years': 365}
        if unit in approx_days:
            return timedelta(days=quantity * approx_days[unit])
        return timedelta(**{unit: quantity})

    def iter_torrents(self):
        """Yield one Torrent per result found on the page.

        Only the fields readable from the listing are filled in (id, name,
        url, size, magnet, date); seeders, leechers, description and files
        are set to NotAvailable.
        """
        try:
            tables = self.document.getroot().cssselect('table.torrent_name_tbl')
        except BrokenPageError:
            return

        # Pair each title table with the attributes table that follows it.
        # zip() ignores a trailing unpaired table instead of raising
        # IndexError on a truncated page.
        for title_tbl, attrs_tbl in zip(tables[0::2], tables[1::2]):
            # Title
            title = title_tbl.cssselect('td.torrent_name a')[0]
            name = unicode(title.text)

            # Other elems
            elems = attrs_tbl.cssselect('td')

            magnet = unicode(elems[0].cssselect('a')[0].attrib['href'])
            query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
            btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
            ih = btih.split(':')[-1]

            value, unit = elems[2].cssselect('span.attr_val')[0].text.split()

            # The age cell is split into three whitespace-separated tokens;
            # only the amount and the unit are used.
            valueago, valueunit, _ = elems[5].cssselect('span.attr_val')[0].text.split()
            date = datetime.now() - self._age_to_timedelta(valueago, valueunit)

            torrent = Torrent(ih, name)
            # The URL is rebuilt from the infohash rather than taken from
            # the title link.
            torrent.url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
            torrent.size = fullsize(float(value), unit)
            torrent.magnet = magnet
            torrent.seeders = NotAvailable
            torrent.leechers = NotAvailable
            torrent.description = NotAvailable
            torrent.files = NotAvailable
            torrent.date = date
            yield torrent
class TorrentPage(BasePage):
    """Page describing a single torrent in a ``table.torrent_info_tbl`` table.

    Fields are read from fixed row positions of that table: row 2 holds the
    download link, row 3 the name, row 5 the size, row 6 the age, and rows
    15 onwards are read as the file list.
    """

    @staticmethod
    def _age_to_timedelta(amount, unit):
        """Turn an age such as ``('3', 'days')`` into a timedelta.

        timedelta() only accepts plural keyword names and knows nothing about
        months or years, so the unit is normalised first: it is lower-cased,
        a missing trailing 's' is added, and months/years are approximated
        as 30/365 days.

        :raises ValueError: if *amount* is not a number
        :raises TypeError: (from timedelta) if the unit is still unknown
        """
        quantity = float(amount)
        unit = unit.lower()
        if not unit.endswith('s'):
            unit += 's'
        approx_days = {'months': 30, 'years': 365}
        if unit in approx_days:
            return timedelta(days=quantity * approx_days[unit])
        return timedelta(**{unit: quantity})

    def get_torrent(self, id):
        """Build a Torrent from the page content.

        :param id: not used; the torrent id is the infohash extracted from
                   the magnet link found on the page
        :returns: a Torrent with url, size, magnet, files and date filled in;
                  seeders, leechers, description and filename are set to
                  NotAvailable
        :raises BrokenPageError: if the download link is not a magnet link
        """
        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')

        # magnet
        href = trs[2].cssselect('td a')[0].attrib['href']
        if not href.startswith('magnet:'):
            # Without a magnet there is neither an infohash nor anything to
            # download; fail explicitly instead of hitting an unbound
            # variable further down.
            raise BrokenPageError('Unable to find the magnet link')
        magnet = unicode(href)

        query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
        btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
        ih = btih.split(':')[-1]

        name = unicode(trs[3].cssselect('td')[1].text)
        value, unit = trs[5].cssselect('td')[1].text.split()

        # The age cell is split into three whitespace-separated tokens; only
        # the amount and the unit are used.
        valueago, valueunit, _ = trs[6].cssselect('td')[1].text.split()
        date = datetime.now() - self._age_to_timedelta(valueago, valueunit)

        files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]

        torrent = Torrent(ih, name)
        torrent.url = unicode(self.url)
        torrent.size = fullsize(float(value), unit)
        torrent.magnet = magnet
        torrent.seeders = NotAvailable
        torrent.leechers = NotAvailable
        torrent.description = NotAvailable
        torrent.files = files
        torrent.filename = NotAvailable
        torrent.date = date
        return torrent

    def get_torrent_file(self, id):
        """Always raise MagnetOnly carrying the torrent's magnet link.

        No .torrent file is ever returned by this method.
        """
        raise MagnetOnly(self.get_torrent(id).magnet)