diff --git a/modules/bitedick/__init__.py b/modules/bitedick/__init__.py
new file mode 100644
index 00000000..b3e12055
--- /dev/null
+++ b/modules/bitedick/__init__.py
@@ -0,0 +1,3 @@
+from .backend import BTDiggBackend
+
+__all__ = ['BTDiggBackend']
diff --git a/modules/bitedick/backend.py b/modules/bitedick/backend.py
new file mode 100644
index 00000000..c45d088f
--- /dev/null
+++ b/modules/bitedick/backend.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+from weboob.capabilities.torrent import ICapTorrent
+from weboob.tools.backend import BaseBackend
+
+from .browser import BTDiggBrowser
+
+
+__all__ = ['BTDiggBackend']
+
+
+class BTDiggBackend(BaseBackend, ICapTorrent):
+    """Torrent backend for btdigg.org; every call is delegated to the browser."""
+
+    NAME = 'btdigg'
+    MAINTAINER = u'Matthieu Rakotojaona'
+    EMAIL = 'matthieu.rakotojaona@gmail.com'
+    VERSION = '0.i'
+    DESCRIPTION = 'The BitTorrent DHT search engine.'
+    LICENSE = 'CC0'
+    BROWSER = BTDiggBrowser
+
+    def create_default_browser(self):
+        """Return the browser built by create_browser() without arguments."""
+        return self.create_browser()
+
+    def get_torrent(self, id):
+        """Return the torrent whose info hash is *id*."""
+        return self.browser.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        """Return whatever the browser returns for the torrent file of *id*."""
+        return self.browser.get_torrent_file(id)
+
+    def iter_torrents(self, pattern):
+        """Return an iterator over the torrents matching *pattern*."""
+        # The pattern is passed through untouched: the browser already runs
+        # it through urllib.quote_plus(), so replacing spaces with '+' here
+        # as well would send them as a literal '%2B'.
+        return self.browser.iter_torrents(pattern)
+
+    # NOTE(review): disabled field-filling hook, kept as found.
+    #def fill_torrent(self, torrent, fields):
+    #    if 'description' in fields or fields == None:
+    #        return self.get_torrent(torrent.id)
+
+    #OBJECTS = {
+    #    Torrent:fill_torrent
+    #}
diff --git a/modules/bitedick/browser.py b/modules/bitedick/browser.py
new file mode 100644
index 00000000..c48d6dc6
--- /dev/null
+++ b/modules/bitedick/browser.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+import urllib
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages.index import IndexPage
+from .pages.torrents import TorrentsPage, TorrentPage
+
+
+__all__ = ['BTDiggBrowser']
+
+
+class BTDiggBrowser(BaseBrowser):
+    """Browser for btdigg.org: keyword search and lookup by info hash."""
+
+    DOMAIN = 'btdigg.org'
+    PROTOCOL = 'https'
+    ENCODING = 'utf-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
+    # URL regular expressions mapped to the page class handling them.  The
+    # '?' opening the query string is escaped: left bare it is a quantifier
+    # that merely makes the 'h' of 'search' optional.
+    PAGES = {'https://btdigg.org/': IndexPage,
+             r'https://btdigg.org/search\?.*q=[^?]*': TorrentsPage,
+             r'https://btdigg.org/search\?.*info_hash=[^?]*': TorrentPage,
+             }
+
+    def home(self):
+        """Go to the site's front page."""
+        return self.location('https://btdigg.org')
+
+    def iter_torrents(self, pattern):
+        """Search for *pattern* and return the result page's torrent iterator."""
+        self.location('https://btdigg.org/search?q=%s' % urllib.quote_plus(pattern.encode('utf-8')))
+
+        assert self.is_on_page(TorrentsPage)
+        return self.page.iter_torrents()
+
+    def get_torrent(self, id):
+        """Open the page of info hash *id* and return its Torrent."""
+        self.location('https://btdigg.org/search?info_hash=%s' % id)
+
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        """Open the page of info hash *id* and ask it for the torrent file."""
+        self.location('https://btdigg.org/search?info_hash=%s' % id)
+
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent_file(id)
diff --git a/modules/bitedick/pages/__init__.py b/modules/bitedick/pages/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/bitedick/pages/index.py b/modules/bitedick/pages/index.py
new file mode 100644
index 00000000..6f3833f7
--- /dev/null
+++ b/modules/bitedick/pages/index.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from weboob.tools.browser import BasePage
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    pass
diff --git a/modules/bitedick/pages/torrents.py b/modules/bitedick/pages/torrents.py
new file mode 100644
index 00000000..0839e1a9
--- /dev/null
+++ b/modules/bitedick/pages/torrents.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+
+from datetime import datetime, timedelta
+from urlparse import urlparse, parse_qs
+
+from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.capabilities.torrent import Torrent, MagnetOnly
+from weboob.capabilities.base import NotAvailable
+
+__all__ = ['TorrentsPage', 'TorrentPage']
+
+
+def fullsize(n, u):
+    """Return, as a float, the number of bytes in *n* units *u*.
+
+    *u* is one of 'B', 'KB', 'MB', 'GB' or 'TB', each 1024 times the
+    previous one; any other value raises KeyError.
+    """
+    m = {'B': 1,
+         'KB': 1024,
+         'MB': 1024 * 1024,
+         'GB': 1024 * 1024 * 1024,
+         'TB': 1024 * 1024 * 1024 * 1024,
+         }
+    return float(n * m[u])
+
+
+def _info_hash(magnet):
+    """Return the last ':'-separated part of the magnet link's 'xt' value."""
+    query = urlparse(magnet).query  # xt=urn:btih:<...>&dn=<...>
+    btih = parse_qs(query)['xt'][0]  # urn:btih:<...>
+    return btih.split(':')[-1]
+
+
+def _age_to_date(text):
+    """Return datetime.now() minus the age spelled out in *text*.
+
+    *text* holds three words: a number, a unit and a third word that is
+    ignored.  The unit is used as a timedelta() keyword, so it has to be
+    one of its argument names.
+    """
+    valueago, valueunit, _ = text.split()
+    delta = timedelta(**{valueunit: float(valueago)})
+    return datetime.now() - delta
+
+
+class TorrentsPage(BasePage):
+    """Page listing the results of a search."""
+
+    def iter_torrents(self):
+        """Yield one Torrent per search result.
+
+        Seeders, leechers, description and files are set to NotAvailable.
+        """
+        try:
+            table = self.document.getroot().cssselect('table.torrent_name_tbl')
+        except BrokenPageError:
+            return
+        # A result spans two consecutive tables: the title, then the other
+        # attributes.  Pairing them with zip() drops a trailing title table
+        # that has no attribute table instead of failing on a bad index.
+        for header, details in zip(table[::2], table[1::2]):
+            # Title
+            title = header.cssselect('td.torrent_name a')[0]
+            name = unicode(title.text)
+
+            # Other elems
+            elems = details.cssselect('td')
+
+            magnet = unicode(elems[0].cssselect('a')[0].attrib['href'])
+            ih = _info_hash(magnet)
+
+            value, unit = elems[2].cssselect('span.attr_val')[0].text.split()
+
+            torrent = Torrent(ih, name)
+            torrent.url = unicode('https://btdigg.org/search?info_hash=%s' % ih)
+            torrent.size = fullsize(float(value), unit)
+            torrent.magnet = magnet
+            torrent.seeders = NotAvailable
+            torrent.leechers = NotAvailable
+            torrent.description = NotAvailable
+            torrent.files = NotAvailable
+            torrent.date = _age_to_date(elems[5].cssselect('span.attr_val')[0].text)
+            yield torrent
+
+
+class TorrentPage(BasePage):
+    """Page describing a single torrent."""
+
+    def get_torrent(self, id):
+        """Return the Torrent described by this page.
+
+        *id* is used as the torrent id, and the magnet link is NotAvailable,
+        when the page's download link is not a magnet link.
+        """
+        trs = self.document.getroot().cssselect('table.torrent_info_tbl tr')
+
+        # Fallbacks for a download link that is not a magnet link; without
+        # them building the Torrent below failed on unbound names.
+        ih = id
+        magnet = NotAvailable
+
+        # magnet
+        href = trs[2].cssselect('td a')[0].attrib['href']
+        if href.startswith('magnet:'):
+            magnet = unicode(href)
+            ih = _info_hash(magnet)
+
+        name = unicode(trs[3].cssselect('td')[1].text)
+
+        value, unit = trs[5].cssselect('td')[1].text.split()
+
+        date = _age_to_date(trs[6].cssselect('td')[1].text)
+
+        # File names are read from the 16th row onwards.
+        files = [unicode(tr.cssselect('td')[1].text) for tr in trs[15:]]
+
+        torrent = Torrent(ih, name)
+        torrent.url = unicode(self.url)
+        torrent.size = fullsize(float(value), unit)
+        torrent.magnet = magnet
+        torrent.seeders = NotAvailable
+        torrent.leechers = NotAvailable
+        torrent.description = NotAvailable
+        torrent.files = files
+        torrent.filename = NotAvailable
+        torrent.date = date
+
+        return torrent
+
+    def get_torrent_file(self, id):
+        """Raise MagnetOnly with the magnet link of the torrent on this page."""
+        raise MagnetOnly(self.get_torrent(id).magnet)
diff --git a/modules/bitedick/test.py b/modules/bitedick/test.py
new file mode 100644
index 00000000..40657765
--- /dev/null
+++ b/modules/bitedick/test.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+
+from weboob.tools.test import BackendTest
+from weboob.capabilities.torrent import MagnetOnly
+
+from random import choice
+
+__all__ = ['BTDiggTest']
+
+
+class BTDiggTest(BackendTest):
+    """Tests of the 'btdigg' backend."""
+
+    BACKEND = 'btdigg'
+
+    def test_iter_torrents(self):
+        # try something popular so we sometimes get a magnet-only torrent
+        torrents = list(self.backend.iter_torrents('ubuntu linux'))
+        # assertEqual reports both values on failure, unlike assertTrue
+        self.assertEqual(len(torrents), 10)
+        for torrent in torrents:
+            assert torrent.name
+            assert torrent.url
+            assert torrent.size
+            assert torrent.magnet
+            assert torrent.date
+
+            self.assertEqual(40, len(torrent.id))
+
+    def test_get_random_torrentfile(self):
+        torrent = choice(list(self.backend.iter_torrents('ubuntu linux')))
+        full_torrent = self.backend.get_torrent(torrent.id)
+        try:
+            self.backend.get_torrent_file(torrent.id)
+        except MagnetOnly as e:
+            assert e.magnet.startswith("magnet:")
+            assert e.magnet == full_torrent.magnet
+
+    def test_get_special_torrent(self):
+        torrent = self.backend.get_torrent("c2e018a16bf28520687e400580be08934d00373a")
+        assert torrent.name == u'Ubuntu Linux Toolbox - 1000+ Commands for Ubuntu and Debian Power Users~tqw~_darksiderg'
+        assert len(torrent.files) == 3
+        assert torrent.size == float(3376414.72)
+        assert torrent.url == "https://btdigg.org/search?info_hash=c2e018a16bf28520687e400580be08934d00373a"
+        dt = torrent.date
+        assert dt.year == 2011
+        assert dt.month == 2