starting dev of piratebay backend
This commit is contained in:
parent
8f8ac3d738
commit
a55a241ffb
7 changed files with 333 additions and 0 deletions
3
weboob/backends/piratebay/__init__.py
Normal file
3
weboob/backends/piratebay/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .backend import PiratebayBackend
|
||||
|
||||
__all__ = ['PiratebayBackend']
|
||||
55
weboob/backends/piratebay/backend.py
Normal file
55
weboob/backends/piratebay/backend.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010 Julien Veyssier
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
from weboob.capabilities.torrent import ICapTorrent
|
||||
from weboob.tools.backend import BaseBackend
|
||||
from weboob.tools.value import ValuesDict, Value
|
||||
|
||||
from .browser import PiratebayBrowser
|
||||
|
||||
|
||||
__all__ = ['PiratebayBackend']
|
||||
|
||||
|
||||
class PiratebayBackend(BaseBackend, ICapTorrent):
    """Torrent backend for The Pirate Bay.

    Every operation is forwarded to the browser instance; this class only
    carries the backend metadata and the glue between the torrent capability
    and ``PiratebayBrowser``.
    """

    NAME = 'piratebay'
    MAINTAINER = 'Julien Veyssier'
    EMAIL = 'julien.veyssier@aiur.fr'
    VERSION = '0.4'
    DESCRIPTION = 'the pirate bay bittorrent tracker'
    LICENSE = 'GPLv3'
    # No CONFIG is declared: the domain/protocol/username/password values
    # that used to be sketched here are not used by this backend.
    BROWSER = PiratebayBrowser

    def create_default_browser(self):
        """Build the browser with no extra arguments."""
        return self.create_browser()

    def get_torrent(self, id):
        """Return the torrent identified by *id*, as given by the browser."""
        return self.browser.get_torrent(id)

    def get_torrent_file(self, id):
        """Download and return the raw .torrent content for *id*.

        Returns None when the browser cannot find the torrent.
        """
        torrent = self.browser.get_torrent(id)
        if torrent:
            response = self.browser.openurl(torrent.url.encode('utf-8'))
            return response.read()
        return None

    def iter_torrents(self, pattern):
        """Return the browser's search results for *pattern*."""
        return self.browser.iter_torrents(pattern)
|
||||
69
weboob/backends/piratebay/browser.py
Normal file
69
weboob/backends/piratebay/browser.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010 Julien Veyssier
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages.index import IndexPage
|
||||
from .pages.torrents import TorrentsPage
|
||||
|
||||
|
||||
__all__ = ['PiratebayBrowser']
|
||||
|
||||
|
||||
class PiratebayBrowser(BaseBrowser):
    """Browser for thepiratebay.org.

    Maps site URLs to page classes through ``PAGES`` and exposes the search
    and torrent-lookup helpers used by the backend.

    The constructor is inherited unchanged from ``BaseBrowser``. The mapping
    must stay a class attribute: assigning an empty ``self.PAGES`` on the
    instance would hide it, leaving no URL associated with any page class.
    """

    # URL regexp -> page class. Dots are escaped so that they only match a
    # literal '.' in the host name.
    PAGES = {r'https://thepiratebay\.org': IndexPage,
             r'https://thepiratebay\.org/search/.*/0/7/0': TorrentsPage,
             # NOTE(review): no TorrentPage class is imported in this module;
             # TorrentsPage is the class that implements get_torrent(), so
             # detail URLs are routed to it until a dedicated page exists.
             r'https://thepiratebay\.org/torrent/.*': TorrentsPage,
             }

    def home(self):
        """Go to the site's front page."""
        return self.location('https://thepiratebay.org')

    def iter_torrents(self, pattern):
        """Search the site for *pattern* and return the page's result iterator.

        The pattern is percent-encoded before being placed in the URL path so
        that spaces, slashes and non-ASCII characters cannot break the URL.
        """
        # Function-scope import: works on Python 2 and falls back to the
        # Python 3 location of the same function.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote

        if not isinstance(pattern, bytes):
            # Text is encoded to UTF-8 first so quoting never fails on
            # non-ASCII characters.
            pattern = pattern.encode('utf-8')

        self.location('https://thepiratebay.org/search/%s/0/7/0'
                      % quote(pattern, safe=''))

        assert self.is_on_page(TorrentsPage)
        return self.page.iter_torrents()

    def get_torrent(self, id):
        """Open the detail page of torrent *id* and return what the page parses."""
        self.location('https://thepiratebay.org/torrent/%s/' % id)

        # TorrentsPage, not the undefined TorrentPage: see the PAGES note.
        assert self.is_on_page(TorrentsPage)
        return self.page.get_torrent(id)
|
||||
0
weboob/backends/piratebay/pages/__init__.py
Normal file
0
weboob/backends/piratebay/pages/__init__.py
Normal file
28
weboob/backends/piratebay/pages/index.py
Normal file
28
weboob/backends/piratebay/pages/index.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010 Julien Veyssier
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
|
||||
|
||||
__all__ = ['IndexPage']
|
||||
|
||||
|
||||
class IndexPage(BasePage):
    """Front page of the site."""

    def is_logged(self):
        """Tell whether the document's <body> element has an ``id`` attribute."""
        body = self.document.find('body')
        return 'id' in body.attrib
|
||||
|
||||
152
weboob/backends/piratebay/pages/torrents.py
Normal file
152
weboob/backends/piratebay/pages/torrents.py
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010 Julien Veyssier
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
|
||||
import re
|
||||
from logging import warning, debug
|
||||
|
||||
from weboob.tools.misc import html2text
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.capabilities.torrent import Torrent
|
||||
from weboob.capabilities.base import NotLoaded
|
||||
|
||||
|
||||
__all__ = ['TorrentsPage']
|
||||
|
||||
|
||||
class TorrentsPage(BasePage):
    """Page object used for search results and for torrent details.

    ``iter_torrents`` reads the ``searchResult`` table of a search page;
    ``get_torrent`` fills a single ``Torrent`` from a detail page.
    """

    # NOTE(review): this pattern targets ``torrents.php?action=download``
    # links. It is restored because get_torrent() references it; confirm it
    # matches the download links actually served by this site.
    TORRENTID_REGEXP = re.compile(r'torrents\.php\?action=download&id=(\d+)')

    # Same origin as the URLs hard-coded in the browser.
    BASE_URL = 'https://thepiratebay.org'

    # Multipliers applied by unit(); unknown units count as plain bytes.
    UNIT_FACTORS = {'KB': 1024,
                    'MB': 1024 * 1024,
                    'GB': 1024 * 1024 * 1024,
                    'TB': 1024 * 1024 * 1024 * 1024,
                    }

    def unit(self, n, u):
        """Convert the textual amount *n* expressed in unit *u* to bytes.

        Thousands separators (',') in *n* are ignored. The result is a float.
        """
        return float(n.replace(',', '')) * self.UNIT_FACTORS.get(u, 1)

    def format_url(self, url):
        """Return *url* as an absolute URL on the site.

        URLs that already carry a scheme are returned untouched.
        """
        if '://' in url:
            return url
        return '%s/%s' % (self.BASE_URL, url.lstrip('/'))

    def _parse_size(self, description):
        """Extract the size in bytes from a result's description text.

        Expects comma-separated text whose second field reads like
        ``Size 698.22 MiB`` (format inferred from how the text is split).
        """
        # split() with no argument also splits on a non-breaking space
        # between the number and its unit.
        tokens = description.split(',')[1].split()
        # 'MiB' -> 'MB' so the unit matches the keys of UNIT_FACTORS.
        return self.unit(tokens[1], tokens[2].replace('i', ''))

    def iter_torrents(self):
        """Yield a ``Torrent`` for each row of the ``searchResult`` table.

        Tables with another id are skipped, as are rows that do not have the
        expected cells (for instance the header row).
        """
        for table in self.document.iter('table'):
            if table.attrib.get('id', '') != 'searchResult':
                continue

            for tr in table.iter('tr'):
                cells = tr.findall('td')
                if len(cells) < 4:
                    # Header rows use <th>, so they have no <td> cells.
                    continue

                name_cell = cells[1]
                children = list(name_cell)
                if len(children) < 2:
                    continue

                # First child wraps the title link, second child is the
                # download link.
                title_link = children[0].find('a')
                details = name_cell.find('font')
                if title_link is None or details is None:
                    continue

                # href looks like '/torrent/<id>/<name>'.
                idt = title_link.attrib['href'].split('/')[2]
                url = children[1].attrib['href']

                yield Torrent(idt,
                              title_link.text,
                              url=url,
                              size=self._parse_size(details.text or ''),
                              seeders=int((cells[2].text or '0').strip()),
                              leechers=int((cells[3].text or '0').strip()))

    def get_torrent(self, id):
        """Build the ``Torrent`` identified by *id* from the current page.

        Returns None (after logging a warning) when an expected element is
        missing or when no row of the torrent table matches *id*.

        NOTE(review): the selectors used here (``div.thin``,
        ``table.torrent_table``, ``div.main_column``) should be checked
        against the markup of this site's detail pages.
        """
        root = self.document.getroot()

        header = root.cssselect('div.thin')
        if not header:
            warning('No div.thin found')
            return None

        title = ''
        h2 = header[0].find('h2')
        if h2 is not None:
            title = h2.text or ''
            link = h2.find('a')
            if link is not None:
                # text and tail may each be None.
                title += (link.text or '') + (link.tail or '')

        torrent = Torrent(id, title)

        tables = root.cssselect('table.torrent_table')
        if not tables:
            warning('No table found')
            return None

        for tr in tables[0].findall('tr'):
            if not tr.attrib.get('class', '').startswith('group_torrent'):
                continue

            tds = tr.findall('td')
            if len(tds) != 5:
                continue

            url = tds[0].find('span').find('a').attrib['href']
            match = self.TORRENTID_REGEXP.match(url)
            if not match:
                warning('ID not found')
                continue

            # Compare as text: the matched group is always a string.
            if match.group(1) != '%s' % torrent.id:
                continue

            torrent.url = self.format_url(url)
            torrent.size = self.unit(*tds[1].text.split())
            torrent.seeders = int(tds[3].text)
            torrent.leechers = int(tds[4].text)
            break

        if not torrent.url:
            # %s rather than %d: the id is usually a string.
            warning('Torrent %s not found in list' % torrent.id)
            return None

        main = root.cssselect('div.main_column')
        if not main:
            warning('WTF')
            return None

        for box in main[0].cssselect('div.box'):
            heading = None
            body = None

            head_nodes = box.cssselect('div.head')
            if head_nodes:
                heading = head_nodes[0].find('strong').text.strip()
            body_nodes = box.cssselect('div.body')
            if body_nodes:
                body = html2text(self.browser.parser.tostring(body_nodes[0])).strip()

            if heading and body:
                if torrent.description is NotLoaded:
                    torrent.description = u''
                torrent.description += u'%s\n\n%s\n' % (heading, body)

        files_div = root.cssselect('div#files_%s' % torrent.id)
        if files_div:
            torrent.files = []
            files_table = files_div[0].find('table')
            if files_table is not None:
                for tr in files_table:
                    if tr.attrib.get('class', None) != 'colhead_dark':
                        torrent.files.append(tr.find('td').text)

        return torrent
|
||||
26
weboob/backends/piratebay/test.py
Normal file
26
weboob/backends/piratebay/test.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010 Julien Veyssier
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, version 3 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
from weboob.tools.test import BackendTest
|
||||
|
||||
class PiratebayTest(BackendTest):
    """Smoke test for the piratebay backend."""

    BACKEND = 'piratebay'

    def test_torrent(self):
        """Search for 'debian' and fetch the file of the first hit, if any."""
        results = list(self.backend.iter_torrents('debian'))
        if not results:
            return
        first = results[0]
        self.backend.get_torrent_file(first.id)
|
||||
Loading…
Add table
Add a link
Reference in a new issue