From a55a241ffbd18b25c418dee5e89d7e8b9c8699db Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 26 Nov 2010 15:52:43 +0100 Subject: [PATCH] starting dev of piratebay backend --- weboob/backends/piratebay/__init__.py | 3 + weboob/backends/piratebay/backend.py | 55 +++++++ weboob/backends/piratebay/browser.py | 69 +++++++++ weboob/backends/piratebay/pages/__init__.py | 0 weboob/backends/piratebay/pages/index.py | 28 ++++ weboob/backends/piratebay/pages/torrents.py | 152 ++++++++++++++++++++ weboob/backends/piratebay/test.py | 26 ++++ 7 files changed, 333 insertions(+) create mode 100644 weboob/backends/piratebay/__init__.py create mode 100644 weboob/backends/piratebay/backend.py create mode 100644 weboob/backends/piratebay/browser.py create mode 100644 weboob/backends/piratebay/pages/__init__.py create mode 100644 weboob/backends/piratebay/pages/index.py create mode 100644 weboob/backends/piratebay/pages/torrents.py create mode 100644 weboob/backends/piratebay/test.py diff --git a/weboob/backends/piratebay/__init__.py b/weboob/backends/piratebay/__init__.py new file mode 100644 index 00000000..97955e8c --- /dev/null +++ b/weboob/backends/piratebay/__init__.py @@ -0,0 +1,3 @@ +from .backend import PiratebayBackend + +__all__ = ['PiratebayBackend'] diff --git a/weboob/backends/piratebay/backend.py b/weboob/backends/piratebay/backend.py new file mode 100644 index 00000000..f7e6031d --- /dev/null +++ b/weboob/backends/piratebay/backend.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010 Julien Veyssier +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.capabilities.torrent import ICapTorrent
+from weboob.tools.backend import BaseBackend
+from weboob.tools.value import ValuesDict, Value
+
+from .browser import PiratebayBrowser
+
+
+__all__ = ['PiratebayBackend']
+
+
+class PiratebayBackend(BaseBackend, ICapTorrent):
+    """Torrent capability backend that delegates to PiratebayBrowser."""
+
+    NAME = 'piratebay'
+    MAINTAINER = 'Julien Veyssier'
+    EMAIL = 'julien.veyssier@aiur.fr'
+    VERSION = '0.4'
+    DESCRIPTION = 'the pirate bay bittorrent tracker'
+    LICENSE = 'GPLv3'
+    BROWSER = PiratebayBrowser
+
+    def create_default_browser(self):
+        return self.create_browser()
+
+    def get_torrent(self, id):
+        """Return the torrent the browser finds for *id*."""
+        return self.browser.get_torrent(id)
+
+    def get_torrent_file(self, id):
+        """Return the content behind the torrent's url, None if no torrent."""
+        found = self.browser.get_torrent(id)
+        if found:
+            return self.browser.openurl(found.url.encode('utf-8')).read()
+        return None
+
+    def iter_torrents(self, pattern):
+        """Iterate over the browser's search results for *pattern*."""
+        return self.browser.iter_torrents(pattern)
diff --git a/weboob/backends/piratebay/browser.py b/weboob/backends/piratebay/browser.py
new file mode 100644
index 00000000..52105b3f
--- /dev/null
+++ b/weboob/backends/piratebay/browser.py
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010 Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+try:
+    from urllib import quote  # Python 2
+except ImportError:
+    from urllib.parse import quote  # Python 3
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages.index import IndexPage
+from .pages.torrents import TorrentsPage
+
+
+__all__ = ['PiratebayBrowser']
+
+
+class PiratebayBrowser(BaseBrowser):
+    """Browser for thepiratebay.org.
+
+    PAGES stays a class attribute: replacing it on the instance with an
+    empty dict would leave no URL mapped to a page class, so the
+    is_on_page() assertions below could not succeed.
+    """
+
+    # The torrent detail URL maps to TorrentsPage, the class that provides
+    # get_torrent(); no separate TorrentPage class is defined.
+    PAGES = {'https://thepiratebay.org': IndexPage,
+             'https://thepiratebay.org/search/.*/0/7/0': TorrentsPage,
+             'https://thepiratebay.org/torrent/.*': TorrentsPage,
+             }
+
+    def home(self):
+        """Go to the site's front page."""
+        return self.location('https://thepiratebay.org')
+
+    def iter_torrents(self, pattern):
+        """Search the site for *pattern* and iterate over the results."""
+        # Percent-encode the pattern so spaces or slashes cannot break the
+        # URL path; text is encoded to UTF-8 first for non-ASCII input.
+        if not isinstance(pattern, bytes):
+            pattern = pattern.encode('utf-8')
+        query = quote(pattern, safe='')
+        self.location('https://thepiratebay.org/search/%s/0/7/0' % query)
+
+        assert self.is_on_page(TorrentsPage)
+        return self.page.iter_torrents()
+
+    def get_torrent(self, id):
+        """Open the page of torrent *id* and return what it parses."""
+        self.location('https://thepiratebay.org/torrent/%s/' % id)
+
+        assert self.is_on_page(TorrentsPage)
+        return self.page.get_torrent(id)
diff --git a/weboob/backends/piratebay/pages/__init__.py b/weboob/backends/piratebay/pages/__init__.py
new
file mode 100644
index 00000000..e69de29b
diff --git a/weboob/backends/piratebay/pages/index.py b/weboob/backends/piratebay/pages/index.py
new file mode 100644
index 00000000..4396a7e3
--- /dev/null
+++ b/weboob/backends/piratebay/pages/index.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010 Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.browser import BasePage
+
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    def is_logged(self):
+        """Tell whether the page's <body> element has an 'id' attribute."""
+        body = self.document.find('body')
+        return 'id' in body.attrib
diff --git a/weboob/backends/piratebay/pages/torrents.py b/weboob/backends/piratebay/pages/torrents.py
new file mode 100644
index 00000000..f52e227e
--- /dev/null
+++ b/weboob/backends/piratebay/pages/torrents.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010 Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+import re
+from logging import warning, debug
+
+from weboob.tools.misc import html2text
+from weboob.tools.browser import BasePage
+from weboob.capabilities.torrent import Torrent
+from weboob.capabilities.base import NotLoaded
+
+
+__all__ = ['TorrentsPage']
+
+
+class TorrentsPage(BasePage):
+    """Page parser for torrent search results and torrent details."""
+
+    # Captures the torrent id embedded in a download link.
+    TORRENTID_REGEXP = re.compile(r'torrents\.php\?action=download&id=(\d+)')
+
+    def unit(self, n, u):
+        """Return size string *n* (commas ignored), in unit *u*, as bytes."""
+        m = {'KB': 1024, 'MB': 1024 ** 2, 'GB': 1024 ** 3, 'TB': 1024 ** 4}
+        return float(n.replace(',', '')) * m.get(u, 1)
+
+    def iter_torrents(self):
+        """Yield a Torrent per result row; raise on an unexpected table."""
+        for table in self.document.getiterator('table'):
+            if table.attrib.get('id', '') != 'searchResult':
+                raise Exception("You're in serious troubles!")
+
+            for tr in table.getiterator('tr'):
+                cells = tr.getchildren()
+                # Header (<th>) rows and rows too short to hold the seeders
+                # and leechers columns describe no torrent.
+                if len(cells) < 4 or cells[0].tag != 'td':
+                    continue
+
+                td = cells[1]
+                link = td.getchildren()[0].find('a')
+                idt = link.attrib['href'].split('/')[2]
+                url = td.getchildren()[1].attrib['href']
+
+                # NOTE(review): expects the third space-separated token of
+                # the second comma-separated field to be the value followed
+                # by a three-character unit such as 'GiB' -- confirm.
+                size = td.find('font').text.split(',')[1]
+                size = size.split(' ')[2]
+                u = size[-3:].replace('i', '')
+                size = size[:-3].strip()
+
+                yield Torrent(idt, link.text, url=url,
+                              size=self.unit(size, u),
+                              seeders=int(cells[2].text),
+                              leechers=int(cells[3].text))
+
+    def get_torrent(self, id):
+        """Build the Torrent identified by *id* from the current page.
+
+        Returns None, after logging a warning, when an expected element is
+        missing or when no row of the torrent table matches *id*.
+
+        NOTE(review): the selectors used here have not been checked against
+        the thepiratebay.org markup -- confirm before relying on them.
+        """
+        root = self.document.getroot()
+        table = root.cssselect('div.thin')
+        if not table:
+            warning('No div.thin found')
+            return None
+
+        h2 = table[0].find('h2')
+        title = h2.text or ''
+        a = h2.find('a')
+        if a is not None:
+            title += (a.text or '') + (a.tail or '')
+
+        torrent = Torrent(id, title)
+        table = root.cssselect('table.torrent_table')
+        if not table:
+            warning('No table found')
+            return None
+
+        for tr in table[0].findall('tr'):
+            if not tr.attrib.get('class', '').startswith('group_torrent'):
+                continue
+            tds = tr.findall('td')
+            if len(tds) != 5:
+                continue
+
+            url = tds[0].find('span').find('a').attrib['href']
+            found = self.TORRENTID_REGEXP.match(url)
+            if not found:
+                warning('ID not found')
+                continue
+            if found.group(1) != torrent.id:
+                continue
+
+            torrent.url = url  # kept as found, like iter_torrents() does
+            torrent.size = self.unit(*tds[1].text.split())
+            torrent.seeders = int(tds[3].text)
+            torrent.leechers = int(tds[4].text)
+            break
+
+        if not torrent.url:
+            # %s: the id is compared with a regexp group, hence a string.
+            warning('Torrent %s not found in list' % torrent.id)
+            return None
+
+        div = root.cssselect('div.main_column')
+        if not div:
+            warning('WTF')
+            return None
+
+        for box in div[0].cssselect('div.box'):
+            title = body = None
+            title_t = box.cssselect('div.head')
+            if title_t:
+                title = title_t[0].find('strong').text.strip()
+            body_t = box.cssselect('div.body')
+            if body_t:
+                body = html2text(self.browser.parser.tostring(body_t[0])).strip()
+
+            if title and body:
+                if torrent.description is NotLoaded:
+                    torrent.description = u''
+                torrent.description += u'%s\n\n%s\n' % (title, body)
+
+        div = root.cssselect('div#files_%s' % torrent.id)
+        if div:
+            torrent.files = []
+            for tr in div[0].find('table'):
+                if tr.attrib.get('class', None) != 'colhead_dark':
+                    torrent.files.append(tr.find('td').text)
+
+        return torrent
diff --git a/weboob/backends/piratebay/test.py b/weboob/backends/piratebay/test.py
new file mode 100644
index 00000000..42e495e9
--- /dev/null
+++ b/weboob/backends/piratebay/test.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010 Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.test import BackendTest
+
+class PiratebayTest(BackendTest):
+    BACKEND = 'piratebay'
+
+    def test_torrent(self):
+        """Search for 'debian' and fetch the file of the first hit, if any."""
+        for first in list(self.backend.iter_torrents('debian'))[:1]:
+            self.backend.get_torrent_file(first.id)