starting dev of piratebay backend

2010-11-26 15:52:43 +01:00 · 2010-11-26 15:52:43 +01:00 · a55a241ffb
commit a55a241ffb
parent 8f8ac3d738
7 changed files with 333 additions and 0 deletions
--- a/weboob/backends/piratebay/__init__.py
+++ b/weboob/backends/piratebay/__init__.py
@ -0,0 +1,3 @@
+from .backend import PiratebayBackend
+
+__all__ = ['PiratebayBackend']
--- a/weboob/backends/piratebay/backend.py
+++ b/weboob/backends/piratebay/backend.py
@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010  Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.capabilities.torrent import ICapTorrent
+from weboob.tools.backend import BaseBackend
+from weboob.tools.value import ValuesDict, Value
+
+from .browser import PiratebayBrowser
+
+
+__all__ = ['PiratebayBackend']
+
+
+class PiratebayBackend(BaseBackend, ICapTorrent):
+    """Torrent backend for The Pirate Bay.
+
+    Every capability method simply forwards to the PiratebayBrowser
+    reachable through ``self.browser``.
+    """
+
+    NAME = 'piratebay'
+    MAINTAINER = 'Julien Veyssier'
+    EMAIL = 'julien.veyssier@aiur.fr'
+    VERSION = '0.4'
+    DESCRIPTION = 'the pirate bay bittorrent tracker'
+    LICENSE = 'GPLv3'
+    # No CONFIG is declared for this backend yet; the block below is disabled.
+    #CONFIG = ValuesDict(Value('domain',   label='Domain (example "ssl.what.cd")'),
+    #                    Value('protocol', label='Protocol to use', choices=('http', 'https')),
+    #                    Value('username', label='Username'),
+    #                    Value('password', label='Password', masked=True))
+    BROWSER = PiratebayBrowser
+
+    def create_default_browser(self):
+        """Build the browser by calling ``create_browser()`` without arguments."""
+        browser = self.create_browser()
+        return browser
+
+    def get_torrent(self, id):
+        """Return whatever the browser's ``get_torrent`` gives for *id*."""
+        found = self.browser.get_torrent(id)
+        return found
+
+    def get_torrent_file(self, id):
+        """Download the .torrent file of torrent *id*.
+
+        Returns the raw content read from the torrent's URL, or None when
+        the browser returns a falsy torrent.
+        """
+        torrent = self.browser.get_torrent(id)
+        if torrent:
+            response = self.browser.openurl(torrent.url.encode('utf-8'))
+            return response.read()
+
+        return None
+
+    def iter_torrents(self, pattern):
+        """Return the browser's search results for *pattern*."""
+        results = self.browser.iter_torrents(pattern)
+        return results
--- a/weboob/backends/piratebay/browser.py
+++ b/weboob/backends/piratebay/browser.py
@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010  Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages.index import IndexPage
+from .pages.torrents import TorrentsPage
+
+
+__all__ = ['PiratebayBrowser']
+
+
+class PiratebayBrowser(BaseBrowser):
+    """Browser for thepiratebay.org.
+
+    PAGES maps URL regular expressions to the page class expected at that
+    location (presumably consumed by BaseBrowser to pick ``self.page`` --
+    confirm against weboob.tools.browser).
+    """
+
+    PAGES = {'https://thepiratebay.org' : IndexPage,
+             'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
+             #'https://thepiratebay.org/torrent/.*' : TorrentPage
+             }
+
+    def __init__(self, *args, **kwargs):
+        """Forward every argument to BaseBrowser unchanged."""
+        # self.PAGES must NOT be rebound here: an empty instance-level dict
+        # would shadow the class-level map above, no URL could match a page
+        # class any more, and the is_on_page() assertions below would fail.
+        BaseBrowser.__init__(self, *args, **kwargs)
+
+    def home(self):
+        """Go to the site's front page."""
+        return self.location('https://thepiratebay.org')
+
+    def iter_torrents(self, pattern):
+        """Search the site for *pattern* and return the page's torrent iterator.
+
+        NOTE(review): *pattern* is interpolated into the URL without any
+        escaping -- confirm whether callers are expected to quote it.
+        """
+        self.location('https://thepiratebay.org/search/%s/0/7/0' % pattern)
+
+        assert self.is_on_page(TorrentsPage)
+        return self.page.iter_torrents()
+
+    def get_torrent(self, id):
+        """Open the page of torrent *id* and return the parsed torrent."""
+        self.location('https://thepiratebay.org/torrent/%s/' % id)
+
+        # NOTE(review): TorrentPage is neither imported nor defined in this
+        # module and its PAGES entry is still commented out, so this line
+        # raises NameError until that page class exists.
+        assert self.is_on_page(TorrentPage)
+        return self.page.get_torrent(id)
--- a/weboob/backends/piratebay/pages/__init__.py
+++ b/weboob/backends/piratebay/pages/__init__.py
--- a/weboob/backends/piratebay/pages/index.py
+++ b/weboob/backends/piratebay/pages/index.py
@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010  Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+from weboob.tools.browser import BasePage
+
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    """Page object for the index page."""
+
+    def is_logged(self):
+        """Return True when the document's <body> element has an ``id`` attribute."""
+        body = self.document.find('body')
+        return 'id' in body.attrib
+
--- a/weboob/backends/piratebay/pages/torrents.py
+++ b/weboob/backends/piratebay/pages/torrents.py
@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010  Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+import re
+from logging import warning, debug
+
+from weboob.tools.misc import html2text
+from weboob.tools.browser import BasePage
+from weboob.capabilities.torrent import Torrent
+from weboob.capabilities.base import NotLoaded
+
+
+__all__ = ['TorrentsPage']
+
+
+class TorrentsPage(BasePage):
+    """Page object that turns the torrent listing into Torrent objects."""
+
+    #TORRENTID_REGEXP = re.compile('torrents\.php\?action=download&id=(\d+)')
+    def unit(self, n, u):
+        """Scale a textual size by its unit.
+
+        n is the number as a string (',' thousands separators are dropped);
+        u is one of 'KB', 'MB', 'GB' or 'TB'.  Any other unit leaves the
+        number unscaled.  Returns a float.
+        """
+        m = {'KB': 1024,
+             'MB': 1024*1024,
+             'GB': 1024*1024*1024,
+             'TB': 1024*1024*1024*1024,
+            }
+        return float(n.replace(',', '')) * m.get(u, 1)
+
+    #def format_url(self, url):
+    #    return '%s://%s/%s' % (self.browser.PROTOCOL,
+    #                           self.browser.DOMAIN,
+    #                           url)
+
+    def iter_torrents(self):
+        """Yield one Torrent per result row of the ``searchResult`` table.
+
+        Each torrent carries its id, title, download URL, size (scaled
+        through unit()), seeders and leechers (both as int).
+        """
+        for table in self.document.getiterator('table'):
+            # Any other table is not the result list: skip it rather than
+            # aborting the whole search.
+            if table.attrib.get('id', '') != 'searchResult':
+                continue
+
+            for tr in table.getiterator('tr'):
+                cells = list(tr)
+                # A result row needs name, seeders and leechers cells; rows
+                # whose second cell is not a <td> carry no torrent.
+                if len(cells) < 4 or cells[1].tag != 'td':
+                    continue
+
+                td = cells[1]
+                anchor = td[0].find('a')
+                link = anchor.attrib['href']
+                title = anchor.text
+                idt = link.split('/')[2]
+
+                # The second child of the name cell holds the download link.
+                url = td[1].attrib['href']
+
+                # Second comma-separated field of the details text, third
+                # space-separated token: number and unit glued together
+                # (presumably by a non-breaking space -- TODO confirm).
+                size = td.find('font').text.split(',')[1]
+                size = size.split(' ')[2]
+                # Last three characters are the unit; dropping the 'i' maps
+                # e.g. 'MiB' onto the 'MB' key that unit() knows.
+                u = size[-3:].replace('i', '')
+                size = size[:-3].strip()
+
+                yield Torrent(idt,
+                              title,
+                              url=url,
+                              size=self.unit(size, u),
+                              seeders=int(cells[2].text),
+                              leechers=int(cells[3].text))
+
+    def get_torrent(self, id):
+        """Build the Torrent *id* from a torrent detail document.
+
+        Returns None (after logging a warning) when an expected element is
+        missing or when no row of the torrent table matches *id*.
+        """
+        table = self.document.getroot().cssselect('div.thin')
+        if not table:
+            warning('No div.thin found')
+            return None
+
+        h2 = table[0].find('h2')
+        title = h2.text or ''
+        a = h2.find('a')
+        if a is not None:
+            # text and tail may each be None when the element is empty.
+            title += (a.text or '') + (a.tail or '')
+
+        torrent = Torrent(id, title)
+        table = self.document.getroot().cssselect('table.torrent_table')
+        if not table:
+            warning('No table found')
+            return None
+
+        for tr in table[0].findall('tr'):
+            if not tr.attrib.get('class', '').startswith('group_torrent'):
+                continue
+
+            tds = tr.findall('td')
+            if len(tds) != 5:
+                continue
+
+            url = tds[0].find('span').find('a').attrib['href']
+            # NOTE(review): TORRENTID_REGEXP only exists as a comment at the
+            # top of this class, so this lookup raises AttributeError until
+            # it is defined.
+            match = self.TORRENTID_REGEXP.match(url)
+
+            if not match:
+                warning('ID not found')
+                continue
+
+            if match.group(1) != torrent.id:
+                continue
+
+            # NOTE(review): format_url is likewise only present as a comment.
+            torrent.url = self.format_url(url)
+            torrent.size = self.unit(*tds[1].text.split())
+            torrent.seeders = int(tds[3].text)
+            torrent.leechers = int(tds[4].text)
+            break
+
+        if not torrent.url:
+            # The id is compared with a regexp group above, i.e. it is a
+            # string: format it with %s, not %d.
+            warning('Torrent %s not found in list' % torrent.id)
+            return None
+
+        div = self.document.getroot().cssselect('div.main_column')
+        if not div:
+            warning('WTF')
+            return None
+
+        # Every box with both a head and a body adds a section to the
+        # description.
+        for box in div[0].cssselect('div.box'):
+            title = None
+            body = None
+
+            title_t = box.cssselect('div.head')
+            if title_t:
+                title = title_t[0].find('strong').text.strip()
+            body_t = box.cssselect('div.body')
+            if body_t:
+                body = html2text(self.browser.parser.tostring(body_t[0])).strip()
+
+            if title and body:
+                if torrent.description is NotLoaded:
+                    torrent.description = u''
+                torrent.description += u'%s\n\n%s\n' % (title, body)
+
+        div = self.document.getroot().cssselect('div#files_%s' % torrent.id)
+        if div:
+            torrent.files = []
+            for tr in div[0].find('table'):
+                if tr.attrib.get('class', None) != 'colhead_dark':
+                    torrent.files.append(tr.find('td').text)
+
+        return torrent
--- a/weboob/backends/piratebay/test.py
+++ b/weboob/backends/piratebay/test.py
@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010  Julien Veyssier
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.test import BackendTest
+
+class PiratebayTest(BackendTest):
+    """Smoke test for the piratebay backend."""
+
+    BACKEND = 'piratebay'
+
+    def test_torrent(self):
+        """Search for 'debian' and, if anything is found, fetch the first torrent file."""
+        results = list(self.backend.iter_torrents('debian'))
+        if not results:
+            return
+
+        first = results[0]
+        self.backend.get_torrent_file(first.id)