Fix piratebay

Move to the .se domain, stop using https for downloads, and use the correct link for downloads. Also includes some cosmetic enhancements.
2012-02-05 16:40:43 +01:00 · 2012-02-05 16:40:43 +01:00 · 988542463d
commit 988542463d
parent 664e4cc094
4 changed files with 30 additions and 28 deletions
--- a/modules/piratebay/browser.py
+++ b/modules/piratebay/browser.py
@ -30,26 +30,26 @@ __all__ = ['PiratebayBrowser']


 class PiratebayBrowser(BaseBrowser):
-    DOMAIN = 'thepiratebay.org'
+    DOMAIN = 'thepiratebay.se'
    PROTOCOL = 'https'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
-    PAGES = {'https://thepiratebay.org' : IndexPage,
-             'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
-             'https://thepiratebay.org/torrent/.*' : TorrentPage
+    PAGES = {'https://thepiratebay.se': IndexPage,
+             'https://thepiratebay.se/search/.*/0/7/0': TorrentsPage,
+             'https://thepiratebay.se/torrent/.*': TorrentPage
             }

    def home(self):
-        return self.location('https://thepiratebay.org')
+        return self.location('https://thepiratebay.se')

    def iter_torrents(self, pattern):
-        self.location('https://thepiratebay.org/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
+        self.location('https://thepiratebay.se/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))

        assert self.is_on_page(TorrentsPage)
        return self.page.iter_torrents()

    def get_torrent(self, id):
-        self.location('https://thepiratebay.org/torrent/%s/' % id)
+        self.location('https://thepiratebay.se/torrent/%s/' % id)

        assert self.is_on_page(TorrentPage)
        return self.page.get_torrent(id)
--- a/modules/piratebay/pages/index.py
+++ b/modules/piratebay/pages/index.py
@ -27,4 +27,3 @@ __all__ = ['IndexPage']
 class IndexPage(BasePage):
    def is_logged(self):
        return 'id' in self.document.find('body').attrib
-
--- a/modules/piratebay/pages/torrents.py
+++ b/modules/piratebay/pages/torrents.py
@ -18,7 +18,6 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-
 from weboob.tools.browser import BasePage
 from weboob.capabilities.torrent import Torrent

@ -34,7 +33,6 @@ class TorrentsPage(BasePage):
                'GB': 1024 * 1024 * 1024,
                'TB': 1024 * 1024 * 1024 * 1024,
                }
-        #return float(n.replace(',', '')) * m.get(u, 1)
        return float(n * m[u])

    def iter_torrents(self):
@ -65,13 +63,18 @@ class TorrentsPage(BasePage):
                                  leechers=int(leech))
                yield torrent

+
 class TorrentPage(BasePage):
    def get_torrent(self, id):
        for div in self.document.getiterator('div'):
            if div.attrib.get('id', '') == 'title':
                title = div.text.strip()
            elif div.attrib.get('class', '') == 'download':
-                url = div.getchildren()[0].attrib.get('href','')
+                # the last link is now the one with http
+                url = self.parser.select(div, 'a')[-1].attrib.get('href', '')
+                # https fails on the download server, so strip it
+                if url.startswith('https://'):
+                    url = url.replace('https://', 'http://', 1)
            elif div.attrib.get('id', '') == 'details':
                size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
                if len(div.getchildren()) > 1 \
@ -91,11 +94,11 @@ class TorrentPage(BasePage):
            elif div.attrib.get('class', '') == 'nfo':
                description = div.getchildren()[0].text
        torrent = Torrent(id, title)
-        torrent.url = url
+        torrent.url = url or None
        torrent.size = size
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
-        torrent.description = description
+        torrent.description = description.strip()
        torrent.files = ['NYI']

        return torrent