Fix piratebay
Move to the .se domain, switch downloads from https to http, and use the correct link for downloads. Also includes some cosmetic enhancements.
This commit is contained in:
parent
664e4cc094
commit
988542463d
4 changed files with 30 additions and 28 deletions
|
|
@ -30,26 +30,26 @@ __all__ = ['PiratebayBrowser']
|
|||
|
||||
|
||||
class PiratebayBrowser(BaseBrowser):
|
||||
DOMAIN = 'thepiratebay.org'
|
||||
DOMAIN = 'thepiratebay.se'
|
||||
PROTOCOL = 'https'
|
||||
ENCODING = 'utf-8'
|
||||
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
||||
PAGES = {'https://thepiratebay.org' : IndexPage,
|
||||
'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
|
||||
'https://thepiratebay.org/torrent/.*' : TorrentPage
|
||||
PAGES = {'https://thepiratebay.se': IndexPage,
|
||||
'https://thepiratebay.se/search/.*/0/7/0': TorrentsPage,
|
||||
'https://thepiratebay.se/torrent/.*': TorrentPage
|
||||
}
|
||||
|
||||
def home(self):
|
||||
return self.location('https://thepiratebay.org')
|
||||
return self.location('https://thepiratebay.se')
|
||||
|
||||
def iter_torrents(self, pattern):
|
||||
self.location('https://thepiratebay.org/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
|
||||
self.location('https://thepiratebay.se/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
|
||||
|
||||
assert self.is_on_page(TorrentsPage)
|
||||
return self.page.iter_torrents()
|
||||
|
||||
def get_torrent(self, id):
|
||||
self.location('https://thepiratebay.org/torrent/%s/' % id)
|
||||
self.location('https://thepiratebay.se/torrent/%s/' % id)
|
||||
|
||||
assert self.is_on_page(TorrentPage)
|
||||
return self.page.get_torrent(id)
|
||||
|
|
|
|||
|
|
@ -27,4 +27,3 @@ __all__ = ['IndexPage']
|
|||
class IndexPage(BasePage):
|
||||
def is_logged(self):
|
||||
return 'id' in self.document.find('body').attrib
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.capabilities.torrent import Torrent
|
||||
|
||||
|
|
@ -34,7 +33,6 @@ class TorrentsPage(BasePage):
|
|||
'GB': 1024 * 1024 * 1024,
|
||||
'TB': 1024 * 1024 * 1024 * 1024,
|
||||
}
|
||||
#return float(n.replace(',', '')) * m.get(u, 1)
|
||||
return float(n * m[u])
|
||||
|
||||
def iter_torrents(self):
|
||||
|
|
@ -65,13 +63,18 @@ class TorrentsPage(BasePage):
|
|||
leechers=int(leech))
|
||||
yield torrent
|
||||
|
||||
|
||||
class TorrentPage(BasePage):
|
||||
def get_torrent(self, id):
|
||||
for div in self.document.getiterator('div'):
|
||||
if div.attrib.get('id', '') == 'title':
|
||||
title = div.text.strip()
|
||||
elif div.attrib.get('class', '') == 'download':
|
||||
url = div.getchildren()[0].attrib.get('href','')
|
||||
# the last link is now the one with http
|
||||
url = self.parser.select(div, 'a')[-1].attrib.get('href', '')
|
||||
# https fails on the download server, so strip it
|
||||
if url.startswith('https://'):
|
||||
url = url.replace('https://', 'http://', 1)
|
||||
elif div.attrib.get('id', '') == 'details':
|
||||
size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
|
||||
if len(div.getchildren()) > 1 \
|
||||
|
|
@ -91,11 +94,11 @@ class TorrentPage(BasePage):
|
|||
elif div.attrib.get('class', '') == 'nfo':
|
||||
description = div.getchildren()[0].text
|
||||
torrent = Torrent(id, title)
|
||||
torrent.url = url
|
||||
torrent.url = url or None
|
||||
torrent.size = size
|
||||
torrent.seeders = int(seed)
|
||||
torrent.leechers = int(leech)
|
||||
torrent.description = description
|
||||
torrent.description = description.strip()
|
||||
torrent.files = ['NYI']
|
||||
|
||||
return torrent
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue