Fix piratebay
Move to the .se domain, use plain HTTP instead of HTTPS for downloads, and use the correct download link. Also includes some cosmetic enhancements.
This commit is contained in:
parent
664e4cc094
commit
988542463d
4 changed files with 30 additions and 28 deletions
|
|
@ -30,26 +30,26 @@ __all__ = ['PiratebayBrowser']
|
||||||
|
|
||||||
|
|
||||||
class PiratebayBrowser(BaseBrowser):
|
class PiratebayBrowser(BaseBrowser):
|
||||||
DOMAIN = 'thepiratebay.org'
|
DOMAIN = 'thepiratebay.se'
|
||||||
PROTOCOL = 'https'
|
PROTOCOL = 'https'
|
||||||
ENCODING = 'utf-8'
|
ENCODING = 'utf-8'
|
||||||
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
||||||
PAGES = {'https://thepiratebay.org' : IndexPage,
|
PAGES = {'https://thepiratebay.se': IndexPage,
|
||||||
'https://thepiratebay.org/search/.*/0/7/0' : TorrentsPage,
|
'https://thepiratebay.se/search/.*/0/7/0': TorrentsPage,
|
||||||
'https://thepiratebay.org/torrent/.*' : TorrentPage
|
'https://thepiratebay.se/torrent/.*': TorrentPage
|
||||||
}
|
}
|
||||||
|
|
||||||
def home(self):
|
def home(self):
|
||||||
return self.location('https://thepiratebay.org')
|
return self.location('https://thepiratebay.se')
|
||||||
|
|
||||||
def iter_torrents(self, pattern):
|
def iter_torrents(self, pattern):
|
||||||
self.location('https://thepiratebay.org/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
|
self.location('https://thepiratebay.se/search/%s/0/7/0' % urllib.quote_plus(pattern.encode('utf-8')))
|
||||||
|
|
||||||
assert self.is_on_page(TorrentsPage)
|
assert self.is_on_page(TorrentsPage)
|
||||||
return self.page.iter_torrents()
|
return self.page.iter_torrents()
|
||||||
|
|
||||||
def get_torrent(self, id):
|
def get_torrent(self, id):
|
||||||
self.location('https://thepiratebay.org/torrent/%s/' % id)
|
self.location('https://thepiratebay.se/torrent/%s/' % id)
|
||||||
|
|
||||||
assert self.is_on_page(TorrentPage)
|
assert self.is_on_page(TorrentPage)
|
||||||
return self.page.get_torrent(id)
|
return self.page.get_torrent(id)
|
||||||
|
|
|
||||||
|
|
@ -27,4 +27,3 @@ __all__ = ['IndexPage']
|
||||||
class IndexPage(BasePage):
|
class IndexPage(BasePage):
|
||||||
def is_logged(self):
|
def is_logged(self):
|
||||||
return 'id' in self.document.find('body').attrib
|
return 'id' in self.document.find('body').attrib
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
from weboob.capabilities.torrent import Torrent
|
from weboob.capabilities.torrent import Torrent
|
||||||
|
|
||||||
|
|
@ -34,7 +33,6 @@ class TorrentsPage(BasePage):
|
||||||
'GB': 1024 * 1024 * 1024,
|
'GB': 1024 * 1024 * 1024,
|
||||||
'TB': 1024 * 1024 * 1024 * 1024,
|
'TB': 1024 * 1024 * 1024 * 1024,
|
||||||
}
|
}
|
||||||
#return float(n.replace(',', '')) * m.get(u, 1)
|
|
||||||
return float(n * m[u])
|
return float(n * m[u])
|
||||||
|
|
||||||
def iter_torrents(self):
|
def iter_torrents(self):
|
||||||
|
|
@ -65,13 +63,18 @@ class TorrentsPage(BasePage):
|
||||||
leechers=int(leech))
|
leechers=int(leech))
|
||||||
yield torrent
|
yield torrent
|
||||||
|
|
||||||
|
|
||||||
class TorrentPage(BasePage):
|
class TorrentPage(BasePage):
|
||||||
def get_torrent(self, id):
|
def get_torrent(self, id):
|
||||||
for div in self.document.getiterator('div'):
|
for div in self.document.getiterator('div'):
|
||||||
if div.attrib.get('id', '') == 'title':
|
if div.attrib.get('id', '') == 'title':
|
||||||
title = div.text.strip()
|
title = div.text.strip()
|
||||||
elif div.attrib.get('class', '') == 'download':
|
elif div.attrib.get('class', '') == 'download':
|
||||||
url = div.getchildren()[0].attrib.get('href','')
|
# the last link is now the one with http
|
||||||
|
url = self.parser.select(div, 'a')[-1].attrib.get('href', '')
|
||||||
|
# https fails on the download server, so strip it
|
||||||
|
if url.startswith('https://'):
|
||||||
|
url = url.replace('https://', 'http://', 1)
|
||||||
elif div.attrib.get('id', '') == 'details':
|
elif div.attrib.get('id', '') == 'details':
|
||||||
size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
|
size = float(div.getchildren()[0].getchildren()[5].text.split('(')[1].split('Bytes')[0])
|
||||||
if len(div.getchildren()) > 1 \
|
if len(div.getchildren()) > 1 \
|
||||||
|
|
@ -91,11 +94,11 @@ class TorrentPage(BasePage):
|
||||||
elif div.attrib.get('class', '') == 'nfo':
|
elif div.attrib.get('class', '') == 'nfo':
|
||||||
description = div.getchildren()[0].text
|
description = div.getchildren()[0].text
|
||||||
torrent = Torrent(id, title)
|
torrent = Torrent(id, title)
|
||||||
torrent.url = url
|
torrent.url = url or None
|
||||||
torrent.size = size
|
torrent.size = size
|
||||||
torrent.seeders = int(seed)
|
torrent.seeders = int(seed)
|
||||||
torrent.leechers = int(leech)
|
torrent.leechers = int(leech)
|
||||||
torrent.description = description
|
torrent.description = description.strip()
|
||||||
torrent.files = ['NYI']
|
torrent.files = ['NYI']
|
||||||
|
|
||||||
return torrent
|
return torrent
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue