support repositories to manage backends (closes #747)
This commit is contained in:
parent
ef16a5b726
commit
14a7a1d362
410 changed files with 1079 additions and 297 deletions
3
modules/gazelle/__init__.py
Normal file
3
modules/gazelle/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .backend import GazelleBackend
|
||||
|
||||
__all__ = ['GazelleBackend']
|
||||
58
modules/gazelle/backend.py
Normal file
58
modules/gazelle/backend.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.capabilities.torrent import ICapTorrent
|
||||
from weboob.tools.backend import BaseBackend, BackendConfig
|
||||
from weboob.tools.value import ValueBackendPassword, Value
|
||||
|
||||
from .browser import GazelleBrowser
|
||||
|
||||
|
||||
__all__ = ['GazelleBackend']
|
||||
|
||||
|
||||
class GazelleBackend(BaseBackend, ICapTorrent):
    """Torrent backend for trackers described as 'gazelle bittorrent tracker'.

    Every torrent operation is delegated to a GazelleBrowser built from the
    backend configuration (protocol, domain, username and password).
    """

    NAME = 'gazelle'
    MAINTAINER = 'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '0.a'
    DESCRIPTION = 'gazelle bittorrent tracker'
    LICENSE = 'AGPLv3+'
    CONFIG = BackendConfig(
        Value('domain', label='Domain (example "ssl.what.cd")'),
        Value('protocol', label='Protocol to use', choices=('http', 'https')),
        Value('username', label='Username'),
        ValueBackendPassword('password', label='Password'))
    BROWSER = GazelleBrowser

    def create_default_browser(self):
        """Build the browser from the configured connection settings.

        The configuration values are passed positionally, in the order
        protocol, domain, username, password.
        """
        settings = [self.config[name].get()
                    for name in ('protocol', 'domain', 'username', 'password')]
        return self.create_browser(*settings)

    def get_torrent(self, id):
        """Return whatever the browser returns for the torrent *id*."""
        return self.browser.get_torrent(id)

    def get_torrent_file(self, id):
        """Fetch the content behind the url of the torrent *id*.

        Returns None when the browser finds no such torrent; otherwise
        returns what reading the opened (UTF-8 encoded) torrent url yields.
        """
        found = self.browser.get_torrent(id)
        if found:
            return self.browser.openurl(found.url.encode('utf-8')).read()
        return None

    def iter_torrents(self, pattern):
        """Return the browser's search results for *pattern*."""
        return self.browser.iter_torrents(pattern)
|
||||
69
modules/gazelle/browser.py
Normal file
69
modules/gazelle/browser.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages.index import IndexPage, LoginPage
|
||||
from .pages.torrents import TorrentsPage
|
||||
|
||||
|
||||
__all__ = ['GazelleBrowser']
|
||||
|
||||
|
||||
class GazelleBrowser(BaseBrowser):
    """Browser that logs into a Gazelle tracker and queries its torrents.

    The protocol and domain are supplied at construction time and stored in
    the PROTOCOL and DOMAIN attributes.
    """

    # URL regular expressions mapped to the page class handling them.
    # Dots in file names are escaped so that e.g. 'login.php' does not also
    # match 'loginXphp'.
    PAGES = {r'https?://[^/]+/?(index\.php)?': IndexPage,
             r'https?://[^/]+/login\.php': LoginPage,
             r'https?://[^/]+/torrents\.php.*': TorrentsPage,
            }

    def __init__(self, protocol, domain, *args, **kwargs):
        """Store *protocol* and *domain*, then run the base constructor.

        Remaining positional and keyword arguments are forwarded untouched
        to BaseBrowser.__init__.
        """
        # NOTE(review): both attributes are set before the base constructor
        # runs; presumably it may already rely on them -- confirm.
        self.DOMAIN = domain
        self.PROTOCOL = protocol
        BaseBrowser.__init__(self, *args, **kwargs)

    def login(self):
        """Go to the login page if needed and submit the stored credentials."""
        if not self.is_on_page(LoginPage):
            self.location('/login.php', no_login=True)
        self.page.login(self.username, self.password)

    def is_logged(self):
        """Tell whether the current page indicates an authenticated session.

        Returns False when there is no page or the login page is displayed,
        the index page's own verdict when on the index, and True otherwise.
        """
        if not self.page or self.is_on_page(LoginPage):
            return False
        if self.is_on_page(IndexPage):
            return self.page.is_logged()
        return True

    def home(self):
        """Navigate to the root url of the configured tracker."""
        return self.location('%s://%s/' % (self.PROTOCOL, self.DOMAIN))

    def iter_torrents(self, pattern):
        """Search the tracker for *pattern* and return the page's torrents.

        *pattern* is UTF-8 encoded before being placed in the 'searchstr'
        query parameter of '/torrents.php'.
        """
        self.location(self.buildurl('/torrents.php',
                                    searchstr=pattern.encode('utf-8')))

        assert self.is_on_page(TorrentsPage)
        return self.page.iter_torrents()

    def get_torrent(self, fullid):
        """Load the details page of a torrent and return the parsed torrent.

        *fullid* has the form '<id>.<torrentid>'; both halves become query
        parameters of '/torrents.php'. Returns None when *fullid* contains
        no dot.
        """
        if '.' not in fullid:
            return None

        # Split on the first dot only: the remainder is the torrent id.
        groupid, torrentid = fullid.split('.', 1)
        self.location(self.buildurl('/torrents.php',
                                    id=groupid, torrentid=torrentid))

        assert self.is_on_page(TorrentsPage)
        return self.page.get_torrent(fullid)
|
||||
BIN
modules/gazelle/favicon.png
Normal file
BIN
modules/gazelle/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.3 KiB |
0
modules/gazelle/pages/__init__.py
Normal file
0
modules/gazelle/pages/__init__.py
Normal file
34
modules/gazelle/pages/base.py
Normal file
34
modules/gazelle/pages/base.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BrowserUnavailable, BasePage as _BasePage
|
||||
|
||||
|
||||
__all__ = ['BasePage']
|
||||
|
||||
|
||||
class BasePage(_BasePage):
    """Base page that turns 'div.poetry' elements into a browser error."""

    def on_loaded(self):
        """Raise BrowserUnavailable if the document has 'div.poetry' elements.

        The cleaned text of every matching element is collected and the
        exception message is those texts joined with ', '.
        """
        root = self.document.getroot()
        messages = [self.parser.tocleanstring(node)
                    for node in self.parser.select(root, 'div.poetry')]

        if messages:
            raise BrowserUnavailable(', '.join(messages))
|
||||
49
modules/gazelle/pages/index.py
Normal file
49
modules/gazelle/pages/index.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BrowserIncorrectPassword, BrowserBanned
|
||||
from .base import BasePage
|
||||
|
||||
|
||||
__all__ = ['IndexPage', 'LoginPage']
|
||||
|
||||
|
||||
class IndexPage(BasePage):
    """Page class for the tracker index."""

    def is_logged(self):
        """Return True when the <body> element has an 'id' attribute."""
        body_attributes = self.document.find('body').attrib
        return 'id' in body_attributes
|
||||
|
||||
|
||||
class LoginPage(BasePage):
    """Page class for the login form and its warning messages."""

    def on_loaded(self):
        """Run the base checks, then map 'span.warning' texts to exceptions.

        A warning starting with 'Your username' raises
        BrowserIncorrectPassword; one starting with 'You are banned' raises
        BrowserBanned. Other warnings are ignored.
        """
        BasePage.on_loaded(self)

        # Checked in this order for every warning, first match wins.
        known_warnings = (
            ('Your username', BrowserIncorrectPassword),
            ('You are banned', BrowserBanned),
        )
        for node in self.parser.select(self.document.getroot(), 'span.warning'):
            message = self.parser.tocleanstring(node)
            for prefix, error_class in known_warnings:
                if message.startswith(prefix):
                    raise error_class(message)

    def login(self, login, password):
        """Fill the first form of the page with the credentials and submit it."""
        browser = self.browser
        browser.select_form(nr=0)
        browser['username'] = login
        browser['password'] = password
        browser.submit()
|
||||
209
modules/gazelle/pages/torrents.py
Normal file
209
modules/gazelle/pages/torrents.py
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import re
|
||||
import urlparse
|
||||
from logging import warning, debug
|
||||
|
||||
from weboob.tools.misc import html2text, get_bytes_size
|
||||
from weboob.capabilities.torrent import Torrent
|
||||
from weboob.capabilities.base import NotLoaded
|
||||
|
||||
from .base import BasePage
|
||||
|
||||
|
||||
__all__ = ['TorrentsPage']
|
||||
|
||||
|
||||
class TorrentsPage(BasePage):
    """Parser for 'torrents.php' pages: search listings and torrent details."""

    # Matches a relative download link and captures the numeric torrent id.
    TORRENTID_REGEXP = re.compile(r'torrents\.php\?action=download&id=(\d+)')

    def format_url(self, url):
        """Prefix the relative *url* with the browser's protocol and domain."""
        return '%s://%s/%s' % (self.browser.PROTOCOL,
                               self.browser.DOMAIN,
                               url)

    def iter_torrents(self):
        """Yield a Torrent for every torrent row of the listing table.

        The table is looked up as 'table.torrent_table', falling back to
        'table#browse_torrent_table'; nothing is yielded when neither
        exists. Rows of class 'group' only set the title prefix used for the
        torrent rows that follow them.
        """
        root = self.document.getroot()
        table = root.cssselect('table.torrent_table')
        if not table:
            table = root.cssselect('table#browse_torrent_table')
        if not table:
            return

        table = table[0]
        current_group = None
        for tr in table.findall('tr'):
            row_class = tr.attrib.get('class', '')
            if row_class == 'colhead':
                # Column headers carry no torrent.
                continue

            if row_class == 'group':
                tds = tr.findall('td')
                div = tds[-6]
                if div.getchildren()[0].tag == 'div':
                    div = div.getchildren()[0]
                # The group title is made of the texts of all named links.
                current_group = u' - '.join(a.text for a in div.findall('a')
                                            if a.text)
            elif row_class.startswith(('group_torrent', 'torrent')):
                tds = tr.findall('td')

                title = current_group
                if len(tds) == 7:
                    # Under a group
                    i = 0
                elif len(tds) in (8, 9):
                    # An alone torrent: use the last cell holding a link.
                    i = len(tds) - 1
                    while i >= 0 and tds[i].find('a') is None:
                        i -= 1
                else:
                    # Useless title
                    continue

                link = tds[i].find('a') if i >= 0 else None
                if link is None:
                    # Without a link there is neither a title nor an id.
                    debug('no link in torrent row: %s' % tr.attrib)
                    continue

                if title:
                    title += u' (%s)' % link.text
                else:
                    title = link.text

                href = link.attrib['href']
                params = urlparse.parse_qs(urlparse.urlparse(href).query)
                if 'torrentid' in params:
                    fullid = '%s.%s' % (params['id'][0], params['torrentid'][0])
                    # Keep the href string: handing the parsed result to
                    # format_url() would embed its repr in the final url.
                    url = href
                else:
                    url = tds[i].find('span').find('a').attrib['href']
                    m = self.TORRENTID_REGEXP.match(url)
                    if not m:
                        continue
                    fullid = '%s.%s' % (params['id'][0], m.group(1))

                size, unit = tds[i + 3].text.split()
                size = get_bytes_size(float(size.replace(',', '')), unit)
                seeders = int(tds[-2].text)
                leechers = int(tds[-1].text)

                yield Torrent(fullid,
                              title,
                              url=self.format_url(url),
                              size=size,
                              seeders=seeders,
                              leechers=leechers)
            else:
                debug('unknown attrib: %s' % tr.attrib)

    def get_torrent(self, id):
        """Build the Torrent *id* from a torrent details page.

        *id* is either '<id>.<torrentid>' or a bare torrent id. The url,
        size, seeders and leechers come from the row (or widget) whose
        download link carries the torrent id; the description is assembled
        from the titled boxes of the main column and the file names from the
        torrent's file table.

        Returns None when no row matches the torrent id.
        """
        table = self.browser.parser.select(self.document.getroot(), 'div.thin', 1)

        h2 = table.find('h2')
        if h2 is not None:
            title = h2.text or ''
            title_link = h2.find('a')
            if title_link is not None:
                title += (title_link.text or '') + (title_link.tail or '')
        else:
            title = self.browser.parser.select(table, 'div.title_text', 1).text

        torrent = Torrent(id, title)
        if '.' in id:
            torrentid = id.split('.', 1)[1]
        else:
            torrentid = id

        # Two layouts exist: a table of torrent rows, or a column of widgets.
        table = self.browser.parser.select(self.document.getroot(), 'table.torrent_table')
        if len(table) == 0:
            table = self.browser.parser.select(self.document.getroot(), 'div.main_column', 1)
            is_table = False
        else:
            table = table[0]
            is_table = True

        for tr in table.findall('tr' if is_table else 'div'):
            row_class = tr.attrib.get('class', '')
            if is_table and 'group_torrent' in row_class:
                tds = tr.findall('td')

                if len(tds) != 5:
                    continue

                url = tds[0].find('span').find('a').attrib['href']
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning('ID not found')
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                size, unit = tds[1].text.split()
                torrent.size = get_bytes_size(float(size.replace(',', '')), unit)
                torrent.seeders = int(tds[3].text)
                torrent.leechers = int(tds[4].text)
                break
            elif not is_table and row_class.startswith('torrent_widget') and \
                    row_class.endswith('pad'):
                url = tr.cssselect('a[title=Download]')[0].attrib['href']
                m = self.TORRENTID_REGEXP.match(url)
                if not m:
                    warning('ID not found')
                    continue
                if m.group(1) != torrentid:
                    continue

                torrent.url = self.format_url(url)
                size, unit = tr.cssselect('div.details_title strong')[-1].text.strip('()').split()
                torrent.size = get_bytes_size(float(size.replace(',', '')), unit)
                torrent.seeders = int(tr.cssselect('img[title=Seeders]')[0].tail)
                torrent.leechers = int(tr.cssselect('img[title=Leechers]')[0].tail)
                break

        if not torrent.url:
            warning('Torrent %s not found in list' % torrentid)
            return None

        div = self.parser.select(self.document.getroot(), 'div.main_column', 1)
        for box in div.cssselect('div.box'):
            title = None
            body = None

            title_t = box.cssselect('div.head')
            if len(title_t) > 0:
                title_t = title_t[0]
                if title_t.find('strong') is not None:
                    title_t = title_t.find('strong')
                # A heading without direct text counts as having no title.
                title = (title_t.text or '').strip()

            body_t = box.cssselect('div.body,div.desc')
            if body_t:
                body = html2text(self.parser.tostring(body_t[-1])).strip()

            # Only boxes having both a heading and a body are kept.
            if title and body:
                if torrent.description is NotLoaded:
                    torrent.description = u''
                torrent.description += u'%s\n\n%s\n' % (title, body)

        divs = self.document.getroot().cssselect('div#files_%s,div#filelist_%s,tr#torrent_%s td' % (torrentid, torrentid, torrentid))
        if divs:
            torrent.files = []
            for div in divs:
                table = div.find('table')
                if table is None:
                    continue
                for tr in table:
                    # Skip the header row of the file table.
                    if tr.attrib.get('class', None) != 'colhead_dark':
                        torrent.files.append(tr.find('td').text)

        return torrent
|
||||
28
modules/gazelle/test.py
Normal file
28
modules/gazelle/test.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.tools.test import BackendTest
|
||||
|
||||
class GazelleTest(BackendTest):
    """Smoke test for the 'gazelle' backend."""

    BACKEND = 'gazelle'

    def test_torrent(self):
        """Search for torrents and fetch the file of the first hit, if any."""
        results = list(self.backend.iter_torrents('sex'))
        if not results:
            return
        self.backend.get_torrent_file(results[0].id)
|
||||
Loading…
Add table
Add a link
Reference in a new issue