diff --git a/weboob/backends/gazelle/browser.py b/weboob/backends/gazelle/browser.py index 9d5884de..e4905300 100644 --- a/weboob/backends/gazelle/browser.py +++ b/weboob/backends/gazelle/browser.py @@ -66,4 +66,7 @@ class GazelleBrowser(BaseBrowser): return self.page.iter_torrents() def get_torrent(self, id): - pass + self.location('/torrents.php?torrentid=%s' % id) + + assert self.is_on_page(TorrentsPage) + return self.page.get_torrent(id) diff --git a/weboob/backends/gazelle/pages/torrents.py b/weboob/backends/gazelle/pages/torrents.py index be7e050f..e065a5f3 100644 --- a/weboob/backends/gazelle/pages/torrents.py +++ b/weboob/backends/gazelle/pages/torrents.py @@ -19,7 +19,9 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ import re +from logging import warning +from weboob.tools.misc import html2text from weboob.tools.browser import BasePage from weboob.capabilities.torrent import Torrent @@ -37,11 +39,18 @@ class TorrentsPage(BasePage): } return float(n.replace(',', '')) * m.get(u, 1) + def format_url(self, url): + return '%s://%s/%s' % (self.browser.PROTOCOL, + self.browser.DOMAIN, + url) + def iter_torrents(self): - table = self.document.getroot().cssselect('table#torrent_table') + table = self.document.getroot().cssselect('table.torrent_table') if not table: table = self.document.getroot().cssselect('table#browse_torrent_table') - if table: + if not table: + warning('No table found') + else: table = table[0] current_group = None for tr in table.findall('tr'): @@ -57,7 +66,7 @@ class TorrentsPage(BasePage): if current_group: current_group += ' - ' current_group += a.text - elif tr.attrib.get('class', '').startswith('group_torrent ') or \ + elif tr.attrib.get('class', '').startswith('group_torrent') or \ tr.attrib.get('class', '').startswith('torrent'): tds = tr.findall('td') @@ -87,8 +96,77 @@ class TorrentsPage(BasePage): torrent = Torrent(id, title, - url=url, + url=self.format_url(url), size=size, seeders=seeders, leechers=leechers) yield torrent + else: + print tr.attrib + + def get_torrent(self, id): + table = self.document.getroot().cssselect('div.thin') + if not table: + warning('No div.thin found') + return None + + h2 = table[0].find('h2') + title = h2.text or '' + if h2.find('a') != None: + title += h2.find('a').text + h2.find('a').tail + + torrent = Torrent(id, title) + table = self.document.getroot().cssselect('table.torrent_table') + if not table: + warning('No table found') + return None + + for tr in table[0].findall('tr'): + if tr.attrib.get('class', '').startswith('group_torrent'): + tds = tr.findall('td') + + if not len(tds) == 5: + continue + + url = tds[0].find('span').find('a').attrib['href'] + id = self.TORRENTID_REGEXP.match(url) + + if not id: + warning('ID not found') + continue + + id = id.group(1) + + if id != torrent.id: + continue + + torrent.url = self.format_url(url) + torrent.size = self.unit(*tds[1].text.split()) + torrent.seeders = int(tds[3].text) + torrent.leechers = int(tds[4].text) + break + + if not torrent.url: + warning('Torrent %d not found in list' % torrent.id) + return None + + div = self.document.getroot().cssselect('div.main_column') + if not div: + warning('WTF') + return None + + for box in div[0].cssselect('div.box'): + title = None + body = None + + title_t = box.cssselect('div.head') + if title_t: + title = title_t[0].find('strong').text + body_t = box.cssselect('div.body') + if body_t: + body = html2text(self.browser.parser.tostring(body_t[0])) + + if title and body: + torrent.description += '%s\n\n%s\n' % (title, body) + + return torrent diff --git a/weboob/capabilities/torrent.py b/weboob/capabilities/torrent.py index 014df559..56343cc9 100644 --- a/weboob/capabilities/torrent.py +++ b/weboob/capabilities/torrent.py @@ -24,15 +24,16 @@ from .cap import ICap __all__ = ['ICapTorrent'] class Torrent(object): - def __init__(self, id, name, date=None, size=0.0, url=u'', seeders=0, leechers=0, files=[]): + def __init__(self, id, name, date=None, size=0.0, url=u'', seeders=0, leechers=0, files=[], description=u''): self.id = id self.name = name self.date = date self.size = size - self.ul = url + self.url = url self.seeders = seeders self.leechers = leechers self.files = files + self.description = description class ICapTorrent(ICap): def iter_torrents(self, pattern): diff --git a/weboob/frontends/weboorrents/application.py b/weboob/frontends/weboorrents/application.py index 363cf216..ae50f9ad 100644 --- a/weboob/frontends/weboorrents/application.py +++ b/weboob/frontends/weboorrents/application.py @@ -58,6 +58,10 @@ class Weboorrents(ConsoleApplication): rows.append(('ID', torrent.id)) rows.append(('Name', torrent.name)) rows.append(('Size', torrent.size)) + rows.append(('URL', torrent.url)) + rows.append(('Seeders', torrent.seeders)) + rows.append(('Leechers', torrent.leechers)) + rows.append(('Description', torrent.description)) return {backend.name: rows} @ConsoleApplication.command('Search torrents')