weboob-devel/modules/podnapisi/pages.py
2013-04-15 09:42:06 +02:00

96 lines
3.5 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.subtitle import Subtitle
from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage
from weboob.applications.suboob.suboob import LANGUAGE_CONV
__all__ = ['SubtitlePage', 'SearchPage']
LANGUAGE_NUMBERS = {
'fr': 8,
'en': 2,
}
class SearchPage(BasePage):
""" Page which contains results as a list of movies
"""
def iter_subtitles(self, language):
linksresults = self.parser.select(self.document.getroot(), 'a.subtitle_page_link')
for link in linksresults:
id = unicode(link.attrib.get('href', '').split('es-p')[-1])
name = unicode(link.text_content())
tr = link.getparent().getparent().getparent()
cdtd = self.parser.select(tr, 'td')[4]
nb_cd = int(cdtd.text)
description = NotLoaded
subtitle = Subtitle(id, name)
subtitle.nb_cd = nb_cd
subtitle.language = language
subtitle.description = description
yield subtitle
class SubtitlePage(BasePage):
""" Page which contains a single subtitle for a movie
"""
def get_subtitle(self, id):
language = NotAvailable
url = NotAvailable
nb_cd = NotAvailable
links_info = self.parser.select(self.document.getroot(), 'fieldset.information a')
for link in links_info:
href = link.attrib.get('href', '')
if '/fr/ppodnapisi/kategorija/jezik/' in href:
nlang = href.split('/')[-1]
for lang, langnum in LANGUAGE_NUMBERS.items():
if str(langnum) == str(nlang):
language = unicode(lang)
break
desc = u''
infos = self.parser.select(self.document.getroot(), 'fieldset.information')
for info in infos:
for p in self.parser.select(info, 'p'):
desc += '%s\n' % (u' '.join(p.text_content().strip().split()))
spans = self.parser.select(info, 'span')
for span in spans:
if span.text is not None and 'CD' in span.text:
nb_cd = int(self.parser.select(span.getparent(), 'span')[1].text)
title = unicode(self.parser.select(self.document.getroot(), 'head title', 1).text)
name = title.split(' - ')[0]
dllinks = self.parser.select(self.document.getroot(), 'div.footer > a.download')
for link in dllinks:
href = link.attrib.get('href', '')
if id in href:
url = u'http://www.podnapisi.net%s' % href
subtitle = Subtitle(id, name)
subtitle.url = url
subtitle.language = language
subtitle.nb_cd = nb_cd
subtitle.description = desc
return subtitle