diff --git a/modules/opensubtitles/browser.py b/modules/opensubtitles/browser.py index 596e43b7..65d922f5 100644 --- a/modules/opensubtitles/browser.py +++ b/modules/opensubtitles/browser.py @@ -62,11 +62,6 @@ class OpensubtitlesBrowser(BaseBrowser): return self.page.iter_subtitles() def get_subtitle(self, id): - """ the id is formed this way : id_movie|id_file - the id_movie helps to find the page - the id_file help to find the file into the page - if NO id_movie set, using id_file to form the URL - """ self.location('http://www.opensubtitles.org/subtitles/%s' % id) assert self.is_on_page(SubtitlePage) return self.page.get_subtitle() diff --git a/modules/opensubtitles/pages.py b/modules/opensubtitles/pages.py index 4e376714..e765fd84 100644 --- a/modules/opensubtitles/pages.py +++ b/modules/opensubtitles/pages.py @@ -29,7 +29,6 @@ from weboob.capabilities.subtitle import Subtitle from weboob.capabilities.base import NotAvailable from weboob.tools.browser import BasePage from weboob.tools.misc import get_bytes_size -import time __all__ = ['SubtitlesPage','SearchPage'] diff --git a/modules/tvsubtitles/__init__.py b/modules/tvsubtitles/__init__.py new file mode 100644 index 00000000..5ee86610 --- /dev/null +++ b/modules/tvsubtitles/__init__.py @@ -0,0 +1,3 @@ +from .backend import TvsubtitlesBackend + +__all__ = ['TvsubtitlesBackend'] diff --git a/modules/tvsubtitles/backend.py b/modules/tvsubtitles/backend.py new file mode 100644 index 00000000..5fb9508a --- /dev/null +++ b/modules/tvsubtitles/backend.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Julien Veyssier +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+from weboob.capabilities.subtitle import ICapSubtitle
+from weboob.tools.backend import BaseBackend
+
+from .browser import TvsubtitlesBrowser,LANGUAGE_CONV
+
+from urllib import quote_plus
+
+__all__ = ['TvsubtitlesBackend']
+
+
+class TvsubtitlesBackend(BaseBackend, ICapSubtitle):  # subtitle backend; all site access is delegated to TvsubtitlesBrowser
+    NAME = 'tvsubtitles'
+    MAINTAINER = u'Julien Veyssier'
+    EMAIL = 'julien.veyssier@aiur.fr'
+    VERSION = '0.f'
+    DESCRIPTION = 'Tvsubtitles subtitle website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = TvsubtitlesBrowser
+
+    def create_default_browser(self):  # no credentials or options are passed to the browser
+        return self.create_browser()
+
+    def get_subtitle(self, id):  # returns whatever the browser's get_subtitle(id) returns
+        return self.browser.get_subtitle(id)
+
+    def get_subtitle_file(self, id):  # returns the content read from subtitle.url, or None if no subtitle was found
+        subtitle = self.browser.get_subtitle(id)
+        if not subtitle:
+            return None
+
+        return self.browser.openurl(subtitle.url.encode('utf-8')).read()
+
+    def iter_subtitles(self, language, pattern):  # returns [] for languages that are not keys of LANGUAGE_CONV
+        if language not in LANGUAGE_CONV:  # direct dict membership; no intermediate key list
+            return []
+        return self.browser.iter_subtitles(language, pattern)  # not quote_plus()'d: HomePage puts it in a form field, which is URL-encoded on submit
diff --git a/modules/tvsubtitles/browser.py b/modules/tvsubtitles/browser.py
new file mode 100644
index 00000000..4aad8b2a
--- /dev/null
+++ b/modules/tvsubtitles/browser.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+
+from weboob.tools.browser import BaseBrowser
+
+from .pages import SeriePage, SearchPage, SeasonPage,HomePage
+
+
+__all__ = ['TvsubtitlesBrowser']
+
+LANGUAGE_CONV = {  # two-letter code -> three-letter code ('' where none is given); the backend only tests key membership
+'ar':'ara', 'eo':'epo', 'ga':'', 'ru':'rus',
+'af':'' , 'et':'est', 'it':'ita', 'sr':'scc',
+'sq':'alb', 'tl':'' , 'ja':'jpn', 'sk':'slo',
+'hy':'arm', 'fi':'fin', 'kn':'', 'sl':'slv',
+'az':'' , 'fr':'fre', 'ko':'kor', 'es':'spa',
+'eu':'baq', 'gl':'glg', 'la':'', 'sw':'swa',
+'be':'' , 'ka':'geo', 'lv':'lav', 'sv':'swe',
+'bn':'ben', 'de':'ger', 'lt':'lit', 'ta':'',
+'bg':'bul', 'gr':'ell', 'mk':'mac', 'te':'tel',
+'ca':'cat', 'gu':'' , 'ms':'may', 'th':'tha',
+'zh':'chi', 'ht':'' , 'mt':'', 'tr':'tur',
+'hr':'hrv', 'iw':'heb', 'no':'nor', 'uk':'ukr',
+'cz':'cze', 'hi':'hin', 'fa':'per', 'ur':'urd',
+'da':'dan', 'hu':'hun', 'pl':'pol', 'vi':'vie',
+'nl':'dut', 'is':'ice', 'pt':'por', 'cy':'',
+'en':'eng', 'id':'ind', 'ro':'rum', 'yi':''}
+
+class TvsubtitlesBrowser(BaseBrowser):  # browser for www.tvsubtitles.net; PAGES maps URL patterns to page classes
+    DOMAIN = 'www.tvsubtitles.net'
+    PROTOCOL = 'http'
+    ENCODING = 'utf-8'
+    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
+    PAGES = {
+        'http://www.tvsubtitles.net': HomePage,
+        'http://www.tvsubtitles.net/search.php': SearchPage,
+        'http://www.tvsubtitles.net/tvshow-.*.html': SeriePage,
+        'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html' : SeasonPage
+        }
+
+    def iter_subtitles(self, language, pattern):  # opens the home page and lets HomePage run the search
+        self.location('http://www.tvsubtitles.net')
+        assert self.is_on_page(HomePage)
+        return self.page.iter_subtitles(language,pattern)
+
+    def get_subtitle(self, id):  # id is the part of the page URL between 'subtitle-' and '.html'
+        self.location('http://www.tvsubtitles.net/subtitle-%s.html' % id)
+        assert self.is_on_page(SeasonPage)
+        return self.page.get_subtitle()
diff --git a/modules/tvsubtitles/favicon.png b/modules/tvsubtitles/favicon.png
new file mode 100644
index 00000000..253ee0a9
Binary files /dev/null and b/modules/tvsubtitles/favicon.png differ
diff --git a/modules/tvsubtitles/pages.py b/modules/tvsubtitles/pages.py
new file mode 100644
index 00000000..ed08f0d1
--- /dev/null
+++ b/modules/tvsubtitles/pages.py
@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2012 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+
+try:
+    from urlparse import parse_qs
+except ImportError:
+    from cgi import parse_qs # NOQA
+
+from urlparse import urlsplit
+
+from weboob.capabilities.subtitle import Subtitle
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.browser import BasePage
+from weboob.tools.misc import get_bytes_size
+
+
+__all__ = ['HomePage','SearchPage','SeriePage','SeasonPage']
+
+
+class HomePage(BasePage):
+    def iter_subtitles(self,language,pattern):  # generator: submits the search, then relays SearchPage results
+        self.browser.select_form(nr=0)  # first form of the page; presumably the search box - TODO confirm
+        self.browser['q'] = pattern.encode('utf-8')
+        self.browser.submit()
+        assert self.browser.is_on_page(SearchPage)
+        for subtitle in self.browser.page.iter_subtitles(language):
+            yield subtitle
+
+
+class SearchPage(BasePage):
+    """ Search results page: each <li> of 'div.left_articles ul' is one result.
+    iter_subtitles() opens every result carrying an img[alt=<language>]. """
+    def iter_subtitles(self,language):  # generator of the subtitles found behind each matching result
+        list_result = self.parser.select(self.document.getroot(),'div.left_articles ul',1)
+        li_result = self.parser.select(list_result,'li')
+        for line in li_result:
+            if len(self.parser.select(line,'img[alt=%s]'%language)) > 0:  # result has an image whose alt is the language code
+                link = self.parser.select(line,'a',1)
+                href = link.attrib.get('href','')
+                self.browser.location("http://%s%s"%(self.browser.DOMAIN,href))  # no '/' inserted: href presumably starts with one - TODO confirm
+                assert self.browser.is_on_page(SeriePage)
+                for subtitle in self.browser.page.iter_subtitles(language):
+                    yield subtitle
+
+
+class SeriePage(BasePage):
+    """ Page of one series: handles the season shown on the page, then
+    (unless only_one_season) each page linked from 'p.description a'. """
+    def iter_subtitles(self,language,only_one_season=False):  # generator; only_one_season=True skips the season links
+        # handle the current season
+        last_table_line = self.parser.select(self.document.getroot(),'table#table5 tr')[-1]  # last row of table#table5
+        amount = int(self.parser.select(last_table_line,'td')[2].text_content())  # third cell; presumably a subtitle count - TODO confirm
+        if amount > 0:
+            my_lang_img = self.parser.select(last_table_line,'img[alt=%s]'%language)
+            if len(my_lang_img) > 0:
+                url_current_season = self.browser.geturl().split('/')[-1].replace('tvshow','subtitle').replace('.html','-%s.html'%language)  # tvshow-X.html -> subtitle-X-<language>.html
+                self.browser.location(url_current_season)
+                assert self.browser.is_on_page(SeasonPage)
+                yield self.browser.page.iter_subtitles()  # SeasonPage.iter_subtitles() returns one Subtitle, so it is yielded directly
+
+        if not only_one_season:
+            # handle the other seasons by following top links
+            other_seasons_links = self.parser.select(self.document.getroot(),'p.description a')
+            for link in other_seasons_links:
+                href = link.attrib.get('href','')
+                self.browser.location("http://%s/%s"%(self.browser.DOMAIN,href))
+                assert self.browser.is_on_page(SeriePage)
+                for subtitle in self.browser.page.iter_subtitles(language,True):  # True: the followed page does not follow its own links
+                    yield subtitle
+
+
+class SeasonPage(BasePage):
+    """ Page of a season with the right language; get_subtitle() builds one
+    Subtitle from the page's filename, amount and file list rows. """
+    def get_subtitle(self):  # returns a Subtitle filled from the current page and its URL
+        filename_line = self.parser.select(self.document.getroot(),'img[alt=filename]',1).getparent().getparent()  # two levels above the 'filename' image
+        name = self.parser.select(filename_line,'td')[2].text
+        id = self.browser.geturl().split('/')[-1].replace('.html','').replace('subtitle-','')  # last URL segment without 'subtitle-' and '.html'
+        url = "http://%s/download-%s.html"%(self.browser.DOMAIN,id)
+        amount_line = self.parser.select(self.document.getroot(),'tr[title~=amount]',1)
+        nb_cd = int(self.parser.select(amount_line,'td')[2].text)
+        lang = url.split('-')[-1].split('.html')[0]  # text after the last '-' of the URL, without '.html'
+        filenames_line = self.parser.select(self.document.getroot(),'tr[title~=list]',1)
+        file_names = self.parser.select(filenames_line,'td')[2].text_content().strip().replace('.srt','.srt\n')  # newline after each '.srt'
+        desc = u"files :\n"
+        desc += file_names
+        fps = 0  # constant: nothing is read from the page for the frame rate
+
+        subtitle = Subtitle(id,name)
+        subtitle.url = url
+        subtitle.fps = fps
+        subtitle.language = lang
+        subtitle.nb_cd = nb_cd
+        subtitle.description = desc
+        return subtitle
+
+    def iter_subtitles(self):  # despite the name, returns the single Subtitle of this page (not an iterable)
+        return self.get_subtitle()
diff --git a/modules/tvsubtitles/test.py b/modules/tvsubtitles/test.py
new file mode 100644
index 00000000..ef14f2ba
--- /dev/null
+++ b/modules/tvsubtitles/test.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Julien Veyssier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+from weboob.tools.test import BackendTest
+from weboob.capabilities.base import NotLoaded
+
+import urllib
+from random import choice
+
+class TvsubtitlesTest(BackendTest):  # live test: searches the site and downloads one subtitle file
+    BACKEND = 'tvsubtitles'
+
+    def test_subtitle(self):  # search 'sopranos' in French, check every URL, then fetch one random file
+        subtitles = list(self.backend.iter_subtitles('fr','sopranos'))
+        assert (len(subtitles) > 0)
+        for subtitle in subtitles:
+            path, qs = urllib.splitquery(subtitle.url)
+            assert path.endswith('.html')  # SeasonPage.get_subtitle builds 'http://<domain>/download-<id>.html'
+
+        # get the file of a random sub
+        if len(subtitles):
+            subtitle = choice(subtitles)
+            self.backend.get_subtitle_file(subtitle.id)