# -*- coding: utf-8 -*-

# Copyright(C) 2013 Julien Veyssier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see http://www.gnu.org/licenses/.


from weboob.capabilities.subtitle import Subtitle
from weboob.tools.browser import BasePage


__all__ = ['HomePage', 'SearchPage', 'SeriePage', 'SeasonPage']


class HomePage(BasePage):
    """
    Entry page: submits the search form and delegates to the result page.
    """

    def iter_subtitles(self, language, pattern):
        """
        Search for ``pattern`` and yield the subtitles found for ``language``.

        Fills the field ``q`` of the first form of the page with the
        UTF-8 encoded pattern, submits it, then yields everything produced by
        ``SearchPage.iter_subtitles(language)``.

        Raises AssertionError if the browser does not land on a SearchPage.
        """
        self.browser.select_form(nr=0)
        self.browser['q'] = pattern.encode('utf-8')
        self.browser.submit()
        assert self.browser.is_on_page(SearchPage)
        for subtitle in self.browser.page.iter_subtitles(language):
            yield subtitle


class SearchPage(BasePage):
    """
    Page which contains results as a list of series
    """

    def iter_subtitles(self, language):
        """
        Yield the subtitles of every listed serie available in ``language``.

        Only the first ``div.left_articles ul`` list is inspected. An entry is
        followed when it contains an ``img`` whose ``alt`` equals ``language``;
        its first link is then opened and the resulting SeriePage is iterated.

        Raises AssertionError if a followed link does not lead to a SeriePage.
        """
        root = self.document.getroot()
        result_lists = self.parser.select(root, 'div.left_articles ul')
        if not result_lists:
            # no result list on the page: nothing to yield
            return

        for line in self.parser.select(result_lists[0], 'li'):
            if not self.parser.select(line, 'img[alt=%s]' % language):
                # this entry has no image tagged with the requested language
                continue
            link = self.parser.select(line, 'a', 1)
            href = link.attrib.get('href', '')
            self.browser.location("http://%s%s" % (self.browser.DOMAIN, href))
            assert self.browser.is_on_page(SeriePage)
            for subtitle in self.browser.page.iter_subtitles(language):
                yield subtitle


class SeriePage(BasePage):
    """
    Page of all seasons
    """

    def iter_subtitles(self, language, only_one_season=False):
        """
        Yield the subtitles of this serie for ``language``.

        The last row of ``table#table5`` is handled as the current season: if
        the integer in its third cell is positive and the row holds an ``img``
        whose ``alt`` equals ``language``, the matching season page is opened
        and its subtitle is yielded.

        Unless ``only_one_season`` is true, every link found in
        ``p.description a`` is then followed and the SeriePage reached is
        iterated with ``only_one_season=True`` so it does not recurse further.

        Raises AssertionError if a navigation lands on an unexpected page.
        """
        root = self.document.getroot()

        # handle the current season
        last_table_line = self.parser.select(root, 'table#table5 tr')[-1]
        amount_cell = self.parser.select(last_table_line, 'td')[2]
        amount = int(amount_cell.text_content())
        if amount > 0 and self.parser.select(last_table_line,
                                             'img[alt=%s]' % language):
            # derive the season page name from the last segment of the
            # current URL: 'tvshow...' -> 'subtitle...-<language>.html'
            page_name = self.browser.geturl().split('/')[-1]
            url_current_season = page_name.replace('tvshow', 'subtitle') \
                                          .replace('.html', '-%s.html' % language)
            self.browser.location(url_current_season)
            assert self.browser.is_on_page(SeasonPage)
            # SeasonPage.iter_subtitles() returns a single Subtitle object
            yield self.browser.page.iter_subtitles()

        if only_one_season:
            return

        # handle the other seasons by following top links
        # (self.document still refers to this page even after navigating)
        for link in self.parser.select(root, 'p.description a'):
            href = link.attrib.get('href', '')
            self.browser.location("http://%s/%s" % (self.browser.DOMAIN, href))
            assert self.browser.is_on_page(SeriePage)
            for subtitle in self.browser.page.iter_subtitles(language, True):
                yield subtitle


class SeasonPage(BasePage):
    """
    Page of a season with the right language
    """

    def get_subtitle(self):
        """
        Build and return the Subtitle object described by this page.

        The id is the last segment of the current URL without the
        ``subtitle-`` prefix and the ``.html`` suffix; the download URL and
        the language are both derived from that id.
        """
        root = self.document.getroot()

        # the row holding the file name is the grandparent of the
        # 'filename' image
        filename_img = self.parser.select(root, 'img[alt=filename]', 1)
        filename_line = filename_img.getparent().getparent()
        name = self.parser.select(filename_line, 'td')[2].text

        # named subtitle_id (not id) to avoid shadowing the builtin
        page_name = self.browser.geturl().split('/')[-1]
        subtitle_id = page_name.replace('.html', '').replace('subtitle-', '')
        url = "http://%s/download-%s.html" % (self.browser.DOMAIN, subtitle_id)

        amount_line = self.parser.select(root, 'tr[title~=amount]', 1)
        nb_cd = int(self.parser.select(amount_line, 'td')[2].text)

        # the language is whatever follows the last '-' of the download URL
        lang = url.split('-')[-1].split('.html')[0]

        filenames_line = self.parser.select(root, 'tr[title~=list]', 1)
        filenames_cell = self.parser.select(filenames_line, 'td')[2]
        # put a line break after each '.srt' occurrence
        file_names = filenames_cell.text_content().strip() \
                                   .replace('.srt', '.srt\n')
        description = u"files :\n" + file_names

        subtitle = Subtitle(subtitle_id, name)
        subtitle.url = url
        subtitle.language = lang
        subtitle.nb_cd = nb_cd
        subtitle.description = description
        return subtitle

    def iter_subtitles(self):
        """
        Return the single Subtitle of this page.

        Despite its name this returns one object rather than an iterator;
        SeriePage.iter_subtitles yields the returned value directly.
        """
        return self.get_subtitle()