From 54e21fb21a1c921313acdc98aa2fc542809d15d9 Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Fri, 22 Feb 2013 14:47:46 +0100 Subject: [PATCH] [subtitle] add language in search --- modules/attilasub/backend.py | 7 +++++-- modules/attilasub/browser.py | 4 ++-- modules/attilasub/pages.py | 11 ++++++++--- modules/attilasub/test.py | 2 +- weboob/applications/suboob/suboob.py | 10 ++++++---- weboob/capabilities/subtitle.py | 5 +++-- 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/modules/attilasub/backend.py b/modules/attilasub/backend.py index 6b9bc6b9..bfe0446e 100644 --- a/modules/attilasub/backend.py +++ b/modules/attilasub/backend.py @@ -34,6 +34,7 @@ class AttilasubBackend(BaseBackend, ICapSubtitle): VERSION = '0.f' DESCRIPTION = '"Attila'' s Website 2.0" french subtitles' LICENSE = 'AGPLv3+' + LANGUAGE_LIST = ['fr'] BROWSER = AttilasubBrowser def create_default_browser(self): @@ -49,5 +50,7 @@ class AttilasubBackend(BaseBackend, ICapSubtitle): return self.browser.openurl(subtitle.url.encode('utf-8')).read() - def iter_subtitles(self, pattern): - return self.browser.iter_subtitles(quote_plus(pattern.encode('utf-8'))) + def iter_subtitles(self, language, pattern): + if language not in self.LANGUAGE_LIST: + return [] + return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8'))) diff --git a/modules/attilasub/browser.py b/modules/attilasub/browser.py index 758a7fc3..2ef69bd2 100644 --- a/modules/attilasub/browser.py +++ b/modules/attilasub/browser.py @@ -36,10 +36,10 @@ class AttilasubBrowser(BaseBrowser): 'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage, } - def iter_subtitles(self, pattern): + def iter_subtitles(self, language, pattern): self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' % pattern.encode('utf-8')) assert self.is_on_page(SearchPage) - return self.page.iter_subtitles(pattern) + return self.page.iter_subtitles(language,pattern) def get_subtitle(self, id): url_end = id.split('|')[0] diff --git a/modules/attilasub/pages.py b/modules/attilasub/pages.py index f0222c64..f630729e 100644 --- a/modules/attilasub/pages.py +++ b/modules/attilasub/pages.py @@ -35,7 +35,7 @@ __all__ = ['SubtitlesPage','SearchPage'] class SearchPage(BasePage): - def iter_subtitles(self,pattern): + def iter_subtitles(self, language, pattern): fontresult = self.parser.select(self.document.getroot(),'div.search-results font.search-results') # for each result in freefind, explore the subtitle list page to iter subtitles for res in fontresult: @@ -44,7 +44,7 @@ class SearchPage(BasePage): self.browser.location(url) assert self.browser.is_on_page(SubtitlesPage) # subtitles page does the job - for subtitle in self.browser.page.iter_subtitles(pattern): + for subtitle in self.browser.page.iter_subtitles(language, pattern): yield subtitle @@ -72,11 +72,12 @@ class SubtitlesPage(BasePage): subtitle = Subtitle(id,name) subtitle.url = url subtitle.fps = 0 + subtitle.language = "fre" subtitle.nb_cd = nb_cd subtitle.description = "no desc" return subtitle - def iter_subtitles(self,pattern): + def iter_subtitles(self,language, pattern): pattern = pattern.strip().replace('+',' ') pattern_words = pattern.split() tab = self.parser.select(self.document.getroot(),'table[bordercolor="#B8C0B2"]') @@ -84,6 +85,9 @@ class SubtitlesPage(BasePage): tab = self.parser.select(self.document.getroot(),'table[bordercolordark="#B8C0B2"]') if len(tab) == 0: return + # some results of freefind point on useless pages + if tab[0].attrib.get('width','') != '100%': + return for line in tab[0].getiterator('tr'): cols = self.parser.select(line,'td') traduced_title = self.parser.select(cols[0],'font',1).text.lower() @@ -112,6 +116,7 @@ class SubtitlesPage(BasePage): subtitle = Subtitle(id,name) subtitle.url = url subtitle.fps = 0 + subtitle.language = "fre" subtitle.nb_cd = nb_cd subtitle.description = "no desc" yield subtitle diff --git a/modules/attilasub/test.py b/modules/attilasub/test.py index d7f4411c..702ffc31 100644 --- a/modules/attilasub/test.py +++ b/modules/attilasub/test.py @@ -27,7 +27,7 @@ class AttilasubTest(BackendTest): BACKEND = 'attilasub' def test_subtitle(self): - subtitles = list(self.backend.iter_subtitles('spiderman')) + subtitles = list(self.backend.iter_subtitles('fr','spiderman')) assert (len(subtitles) > 0) for subtitle in subtitles: path, qs = urllib.splitquery(subtitle.url) diff --git a/weboob/applications/suboob/suboob.py b/weboob/applications/suboob/suboob.py index 419244e5..c7f3c42b 100644 --- a/weboob/applications/suboob/suboob.py +++ b/weboob/applications/suboob/suboob.py @@ -45,6 +45,7 @@ class SubtitleInfoFormatter(IFormatter): result += 'ID: %s\n' % obj.fullid result += 'URL: %s\n' % obj.url result += 'FPS: %s\n' % obj.fps + result += 'LANG: %s\n' % obj.language result += 'NB CD: %s\n' % obj.nb_cd result += '\n%sDescription%s\n' % (self.BOLD, self.NC) result += obj.description @@ -58,7 +59,7 @@ class SubtitleListFormatter(PrettyFormatter): return obj.name def get_description(self, obj): - return '%s CD ; url : %s' % (obj.nb_cd,obj.url) + return 'lang : %s ; %s CD ; url : %s' % (obj.language,obj.nb_cd,obj.url) class Suboob(ReplApplication): @@ -140,17 +141,18 @@ class Suboob(ReplApplication): print >>sys.stderr, 'Subtitle "%s" not found' % id return 3 - def do_search(self, pattern): + def do_search(self,line): """ - search [PATTERN] + search language [PATTERN] Search subtitles. """ + language, pattern = self.parse_command_args(line, 2, 1) self.change_path([u'search']) if not pattern: pattern = None self.start_format(pattern=pattern) - for backend, subtitle in self.do('iter_subtitles', pattern=pattern): + for backend, subtitle in self.do('iter_subtitles', language=language, pattern=pattern): self.cached_format(subtitle) self.flush() diff --git a/weboob/capabilities/subtitle.py b/weboob/capabilities/subtitle.py index 3ec4557c..db0b2c2e 100644 --- a/weboob/capabilities/subtitle.py +++ b/weboob/capabilities/subtitle.py @@ -31,8 +31,9 @@ class Subtitle(CapBaseObject): """ name = StringField('Name of subtitle') url = StringField('Direct url to subtitle file') - fps = StringField('Framerate of corresponding video') - nb_cd = StringField('Number of cd') + fps = FloatField('Framerate of corresponding video') + nb_cd = IntField('Number of cd') + language = StringField('Language of the subtitle') description=StringField('Description of corresponding video') def __init__(self, id, name):