[subtitle] add language in search

This commit is contained in:
Julien Veyssier 2013-02-22 14:47:46 +01:00
commit 54e21fb21a
6 changed files with 25 additions and 14 deletions

View file

@ -34,6 +34,7 @@ class AttilasubBackend(BaseBackend, ICapSubtitle):
VERSION = '0.f' VERSION = '0.f'
DESCRIPTION = '"Attila'' s Website 2.0" french subtitles' DESCRIPTION = '"Attila'' s Website 2.0" french subtitles'
LICENSE = 'AGPLv3+' LICENSE = 'AGPLv3+'
LANGUAGE_LIST = ['fr']
BROWSER = AttilasubBrowser BROWSER = AttilasubBrowser
def create_default_browser(self): def create_default_browser(self):
@ -49,5 +50,7 @@ class AttilasubBackend(BaseBackend, ICapSubtitle):
return self.browser.openurl(subtitle.url.encode('utf-8')).read() return self.browser.openurl(subtitle.url.encode('utf-8')).read()
def iter_subtitles(self, pattern): def iter_subtitles(self, language, pattern):
return self.browser.iter_subtitles(quote_plus(pattern.encode('utf-8'))) if language not in self.LANGUAGE_LIST:
return []
return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))

View file

@ -36,10 +36,10 @@ class AttilasubBrowser(BaseBrowser):
'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage, 'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage,
} }
def iter_subtitles(self, pattern): def iter_subtitles(self, language, pattern):
self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' % pattern.encode('utf-8')) self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' % pattern.encode('utf-8'))
assert self.is_on_page(SearchPage) assert self.is_on_page(SearchPage)
return self.page.iter_subtitles(pattern) return self.page.iter_subtitles(language,pattern)
def get_subtitle(self, id): def get_subtitle(self, id):
url_end = id.split('|')[0] url_end = id.split('|')[0]

View file

@ -35,7 +35,7 @@ __all__ = ['SubtitlesPage','SearchPage']
class SearchPage(BasePage): class SearchPage(BasePage):
def iter_subtitles(self,pattern): def iter_subtitles(self, language, pattern):
fontresult = self.parser.select(self.document.getroot(),'div.search-results font.search-results') fontresult = self.parser.select(self.document.getroot(),'div.search-results font.search-results')
# for each result in freefind, explore the subtitle list page to iter subtitles # for each result in freefind, explore the subtitle list page to iter subtitles
for res in fontresult: for res in fontresult:
@ -44,7 +44,7 @@ class SearchPage(BasePage):
self.browser.location(url) self.browser.location(url)
assert self.browser.is_on_page(SubtitlesPage) assert self.browser.is_on_page(SubtitlesPage)
# subtitles page does the job # subtitles page does the job
for subtitle in self.browser.page.iter_subtitles(pattern): for subtitle in self.browser.page.iter_subtitles(language, pattern):
yield subtitle yield subtitle
@ -72,11 +72,12 @@ class SubtitlesPage(BasePage):
subtitle = Subtitle(id,name) subtitle = Subtitle(id,name)
subtitle.url = url subtitle.url = url
subtitle.fps = 0 subtitle.fps = 0
subtitle.language = "fre"
subtitle.nb_cd = nb_cd subtitle.nb_cd = nb_cd
subtitle.description = "no desc" subtitle.description = "no desc"
return subtitle return subtitle
def iter_subtitles(self,pattern): def iter_subtitles(self,language, pattern):
pattern = pattern.strip().replace('+',' ') pattern = pattern.strip().replace('+',' ')
pattern_words = pattern.split() pattern_words = pattern.split()
tab = self.parser.select(self.document.getroot(),'table[bordercolor="#B8C0B2"]') tab = self.parser.select(self.document.getroot(),'table[bordercolor="#B8C0B2"]')
@ -84,6 +85,9 @@ class SubtitlesPage(BasePage):
tab = self.parser.select(self.document.getroot(),'table[bordercolordark="#B8C0B2"]') tab = self.parser.select(self.document.getroot(),'table[bordercolordark="#B8C0B2"]')
if len(tab) == 0: if len(tab) == 0:
return return
# some freefind results point to useless pages
if tab[0].attrib.get('width','') != '100%':
return
for line in tab[0].getiterator('tr'): for line in tab[0].getiterator('tr'):
cols = self.parser.select(line,'td') cols = self.parser.select(line,'td')
traduced_title = self.parser.select(cols[0],'font',1).text.lower() traduced_title = self.parser.select(cols[0],'font',1).text.lower()
@ -112,6 +116,7 @@ class SubtitlesPage(BasePage):
subtitle = Subtitle(id,name) subtitle = Subtitle(id,name)
subtitle.url = url subtitle.url = url
subtitle.fps = 0 subtitle.fps = 0
subtitle.language = "fre"
subtitle.nb_cd = nb_cd subtitle.nb_cd = nb_cd
subtitle.description = "no desc" subtitle.description = "no desc"
yield subtitle yield subtitle

View file

@ -27,7 +27,7 @@ class AttilasubTest(BackendTest):
BACKEND = 'attilasub' BACKEND = 'attilasub'
def test_subtitle(self): def test_subtitle(self):
subtitles = list(self.backend.iter_subtitles('spiderman')) subtitles = list(self.backend.iter_subtitles('fr','spiderman'))
assert (len(subtitles) > 0) assert (len(subtitles) > 0)
for subtitle in subtitles: for subtitle in subtitles:
path, qs = urllib.splitquery(subtitle.url) path, qs = urllib.splitquery(subtitle.url)

View file

@ -45,6 +45,7 @@ class SubtitleInfoFormatter(IFormatter):
result += 'ID: %s\n' % obj.fullid result += 'ID: %s\n' % obj.fullid
result += 'URL: %s\n' % obj.url result += 'URL: %s\n' % obj.url
result += 'FPS: %s\n' % obj.fps result += 'FPS: %s\n' % obj.fps
result += 'LANG: %s\n' % obj.language
result += 'NB CD: %s\n' % obj.nb_cd result += 'NB CD: %s\n' % obj.nb_cd
result += '\n%sDescription%s\n' % (self.BOLD, self.NC) result += '\n%sDescription%s\n' % (self.BOLD, self.NC)
result += obj.description result += obj.description
@ -58,7 +59,7 @@ class SubtitleListFormatter(PrettyFormatter):
return obj.name return obj.name
def get_description(self, obj): def get_description(self, obj):
return '%s CD ; url : %s' % (obj.nb_cd,obj.url) return 'lang : %s ; %s CD ; url : %s' % (obj.language,obj.nb_cd,obj.url)
class Suboob(ReplApplication): class Suboob(ReplApplication):
@ -140,17 +141,18 @@ class Suboob(ReplApplication):
print >>sys.stderr, 'Subtitle "%s" not found' % id print >>sys.stderr, 'Subtitle "%s" not found' % id
return 3 return 3
def do_search(self, pattern): def do_search(self,line):
""" """
search [PATTERN] search language [PATTERN]
Search subtitles. Search subtitles.
""" """
language, pattern = self.parse_command_args(line, 2, 1)
self.change_path([u'search']) self.change_path([u'search'])
if not pattern: if not pattern:
pattern = None pattern = None
self.start_format(pattern=pattern) self.start_format(pattern=pattern)
for backend, subtitle in self.do('iter_subtitles', pattern=pattern): for backend, subtitle in self.do('iter_subtitles', language=language, pattern=pattern):
self.cached_format(subtitle) self.cached_format(subtitle)
self.flush() self.flush()

View file

@ -31,8 +31,9 @@ class Subtitle(CapBaseObject):
""" """
name = StringField('Name of subtitle') name = StringField('Name of subtitle')
url = StringField('Direct url to subtitle file') url = StringField('Direct url to subtitle file')
fps = StringField('Framerate of corresponding video') fps = FloatField('Framerate of corresponding video')
nb_cd = StringField('Number of cd') nb_cd = IntField('Number of cd')
language = StringField('Language of the subtitle')
description=StringField('Description of corresponding video') description=StringField('Description of corresponding video')
def __init__(self, id, name): def __init__(self, id, name):