opensub: all subtitles are built from the detail page

This commit is contained in:
Julien Veyssier 2013-02-25 18:47:37 +01:00
commit d8e9f779b9

View file

@ -76,35 +76,43 @@ class SubtitlesPage(BasePage):
links = self.parser.select(line,'a') links = self.parser.select(line,'a')
a = links[0] a = links[0]
urldetail = a.attrib.get('href','') urldetail = a.attrib.get('href','')
name = u" ".join(a.text.strip().split()) self.browser.location("http://www.opensubtitles.org%s"%urldetail)
spanlist = self.parser.select(first_cell,'span') assert self.browser.is_on_page(SubtitlePage)
if len(spanlist) > 0: # subtitle page does the job
long_name = spanlist[0].attrib.get('title','') return self.browser.page.get_subtitle()
else: """
texts = first_cell.itertext() faster but less accurate
long_name = texts.next() we can already get the subtitle from the list page
long_name = texts.next() """
if "Download at 25" in long_name: #name = u" ".join(a.text.strip().split())
long_name = "---" #spanlist = self.parser.select(first_cell,'span')
name = "%s (%s)"%(name,long_name) #if len(spanlist) > 0:
second_cell = cells[1] # long_name = spanlist[0].attrib.get('title','')
link = self.parser.select(second_cell,'a',1) #else:
lang = link.attrib.get('href','').split('/')[-1].split('-')[-1] # texts = first_cell.itertext()
nb_cd = int(cells[2].text.strip().lower().replace('cd','')) # long_name = texts.next()
fps = 0 # long_name = texts.next()
desc = '' # if "Download at 25" in long_name:
cell_dl = cells[4] # long_name = "---"
href = self.parser.select(cell_dl,'a',1).attrib.get('href','') #name = "%s (%s)"%(name,long_name)
url = "http://www.opensubtitles.org%s"%href #second_cell = cells[1]
id = href.split('/')[-1] #link = self.parser.select(second_cell,'a',1)
#lang = link.attrib.get('href','').split('/')[-1].split('-')[-1]
#nb_cd = int(cells[2].text.strip().lower().replace('cd',''))
#fps = 0
#desc = ''
#cell_dl = cells[4]
#href = self.parser.select(cell_dl,'a',1).attrib.get('href','')
#url = "http://www.opensubtitles.org%s"%href
#id = href.split('/')[-1]
subtitle = Subtitle(id,name) #subtitle = Subtitle(id,name)
subtitle.url = url #subtitle.url = url
subtitle.fps = fps #subtitle.fps = fps
subtitle.language = lang #subtitle.language = lang
subtitle.nb_cd = nb_cd #subtitle.nb_cd = nb_cd
subtitle.description = "no desc" #subtitle.description = "no desc"
return subtitle #return subtitle
class SubtitlePage(BasePage): class SubtitlePage(BasePage):