autopep8 with 120 chars line length on my modules
This commit is contained in:
parent
6a7bc0924d
commit
5d923bc73b
39 changed files with 434 additions and 426 deletions
|
|
@ -17,7 +17,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.capabilities.recipe import ICapRecipe,Recipe
|
from weboob.capabilities.recipe import ICapRecipe, Recipe
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend
|
||||||
|
|
||||||
from .browser import SevenFiftyGramsBrowser
|
from .browser import SevenFiftyGramsBrowser
|
||||||
|
|
@ -46,16 +46,16 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
|
||||||
def fill_recipe(self, recipe, fields):
|
def fill_recipe(self, recipe, fields):
|
||||||
if 'nb_person' in fields or 'instructions' in fields:
|
if 'nb_person' in fields or 'instructions' in fields:
|
||||||
rec = self.get_recipe(recipe.id)
|
rec = self.get_recipe(recipe.id)
|
||||||
recipe.picture_url = rec.picture_url
|
recipe.picture_url = rec.picture_url
|
||||||
recipe.instructions = rec.instructions
|
recipe.instructions = rec.instructions
|
||||||
recipe.ingredients = rec.ingredients
|
recipe.ingredients = rec.ingredients
|
||||||
recipe.comments = rec.comments
|
recipe.comments = rec.comments
|
||||||
recipe.nb_person = rec.nb_person
|
recipe.nb_person = rec.nb_person
|
||||||
recipe.cooking_time = rec.cooking_time
|
recipe.cooking_time = rec.cooking_time
|
||||||
recipe.preparation_time = rec.preparation_time
|
recipe.preparation_time = rec.preparation_time
|
||||||
|
|
||||||
return recipe
|
return recipe
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
Recipe:fill_recipe,
|
Recipe: fill_recipe,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,10 +34,10 @@ class SevenFiftyGramsBrowser(BaseBrowser):
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://www.750g.com/recettes_.*.htm': ResultsPage,
|
'http://www.750g.com/recettes_.*.htm': ResultsPage,
|
||||||
'http://www.750g.com/fiche_de_cuisine_complete.htm\?recettes_id=[0-9]*': RecipePage,
|
'http://www.750g.com/fiche_de_cuisine_complete.htm\?recettes_id=[0-9]*': RecipePage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_recipes(self, pattern):
|
def iter_recipes(self, pattern):
|
||||||
self.location('http://www.750g.com/recettes_%s.htm' % (pattern.replace(' ','_')))
|
self.location('http://www.750g.com/recettes_%s.htm' % (pattern.replace(' ', '_')))
|
||||||
assert self.is_on_page(ResultsPage)
|
assert self.is_on_page(ResultsPage)
|
||||||
return self.page.iter_recipes()
|
return self.page.iter_recipes()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,35 +30,36 @@ class ResultsPage(BasePage):
|
||||||
""" Page which contains results as a list of recipies
|
""" Page which contains results as a list of recipies
|
||||||
"""
|
"""
|
||||||
def iter_recipes(self):
|
def iter_recipes(self):
|
||||||
for div in self.parser.select(self.document.getroot(),'div.recette_description > div.data'):
|
for div in self.parser.select(self.document.getroot(), 'div.recette_description > div.data'):
|
||||||
links = self.parser.select(div,'div.info > p.title > a.fn')
|
links = self.parser.select(div, 'div.info > p.title > a.fn')
|
||||||
if len(links) > 0:
|
if len(links) > 0:
|
||||||
link = links[0]
|
link = links[0]
|
||||||
title = unicode(link.text)
|
title = unicode(link.text)
|
||||||
#id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
|
# id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
|
||||||
id = unicode(self.parser.select(div,'div.carnet-add a',1).attrib.get('href','').split('=')[-1])
|
id = unicode(self.parser.select(div, 'div.carnet-add a', 1).attrib.get('href', '').split('=')[-1])
|
||||||
thumbnail_url = NotAvailable
|
thumbnail_url = NotAvailable
|
||||||
short_description = NotAvailable
|
short_description = NotAvailable
|
||||||
|
|
||||||
imgs = self.parser.select(div,'img.recipe-image')
|
imgs = self.parser.select(div, 'img.recipe-image')
|
||||||
if len(imgs) > 0:
|
if len(imgs) > 0:
|
||||||
thumbnail_url = unicode(imgs[0].attrib.get('src',''))
|
thumbnail_url = unicode(imgs[0].attrib.get('src', ''))
|
||||||
short_description = unicode(' '.join(self.parser.select(div,'div.infos_column',1).text_content().split()).strip())
|
short_description = unicode(' '.join(self.parser.select(
|
||||||
imgs_cost = self.parser.select(div,'div.infos_column img')
|
div, 'div.infos_column', 1).text_content().split()).strip())
|
||||||
|
imgs_cost = self.parser.select(div, 'div.infos_column img')
|
||||||
cost_tot = len(imgs_cost)
|
cost_tot = len(imgs_cost)
|
||||||
cost_on = 0
|
cost_on = 0
|
||||||
for img in imgs_cost:
|
for img in imgs_cost:
|
||||||
if img.attrib.get('src','').endswith('euro_on.png'):
|
if img.attrib.get('src', '').endswith('euro_on.png'):
|
||||||
cost_on += 1
|
cost_on += 1
|
||||||
short_description += u' %s/%s'%(cost_on,cost_tot)
|
short_description += u' %s/%s' % (cost_on, cost_tot)
|
||||||
|
|
||||||
recipe = Recipe(id,title)
|
recipe = Recipe(id, title)
|
||||||
recipe.thumbnail_url = thumbnail_url
|
recipe.thumbnail_url = thumbnail_url
|
||||||
recipe.short_description= short_description
|
recipe.short_description = short_description
|
||||||
recipe.instructions = NotLoaded
|
recipe.instructions = NotLoaded
|
||||||
recipe.ingredients = NotLoaded
|
recipe.ingredients = NotLoaded
|
||||||
recipe.nb_person = NotLoaded
|
recipe.nb_person = NotLoaded
|
||||||
recipe.cooking_time = NotLoaded
|
recipe.cooking_time = NotLoaded
|
||||||
recipe.preparation_time = NotLoaded
|
recipe.preparation_time = NotLoaded
|
||||||
yield recipe
|
yield recipe
|
||||||
|
|
||||||
|
|
@ -76,10 +77,10 @@ class RecipePage(BasePage):
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
comments = []
|
comments = []
|
||||||
|
|
||||||
title = unicode(self.parser.select(self.document.getroot(),'head > title',1).text.split(' - ')[1])
|
title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
|
||||||
main = self.parser.select(self.document.getroot(),'div.recette_description',1)
|
main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
|
||||||
|
|
||||||
rec_infos = self.parser.select(self.document.getroot(),'div.recette_infos div.infos_column strong')
|
rec_infos = self.parser.select(self.document.getroot(), 'div.recette_infos div.infos_column strong')
|
||||||
for info_title in rec_infos:
|
for info_title in rec_infos:
|
||||||
if u'Temps de préparation' in unicode(info_title.text):
|
if u'Temps de préparation' in unicode(info_title.text):
|
||||||
if info_title.tail.strip() != '':
|
if info_title.tail.strip() != '':
|
||||||
|
|
@ -96,31 +97,31 @@ class RecipePage(BasePage):
|
||||||
nb_person = int(info_title.tail)
|
nb_person = int(info_title.tail)
|
||||||
|
|
||||||
ingredients = []
|
ingredients = []
|
||||||
p_ing = self.parser.select(main,'div.data.top.left > div.content p')
|
p_ing = self.parser.select(main, 'div.data.top.left > div.content p')
|
||||||
for ing in p_ing:
|
for ing in p_ing:
|
||||||
ingtxt = unicode(ing.text_content().strip())
|
ingtxt = unicode(ing.text_content().strip())
|
||||||
if ingtxt != '':
|
if ingtxt != '':
|
||||||
ingredients.append(ingtxt)
|
ingredients.append(ingtxt)
|
||||||
|
|
||||||
lines_instr = self.parser.select(main,'div.data.top.right div.content li')
|
lines_instr = self.parser.select(main, 'div.data.top.right div.content li')
|
||||||
if len(lines_instr) > 0:
|
if len(lines_instr) > 0:
|
||||||
instructions = u''
|
instructions = u''
|
||||||
for line in lines_instr:
|
for line in lines_instr:
|
||||||
inst = ' '.join(line.text_content().strip().split())
|
inst = ' '.join(line.text_content().strip().split())
|
||||||
instructions += '%s\n'% inst
|
instructions += '%s\n' % inst
|
||||||
instructions = instructions.strip('\n')
|
instructions = instructions.strip('\n')
|
||||||
|
|
||||||
imgillu = self.parser.select(self.document.getroot(),'div.resume_recette_illustree img.photo')
|
imgillu = self.parser.select(self.document.getroot(), 'div.resume_recette_illustree img.photo')
|
||||||
if len(imgillu) > 0:
|
if len(imgillu) > 0:
|
||||||
picture_url = unicode(imgillu[0].attrib.get('src',''))
|
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
||||||
|
|
||||||
for divcom in self.parser.select(self.document.getroot(),'div.comment-outer'):
|
for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'):
|
||||||
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
comtxt = unicode(' '.join(divcom.text_content().strip().split()))
|
||||||
if u'| Répondre' in comtxt:
|
if u'| Répondre' in comtxt:
|
||||||
comtxt = comtxt.strip('0123456789').replace(u' | Répondre','')
|
comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
|
||||||
comments.append(comtxt)
|
comments.append(comtxt)
|
||||||
|
|
||||||
recipe = Recipe(id,title)
|
recipe = Recipe(id, title)
|
||||||
recipe.preparation_time = preparation_time
|
recipe.preparation_time = preparation_time
|
||||||
recipe.cooking_time = cooking_time
|
recipe.cooking_time = cooking_time
|
||||||
recipe.nb_person = nb_person
|
recipe.nb_person = nb_person
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported
|
from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend
|
||||||
|
|
||||||
from .browser import AttilasubBrowser
|
from .browser import AttilasubBrowser
|
||||||
|
|
@ -53,4 +53,4 @@ class AttilasubBackend(BaseBackend, ICapSubtitle):
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
if language not in self.LANGUAGE_LIST:
|
if language not in self.LANGUAGE_LIST:
|
||||||
raise LanguageNotSupported()
|
raise LanguageNotSupported()
|
||||||
return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))
|
return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
|
||||||
|
|
|
||||||
|
|
@ -34,12 +34,13 @@ class AttilasubBrowser(BaseBrowser):
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://search.freefind.com/find.html.*': SearchPage,
|
'http://search.freefind.com/find.html.*': SearchPage,
|
||||||
'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage,
|
'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' % pattern.encode('utf-8'))
|
self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' %
|
||||||
|
pattern.encode('utf-8'))
|
||||||
assert self.is_on_page(SearchPage)
|
assert self.is_on_page(SearchPage)
|
||||||
return self.page.iter_subtitles(language,pattern)
|
return self.page.iter_subtitles(language, pattern)
|
||||||
|
|
||||||
def get_subtitle(self, id):
|
def get_subtitle(self, id):
|
||||||
url_end = id.split('|')[0]
|
url_end = id.split('|')[0]
|
||||||
|
|
|
||||||
|
|
@ -23,16 +23,16 @@ from weboob.capabilities.base import NotAvailable
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['SubtitlesPage','SearchPage']
|
__all__ = ['SubtitlesPage', 'SearchPage']
|
||||||
|
|
||||||
|
|
||||||
class SearchPage(BasePage):
|
class SearchPage(BasePage):
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
fontresult = self.parser.select(self.document.getroot(),'div.search-results font.search-results')
|
fontresult = self.parser.select(self.document.getroot(), 'div.search-results font.search-results')
|
||||||
# for each result in freefind, explore the subtitle list page to iter subtitles
|
# for each result in freefind, explore the subtitle list page to iter subtitles
|
||||||
for res in fontresult:
|
for res in fontresult:
|
||||||
a = self.parser.select(res,'a',1)
|
a = self.parser.select(res, 'a', 1)
|
||||||
url = a.attrib.get('href','')
|
url = a.attrib.get('href', '')
|
||||||
self.browser.location(url)
|
self.browser.location(url)
|
||||||
assert self.browser.is_on_page(SubtitlesPage)
|
assert self.browser.is_on_page(SubtitlesPage)
|
||||||
# subtitles page does the job
|
# subtitles page does the job
|
||||||
|
|
@ -41,15 +41,15 @@ class SearchPage(BasePage):
|
||||||
|
|
||||||
|
|
||||||
class SubtitlesPage(BasePage):
|
class SubtitlesPage(BasePage):
|
||||||
def get_subtitle(self,id):
|
def get_subtitle(self, id):
|
||||||
href = id.split('|')[1]
|
href = id.split('|')[1]
|
||||||
# we have to find the 'tr' which contains the link to this address
|
# we have to find the 'tr' which contains the link to this address
|
||||||
a = self.parser.select(self.document.getroot(),'a[href="%s"]'%href,1)
|
a = self.parser.select(self.document.getroot(), 'a[href="%s"]' % href, 1)
|
||||||
line = a.getparent().getparent().getparent().getparent().getparent()
|
line = a.getparent().getparent().getparent().getparent().getparent()
|
||||||
cols = self.parser.select(line,'td')
|
cols = self.parser.select(line, 'td')
|
||||||
traduced_title = self.parser.select(cols[0],'font',1).text.lower()
|
traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
|
||||||
original_title = self.parser.select(cols[1],'font',1).text.lower()
|
original_title = self.parser.select(cols[1], 'font', 1).text.lower()
|
||||||
nb_cd = self.parser.select(cols[2],'font',1).text.strip()
|
nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
|
||||||
nb_cd = int(nb_cd.split()[0])
|
nb_cd = int(nb_cd.split()[0])
|
||||||
|
|
||||||
traduced_title_words = traduced_title.split()
|
traduced_title_words = traduced_title.split()
|
||||||
|
|
@ -59,30 +59,30 @@ class SubtitlesPage(BasePage):
|
||||||
traduced_title = " ".join(traduced_title_words)
|
traduced_title = " ".join(traduced_title_words)
|
||||||
original_title = " ".join(original_title_words)
|
original_title = " ".join(original_title_words)
|
||||||
|
|
||||||
name = unicode('%s (%s)'%(original_title,traduced_title))
|
name = unicode('%s (%s)' % (original_title, traduced_title))
|
||||||
url = unicode('http://davidbillemont3.free.fr/%s'%href)
|
url = unicode('http://davidbillemont3.free.fr/%s' % href)
|
||||||
subtitle = Subtitle(id,name)
|
subtitle = Subtitle(id, name)
|
||||||
subtitle.url = url
|
subtitle.url = url
|
||||||
subtitle.language = unicode('fr')
|
subtitle.language = unicode('fr')
|
||||||
subtitle.nb_cd = nb_cd
|
subtitle.nb_cd = nb_cd
|
||||||
subtitle.description = NotAvailable
|
subtitle.description = NotAvailable
|
||||||
return subtitle
|
return subtitle
|
||||||
|
|
||||||
def iter_subtitles(self,language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
pattern = pattern.strip().replace('+',' ').lower()
|
pattern = pattern.strip().replace('+', ' ').lower()
|
||||||
pattern_words = pattern.split()
|
pattern_words = pattern.split()
|
||||||
tab = self.parser.select(self.document.getroot(),'table[bordercolor="#B8C0B2"]')
|
tab = self.parser.select(self.document.getroot(), 'table[bordercolor="#B8C0B2"]')
|
||||||
if len(tab) == 0:
|
if len(tab) == 0:
|
||||||
tab = self.parser.select(self.document.getroot(),'table[bordercolordark="#B8C0B2"]')
|
tab = self.parser.select(self.document.getroot(), 'table[bordercolordark="#B8C0B2"]')
|
||||||
if len(tab) == 0:
|
if len(tab) == 0:
|
||||||
return
|
return
|
||||||
# some results of freefind point on useless pages
|
# some results of freefind point on useless pages
|
||||||
if tab[0].attrib.get('width','') != '100%':
|
if tab[0].attrib.get('width', '') != '100%':
|
||||||
return
|
return
|
||||||
for line in tab[0].getiterator('tr'):
|
for line in tab[0].getiterator('tr'):
|
||||||
cols = self.parser.select(line,'td')
|
cols = self.parser.select(line, 'td')
|
||||||
traduced_title = self.parser.select(cols[0],'font',1).text.lower()
|
traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
|
||||||
original_title = self.parser.select(cols[1],'font',1).text.lower()
|
original_title = self.parser.select(cols[1], 'font', 1).text.lower()
|
||||||
|
|
||||||
traduced_title_words = traduced_title.split()
|
traduced_title_words = traduced_title.split()
|
||||||
original_title_words = original_title.split()
|
original_title_words = original_title.split()
|
||||||
|
|
@ -98,13 +98,13 @@ class SubtitlesPage(BasePage):
|
||||||
traduced_title = " ".join(traduced_title_words)
|
traduced_title = " ".join(traduced_title_words)
|
||||||
original_title = " ".join(original_title_words)
|
original_title = " ".join(original_title_words)
|
||||||
|
|
||||||
nb_cd = self.parser.select(cols[2],'font',1).text.strip()
|
nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
|
||||||
nb_cd = int(nb_cd.strip(' CD'))
|
nb_cd = int(nb_cd.strip(' CD'))
|
||||||
name = unicode('%s (%s)'%(original_title,traduced_title))
|
name = unicode('%s (%s)' % (original_title, traduced_title))
|
||||||
href = self.parser.select(cols[3],'a',1).attrib.get('href','')
|
href = self.parser.select(cols[3], 'a', 1).attrib.get('href', '')
|
||||||
url = unicode('http://davidbillemont3.free.fr/%s'%href)
|
url = unicode('http://davidbillemont3.free.fr/%s' % href)
|
||||||
id = unicode('%s|%s'%(self.browser.geturl().split('/')[-1],href))
|
id = unicode('%s|%s' % (self.browser.geturl().split('/')[-1], href))
|
||||||
subtitle = Subtitle(id,name)
|
subtitle = Subtitle(id, name)
|
||||||
subtitle.url = url
|
subtitle.url = url
|
||||||
subtitle.language = unicode('fr')
|
subtitle.language = unicode('fr')
|
||||||
subtitle.nb_cd = nb_cd
|
subtitle.nb_cd = nb_cd
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ class AttilasubTest(BackendTest):
|
||||||
BACKEND = 'attilasub'
|
BACKEND = 'attilasub'
|
||||||
|
|
||||||
def test_subtitle(self):
|
def test_subtitle(self):
|
||||||
subtitles = list(self.backend.iter_subtitles('fr','spiderman'))
|
subtitles = list(self.backend.iter_subtitles('fr', 'spiderman'))
|
||||||
assert (len(subtitles) > 0)
|
assert (len(subtitles) > 0)
|
||||||
for subtitle in subtitles:
|
for subtitle in subtitles:
|
||||||
path, qs = urllib.splitquery(subtitle.url)
|
path, qs = urllib.splitquery(subtitle.url)
|
||||||
|
|
|
||||||
|
|
@ -67,25 +67,25 @@ class ImdbBackend(BaseBackend, ICapCinema):
|
||||||
return self.browser.get_person_biography(id)
|
return self.browser.get_person_biography(id)
|
||||||
|
|
||||||
def get_movie_releases(self, id, country=None):
|
def get_movie_releases(self, id, country=None):
|
||||||
return self.browser.get_movie_releases(id,country)
|
return self.browser.get_movie_releases(id, country)
|
||||||
|
|
||||||
def fill_person(self, person, fields):
|
def fill_person(self, person, fields):
|
||||||
if 'real_name' in fields or 'birth_place' in fields\
|
if 'real_name' in fields or 'birth_place' in fields\
|
||||||
or 'death_date' in fields or 'nationality' in fields\
|
or 'death_date' in fields or 'nationality' in fields\
|
||||||
or 'short_biography' in fields or 'roles' in fields\
|
or 'short_biography' in fields or 'roles' in fields\
|
||||||
or 'birth_date' in fields or 'thumbnail_url' in fields\
|
or 'birth_date' in fields or 'thumbnail_url' in fields\
|
||||||
or 'gender' in fields or fields is None:
|
or 'gender' in fields or fields is None:
|
||||||
per = self.get_person(person.id)
|
per = self.get_person(person.id)
|
||||||
person.real_name = per.real_name
|
person.real_name = per.real_name
|
||||||
person.birth_date = per.birth_date
|
person.birth_date = per.birth_date
|
||||||
person.death_date = per.death_date
|
person.death_date = per.death_date
|
||||||
person.birth_place = per.birth_place
|
person.birth_place = per.birth_place
|
||||||
person.gender = per.gender
|
person.gender = per.gender
|
||||||
person.nationality = per.nationality
|
person.nationality = per.nationality
|
||||||
person.short_biography = per.short_biography
|
person.short_biography = per.short_biography
|
||||||
person.short_description = per.short_description
|
person.short_description = per.short_description
|
||||||
person.roles = per.roles
|
person.roles = per.roles
|
||||||
person.thumbnail_url = per.thumbnail_url
|
person.thumbnail_url = per.thumbnail_url
|
||||||
|
|
||||||
if 'biography' in fields:
|
if 'biography' in fields:
|
||||||
person.biography = self.get_person_biography(person.id)
|
person.biography = self.get_person_biography(person.id)
|
||||||
|
|
@ -94,19 +94,19 @@ class ImdbBackend(BaseBackend, ICapCinema):
|
||||||
|
|
||||||
def fill_movie(self, movie, fields):
|
def fill_movie(self, movie, fields):
|
||||||
if 'other_titles' in fields or 'release_date' in fields\
|
if 'other_titles' in fields or 'release_date' in fields\
|
||||||
or 'duration' in fields or 'country' in fields\
|
or 'duration' in fields or 'country' in fields\
|
||||||
or 'roles' in fields or 'note' in fields\
|
or 'roles' in fields or 'note' in fields\
|
||||||
or 'thumbnail_url' in fields:
|
or 'thumbnail_url' in fields:
|
||||||
mov = self.get_movie(movie.id)
|
mov = self.get_movie(movie.id)
|
||||||
movie.other_titles = mov.other_titles
|
movie.other_titles = mov.other_titles
|
||||||
movie.release_date = mov.release_date
|
movie.release_date = mov.release_date
|
||||||
movie.duration = mov.duration
|
movie.duration = mov.duration
|
||||||
movie.pitch = mov.pitch
|
movie.pitch = mov.pitch
|
||||||
movie.country = mov.country
|
movie.country = mov.country
|
||||||
movie.note = mov.note
|
movie.note = mov.note
|
||||||
movie.roles = mov.roles
|
movie.roles = mov.roles
|
||||||
movie.short_description= mov.short_description
|
movie.short_description = mov.short_description
|
||||||
movie.thumbnail_url = mov.thumbnail_url
|
movie.thumbnail_url = mov.thumbnail_url
|
||||||
|
|
||||||
if 'all_release_dates' in fields:
|
if 'all_release_dates' in fields:
|
||||||
movie.all_release_dates = self.get_movie_releases(movie.id)
|
movie.all_release_dates = self.get_movie_releases(movie.id)
|
||||||
|
|
@ -114,6 +114,6 @@ class ImdbBackend(BaseBackend, ICapCinema):
|
||||||
return movie
|
return movie
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
Person:fill_person,
|
Person: fill_person,
|
||||||
Movie:fill_movie
|
Movie: fill_movie
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,53 +42,55 @@ class ImdbBrowser(BaseBrowser):
|
||||||
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
||||||
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
||||||
'http://www.imdb.com/name/nm[0-9]*/filmo.*': FilmographyPage,
|
'http://www.imdb.com/name/nm[0-9]*/filmo.*': FilmographyPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_movies(self, pattern):
|
def iter_movies(self, pattern):
|
||||||
res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8'))
|
res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8'))
|
||||||
jres = json.loads(res)
|
jres = json.loads(res)
|
||||||
for cat in ['title_popular','title_exact','title_approx']:
|
for cat in ['title_popular', 'title_exact', 'title_approx']:
|
||||||
if cat in jres:
|
if cat in jres:
|
||||||
for m in jres[cat]:
|
for m in jres[cat]:
|
||||||
tdesc = unicode(m['title_description'])
|
tdesc = unicode(m['title_description'])
|
||||||
if '<a' in tdesc and '>' in tdesc:
|
if '<a' in tdesc and '>' in tdesc:
|
||||||
short_description = u'%s %s'%(tdesc.split('<')[0].strip(', '), tdesc.split('>')[1].split('<')[0])
|
short_description = u'%s %s' % (tdesc.split('<')[
|
||||||
|
0].strip(', '), tdesc.split('>')[1].split('<')[0])
|
||||||
else:
|
else:
|
||||||
short_description = tdesc.strip(', ')
|
short_description = tdesc.strip(', ')
|
||||||
movie = Movie(m['id'],latin2unicode(m['title']))
|
movie = Movie(m['id'], latin2unicode(m['title']))
|
||||||
movie.other_titles = NotLoaded
|
movie.other_titles = NotLoaded
|
||||||
movie.release_date = NotLoaded
|
movie.release_date = NotLoaded
|
||||||
movie.duration = NotLoaded
|
movie.duration = NotLoaded
|
||||||
movie.short_description = latin2unicode(short_description)
|
movie.short_description = latin2unicode(short_description)
|
||||||
movie.pitch = NotLoaded
|
movie.pitch = NotLoaded
|
||||||
movie.country = NotLoaded
|
movie.country = NotLoaded
|
||||||
movie.note = NotLoaded
|
movie.note = NotLoaded
|
||||||
movie.roles = NotLoaded
|
movie.roles = NotLoaded
|
||||||
movie.all_release_dates= NotLoaded
|
movie.all_release_dates = NotLoaded
|
||||||
movie.thumbnail_url = NotLoaded
|
movie.thumbnail_url = NotLoaded
|
||||||
yield movie
|
yield movie
|
||||||
|
|
||||||
def iter_persons(self, pattern):
|
def iter_persons(self, pattern):
|
||||||
res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s' % pattern.encode('utf-8'))
|
res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s' % pattern.encode('utf-8'))
|
||||||
jres = json.loads(res)
|
jres = json.loads(res)
|
||||||
for cat in ['name_popular','name_exact','name_approx']:
|
for cat in ['name_popular', 'name_exact', 'name_approx']:
|
||||||
if cat in jres:
|
if cat in jres:
|
||||||
for p in jres[cat]:
|
for p in jres[cat]:
|
||||||
person = Person(p['id'],latin2unicode(p['name']))
|
person = Person(p['id'], latin2unicode(p['name']))
|
||||||
person.real_name = NotLoaded
|
person.real_name = NotLoaded
|
||||||
person.birth_place = NotLoaded
|
person.birth_place = NotLoaded
|
||||||
person.birth_date = NotLoaded
|
person.birth_date = NotLoaded
|
||||||
person.death_date = NotLoaded
|
person.death_date = NotLoaded
|
||||||
person.gender = NotLoaded
|
person.gender = NotLoaded
|
||||||
person.nationality = NotLoaded
|
person.nationality = NotLoaded
|
||||||
person.short_biography= NotLoaded
|
person.short_biography = NotLoaded
|
||||||
person.short_description= latin2unicode(p['description'])
|
person.short_description = latin2unicode(p['description'])
|
||||||
person.roles = NotLoaded
|
person.roles = NotLoaded
|
||||||
person.thumbnail_url = NotLoaded
|
person.thumbnail_url = NotLoaded
|
||||||
yield person
|
yield person
|
||||||
|
|
||||||
def get_movie(self, id):
|
def get_movie(self, id):
|
||||||
res = self.readurl('http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id )
|
res = self.readurl(
|
||||||
|
'http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id)
|
||||||
if res is not None:
|
if res is not None:
|
||||||
jres = json.loads(res)
|
jres = json.loads(res)
|
||||||
else:
|
else:
|
||||||
|
|
@ -122,7 +124,7 @@ class ImdbBrowser(BaseBrowser):
|
||||||
if 'also_known_as' in jres:
|
if 'also_known_as' in jres:
|
||||||
for other_t in jres['also_known_as']:
|
for other_t in jres['also_known_as']:
|
||||||
if 'country' in other_t and 'title' in other_t:
|
if 'country' in other_t and 'title' in other_t:
|
||||||
other_titles.append('%s : %s' % (other_t['country'],htmlparser.unescape(other_t['title'])))
|
other_titles.append('%s : %s' % (other_t['country'], htmlparser.unescape(other_t['title'])))
|
||||||
if 'release_date' in jres:
|
if 'release_date' in jres:
|
||||||
dstr = str(jres['release_date'])
|
dstr = str(jres['release_date'])
|
||||||
year = int(dstr[:4])
|
year = int(dstr[:4])
|
||||||
|
|
@ -134,31 +136,31 @@ class ImdbBrowser(BaseBrowser):
|
||||||
day = int(dstr[-2:])
|
day = int(dstr[-2:])
|
||||||
if day == 0:
|
if day == 0:
|
||||||
day = 1
|
day = 1
|
||||||
release_date = datetime(year,month,day)
|
release_date = datetime(year, month, day)
|
||||||
if 'country' in jres:
|
if 'country' in jres:
|
||||||
country = u''
|
country = u''
|
||||||
for c in jres['country']:
|
for c in jres['country']:
|
||||||
country += '%s, '%c
|
country += '%s, ' % c
|
||||||
country = country[:-2]
|
country = country[:-2]
|
||||||
if 'plot_simple' in jres:
|
if 'plot_simple' in jres:
|
||||||
pitch = unicode(jres['plot_simple'])
|
pitch = unicode(jres['plot_simple'])
|
||||||
if 'rating' in jres and 'rating_count' in jres:
|
if 'rating' in jres and 'rating_count' in jres:
|
||||||
note = u'%s/10 (%s votes)'%(jres['rating'],jres['rating_count'])
|
note = u'%s/10 (%s votes)' % (jres['rating'], jres['rating_count'])
|
||||||
for r in ['actor','director','writer']:
|
for r in ['actor', 'director', 'writer']:
|
||||||
if '%ss'%r in jres:
|
if '%ss' % r in jres:
|
||||||
roles['%s'%r] = list(jres['%ss'%r])
|
roles['%s' % r] = list(jres['%ss' % r])
|
||||||
|
|
||||||
movie = Movie(id,title)
|
movie = Movie(id, title)
|
||||||
movie.other_titles = other_titles
|
movie.other_titles = other_titles
|
||||||
movie.release_date = release_date
|
movie.release_date = release_date
|
||||||
movie.duration = duration
|
movie.duration = duration
|
||||||
movie.pitch = pitch
|
movie.pitch = pitch
|
||||||
movie.country = country
|
movie.country = country
|
||||||
movie.note = note
|
movie.note = note
|
||||||
movie.roles = roles
|
movie.roles = roles
|
||||||
movie.short_description= short_description
|
movie.short_description = short_description
|
||||||
movie.all_release_dates= NotLoaded
|
movie.all_release_dates = NotLoaded
|
||||||
movie.thumbnail_url = thumbnail_url
|
movie.thumbnail_url = thumbnail_url
|
||||||
return movie
|
return movie
|
||||||
|
|
||||||
def get_person(self, id):
|
def get_person(self, id):
|
||||||
|
|
@ -175,7 +177,7 @@ class ImdbBrowser(BaseBrowser):
|
||||||
return self.page.get_biography()
|
return self.page.get_biography()
|
||||||
|
|
||||||
def iter_movie_persons(self, movie_id, role):
|
def iter_movie_persons(self, movie_id, role):
|
||||||
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
|
self.location('http://www.imdb.com/title/%s/fullcredits' % movie_id)
|
||||||
assert self.is_on_page(MovieCrewPage)
|
assert self.is_on_page(MovieCrewPage)
|
||||||
for p in self.page.iter_persons(role):
|
for p in self.page.iter_persons(role):
|
||||||
yield p
|
yield p
|
||||||
|
|
@ -192,13 +194,13 @@ class ImdbBrowser(BaseBrowser):
|
||||||
yield movie
|
yield movie
|
||||||
|
|
||||||
def iter_movie_persons_ids(self, movie_id):
|
def iter_movie_persons_ids(self, movie_id):
|
||||||
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
|
self.location('http://www.imdb.com/title/%s/fullcredits' % movie_id)
|
||||||
assert self.is_on_page(MovieCrewPage)
|
assert self.is_on_page(MovieCrewPage)
|
||||||
for person in self.page.iter_persons_ids():
|
for person in self.page.iter_persons_ids():
|
||||||
yield person
|
yield person
|
||||||
|
|
||||||
def get_movie_releases(self,id, country):
|
def get_movie_releases(self, id, country):
|
||||||
self.location('http://www.imdb.com/title/%s/releaseinfo'%id)
|
self.location('http://www.imdb.com/title/%s/releaseinfo' % id)
|
||||||
assert self.is_on_page(ReleasePage)
|
assert self.is_on_page(ReleasePage)
|
||||||
return self.page.get_movie_releases(country)
|
return self.page.get_movie_releases(country)
|
||||||
|
|
||||||
|
|
@ -222,5 +224,5 @@ dict_hex = {'á': u'á',
|
||||||
|
|
||||||
def latin2unicode(word):
|
def latin2unicode(word):
|
||||||
for key in dict_hex.keys():
|
for key in dict_hex.keys():
|
||||||
word = word.replace(key,dict_hex[key])
|
word = word.replace(key, dict_hex[key])
|
||||||
return unicode(word)
|
return unicode(word)
|
||||||
|
|
|
||||||
|
|
@ -25,28 +25,28 @@ from weboob.tools.browser import BasePage
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['PersonPage','MovieCrewPage','BiographyPage','FilmographyPage','ReleasePage']
|
__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
|
||||||
|
|
||||||
|
|
||||||
class ReleasePage(BasePage):
|
class ReleasePage(BasePage):
|
||||||
''' Page containing releases of a movie
|
''' Page containing releases of a movie
|
||||||
'''
|
'''
|
||||||
def get_movie_releases(self,country_filter):
|
def get_movie_releases(self, country_filter):
|
||||||
result = unicode()
|
result = unicode()
|
||||||
links = self.parser.select(self.document.getroot(),'b a')
|
links = self.parser.select(self.document.getroot(), 'b a')
|
||||||
for a in links:
|
for a in links:
|
||||||
href = a.attrib.get('href','')
|
href = a.attrib.get('href', '')
|
||||||
if href.strip('/').split('/')[0] == 'calendar' and\
|
if href.strip('/').split('/')[0] == 'calendar' and\
|
||||||
(country_filter is None or href.split('region=')[-1].lower() == country_filter):
|
(country_filter is None or href.split('region=')[-1].lower() == country_filter):
|
||||||
country = a.text
|
country = a.text
|
||||||
td_date = self.parser.select(a.getparent().getparent().getparent(),'td')[1]
|
td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
|
||||||
date_links = self.parser.select(td_date,'a')
|
date_links = self.parser.select(td_date, 'a')
|
||||||
if len(date_links) > 1:
|
if len(date_links) > 1:
|
||||||
date = date_links[1].attrib.get('href','').strip('/').split('/')[-1]
|
date = date_links[1].attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
date += '-'+date_links[0].attrib.get('href','').strip('/').split('/')[-1]
|
date += '-'+date_links[0].attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
else:
|
else:
|
||||||
date = unicode(self.parser.select(a.getparent().getparent().getparent(),'td')[1].text_content())
|
date = unicode(self.parser.select(a.getparent().getparent().getparent(), 'td')[1].text_content())
|
||||||
result += '%s : %s\n' % (country,date)
|
result += '%s : %s\n' % (country, date)
|
||||||
if result == u'':
|
if result == u'':
|
||||||
result = NotAvailable
|
result = NotAvailable
|
||||||
else:
|
else:
|
||||||
|
|
@ -59,11 +59,11 @@ class BiographyPage(BasePage):
|
||||||
'''
|
'''
|
||||||
def get_biography(self):
|
def get_biography(self):
|
||||||
bio = unicode()
|
bio = unicode()
|
||||||
tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
|
tn = self.parser.select(self.document.getroot(), 'div#tn15content', 1)
|
||||||
# we only read paragraphs, titles and links
|
# we only read paragraphs, titles and links
|
||||||
for ch in tn.getchildren():
|
for ch in tn.getchildren():
|
||||||
if ch.tag in ['p','h5','a']:
|
if ch.tag in ['p', 'h5', 'a']:
|
||||||
bio += '%s\n\n'%ch.text_content().strip()
|
bio += '%s\n\n' % ch.text_content().strip()
|
||||||
if bio == u'':
|
if bio == u'':
|
||||||
bio = NotAvailable
|
bio = NotAvailable
|
||||||
return bio
|
return bio
|
||||||
|
|
@ -74,52 +74,52 @@ class MovieCrewPage(BasePage):
|
||||||
'''
|
'''
|
||||||
def iter_persons(self, role_filter=None):
|
def iter_persons(self, role_filter=None):
|
||||||
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
|
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
|
||||||
tables = self.parser.select(self.document.getroot(),'table.cast')
|
tables = self.parser.select(self.document.getroot(), 'table.cast')
|
||||||
if len(tables) > 0:
|
if len(tables) > 0:
|
||||||
table = tables[0]
|
table = tables[0]
|
||||||
tds = self.parser.select(table,'td.nm')
|
tds = self.parser.select(table, 'td.nm')
|
||||||
for td in tds:
|
for td in tds:
|
||||||
id = td.find('a').attrib.get('href','').strip('/').split('/')[-1]
|
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
name = unicode(td.find('a').text)
|
name = unicode(td.find('a').text)
|
||||||
char_name = unicode(self.parser.select(td.getparent(),'td.char',1).text_content())
|
char_name = unicode(self.parser.select(td.getparent(), 'td.char', 1).text_content())
|
||||||
person = Person(id,name)
|
person = Person(id, name)
|
||||||
person.short_description = char_name
|
person.short_description = char_name
|
||||||
person.real_name = NotLoaded
|
person.real_name = NotLoaded
|
||||||
person.birth_place = NotLoaded
|
person.birth_place = NotLoaded
|
||||||
person.birth_date = NotLoaded
|
person.birth_date = NotLoaded
|
||||||
person.death_date = NotLoaded
|
person.death_date = NotLoaded
|
||||||
person.gender = NotLoaded
|
person.gender = NotLoaded
|
||||||
person.nationality = NotLoaded
|
person.nationality = NotLoaded
|
||||||
person.short_biography= NotLoaded
|
person.short_biography = NotLoaded
|
||||||
person.roles = NotLoaded
|
person.roles = NotLoaded
|
||||||
person.thumbnail_url = NotLoaded
|
person.thumbnail_url = NotLoaded
|
||||||
yield person
|
yield person
|
||||||
|
|
||||||
for gloss_link in self.parser.select(self.document.getroot(),'table[cellspacing=1] h5 a'):
|
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing=1] h5 a'):
|
||||||
role = gloss_link.attrib.get('name','').rstrip('s')
|
role = gloss_link.attrib.get('name', '').rstrip('s')
|
||||||
if (role_filter is None or (role_filter is not None and role == role_filter)):
|
if (role_filter is None or (role_filter is not None and role == role_filter)):
|
||||||
tbody = gloss_link.getparent().getparent().getparent().getparent()
|
tbody = gloss_link.getparent().getparent().getparent().getparent()
|
||||||
for line in self.parser.select(tbody,'tr')[1:]:
|
for line in self.parser.select(tbody, 'tr')[1:]:
|
||||||
for a in self.parser.select(line,'a'):
|
for a in self.parser.select(line, 'a'):
|
||||||
role_detail = NotAvailable
|
role_detail = NotAvailable
|
||||||
href = a.attrib.get('href','')
|
href = a.attrib.get('href', '')
|
||||||
if '/name/nm' in href:
|
if '/name/nm' in href:
|
||||||
id = href.strip('/').split('/')[-1]
|
id = href.strip('/').split('/')[-1]
|
||||||
name = unicode(a.text)
|
name = unicode(a.text)
|
||||||
if 'glossary' in href:
|
if 'glossary' in href:
|
||||||
role_detail = unicode(a.text)
|
role_detail = unicode(a.text)
|
||||||
person = Person(id,name)
|
person = Person(id, name)
|
||||||
person.short_description = role_detail
|
person.short_description = role_detail
|
||||||
yield person
|
yield person
|
||||||
#yield self.browser.get_person(id)
|
# yield self.browser.get_person(id)
|
||||||
|
|
||||||
def iter_persons_ids(self):
|
def iter_persons_ids(self):
|
||||||
tables = self.parser.select(self.document.getroot(),'table.cast')
|
tables = self.parser.select(self.document.getroot(), 'table.cast')
|
||||||
if len(tables) > 0:
|
if len(tables) > 0:
|
||||||
table = tables[0]
|
table = tables[0]
|
||||||
tds = self.parser.select(table,'td.nm')
|
tds = self.parser.select(table, 'td.nm')
|
||||||
for td in tds:
|
for td in tds:
|
||||||
id = td.find('a').attrib.get('href','').strip('/').split('/')[-1]
|
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
yield id
|
yield id
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -127,7 +127,7 @@ class PersonPage(BasePage):
|
||||||
''' Page giving informations about a person
|
''' Page giving informations about a person
|
||||||
It is used to build a Person instance and to get the movie list related to a person
|
It is used to build a Person instance and to get the movie list related to a person
|
||||||
'''
|
'''
|
||||||
def get_person(self,id):
|
def get_person(self, id):
|
||||||
name = NotAvailable
|
name = NotAvailable
|
||||||
short_biography = NotAvailable
|
short_biography = NotAvailable
|
||||||
short_description = NotAvailable
|
short_description = NotAvailable
|
||||||
|
|
@ -139,52 +139,52 @@ class PersonPage(BasePage):
|
||||||
thumbnail_url = NotAvailable
|
thumbnail_url = NotAvailable
|
||||||
roles = {}
|
roles = {}
|
||||||
nationality = NotAvailable
|
nationality = NotAvailable
|
||||||
td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1)
|
td_overview = self.parser.select(self.document.getroot(), 'td#overview-top', 1)
|
||||||
descs = self.parser.select(td_overview,'span[itemprop=description]')
|
descs = self.parser.select(td_overview, 'span[itemprop=description]')
|
||||||
if len(descs) > 0:
|
if len(descs) > 0:
|
||||||
short_biography = unicode(descs[0].text)
|
short_biography = unicode(descs[0].text)
|
||||||
rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
|
rname_block = self.parser.select(td_overview, 'div.txt-block h4.inline')
|
||||||
if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
|
if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
|
||||||
links = self.parser.select(rname_block[0].getparent(),'a')
|
links = self.parser.select(rname_block[0].getparent(), 'a')
|
||||||
for a in links:
|
for a in links:
|
||||||
href = a.attrib.get('href','').strip()
|
href = a.attrib.get('href', '').strip()
|
||||||
if href == 'bio':
|
if href == 'bio':
|
||||||
real_name = unicode(a.text.strip())
|
real_name = unicode(a.text.strip())
|
||||||
elif 'birth_place' in href:
|
elif 'birth_place' in href:
|
||||||
birth_place = unicode(a.text.lower().strip())
|
birth_place = unicode(a.text.lower().strip())
|
||||||
names = self.parser.select(td_overview,'h1[itemprop=name]')
|
names = self.parser.select(td_overview, 'h1[itemprop=name]')
|
||||||
if len(names) > 0:
|
if len(names) > 0:
|
||||||
name = unicode(names[0].text.strip())
|
name = unicode(names[0].text.strip())
|
||||||
times = self.parser.select(td_overview,'time[itemprop=birthDate]')
|
times = self.parser.select(td_overview, 'time[itemprop=birthDate]')
|
||||||
if len(times) > 0:
|
if len(times) > 0:
|
||||||
time = times[0].attrib.get('datetime','').split('-')
|
time = times[0].attrib.get('datetime', '').split('-')
|
||||||
if len(time) == 3 and int(time[0]) >= 1900:
|
if len(time) == 3 and int(time[0]) >= 1900:
|
||||||
birth_date = datetime(int(time[0]),int(time[1]),int(time[2]))
|
birth_date = datetime(int(time[0]), int(time[1]), int(time[2]))
|
||||||
dtimes = self.parser.select(td_overview,'time[itemprop=deathDate]')
|
dtimes = self.parser.select(td_overview, 'time[itemprop=deathDate]')
|
||||||
if len(dtimes) > 0:
|
if len(dtimes) > 0:
|
||||||
dtime = dtimes[0].attrib.get('datetime','').split('-')
|
dtime = dtimes[0].attrib.get('datetime', '').split('-')
|
||||||
if len(dtime) == 3 and int(dtime[0]) >= 1900:
|
if len(dtime) == 3 and int(dtime[0]) >= 1900:
|
||||||
death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2]))
|
death_date = datetime(int(dtime[0]), int(dtime[1]), int(dtime[2]))
|
||||||
img_thumbnail = self.parser.select(self.document.getroot(),'td#img_primary img')
|
img_thumbnail = self.parser.select(self.document.getroot(), 'td#img_primary img')
|
||||||
if len(img_thumbnail) > 0:
|
if len(img_thumbnail) > 0:
|
||||||
thumbnail_url = unicode(img_thumbnail[0].attrib.get('src',''))
|
thumbnail_url = unicode(img_thumbnail[0].attrib.get('src', ''))
|
||||||
|
|
||||||
# go to the filmography page
|
# go to the filmography page
|
||||||
self.browser.location('http://www.imdb.com/name/%s/filmotype'%id)
|
self.browser.location('http://www.imdb.com/name/%s/filmotype' % id)
|
||||||
assert self.browser.is_on_page(FilmographyPage)
|
assert self.browser.is_on_page(FilmographyPage)
|
||||||
roles = self.browser.page.get_roles()
|
roles = self.browser.page.get_roles()
|
||||||
|
|
||||||
person = Person(id,name)
|
person = Person(id, name)
|
||||||
person.real_name = real_name
|
person.real_name = real_name
|
||||||
person.birth_date = birth_date
|
person.birth_date = birth_date
|
||||||
person.death_date = death_date
|
person.death_date = death_date
|
||||||
person.birth_place = birth_place
|
person.birth_place = birth_place
|
||||||
person.gender = gender
|
person.gender = gender
|
||||||
person.nationality = nationality
|
person.nationality = nationality
|
||||||
person.short_biography = short_biography
|
person.short_biography = short_biography
|
||||||
person.short_description = short_description
|
person.short_description = short_description
|
||||||
person.roles = roles
|
person.roles = roles
|
||||||
person.thumbnail_url = thumbnail_url
|
person.thumbnail_url = thumbnail_url
|
||||||
return person
|
return person
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -193,39 +193,39 @@ class FilmographyPage(BasePage):
|
||||||
This page is easier to parse than the main person page filmography
|
This page is easier to parse than the main person page filmography
|
||||||
'''
|
'''
|
||||||
def iter_movies_ids(self):
|
def iter_movies_ids(self):
|
||||||
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
|
||||||
for a in self.parser.select(role_div,'ol > li > a'):
|
for a in self.parser.select(role_div, 'ol > li > a'):
|
||||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
id = a.attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
if id.startswith('tt'):
|
if id.startswith('tt'):
|
||||||
yield id
|
yield id
|
||||||
|
|
||||||
def get_roles(self):
|
def get_roles(self):
|
||||||
roles = {}
|
roles = {}
|
||||||
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
|
||||||
role = self.parser.select(role_div,'h5 a',1).text.replace(':','')
|
role = self.parser.select(role_div, 'h5 a', 1).text.replace(':', '')
|
||||||
roles[role] = []
|
roles[role] = []
|
||||||
for a in self.parser.select(role_div,'ol > li > a'):
|
for a in self.parser.select(role_div, 'ol > li > a'):
|
||||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
id = a.attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
if id.startswith('tt'):
|
if id.startswith('tt'):
|
||||||
if '(' in a.tail and ')' in a.tail:
|
if '(' in a.tail and ')' in a.tail:
|
||||||
between_p = a.tail.split(')')[0].split('(')[1]
|
between_p = a.tail.split(')')[0].split('(')[1]
|
||||||
else:
|
else:
|
||||||
between_p = '????'
|
between_p = '????'
|
||||||
roles[role].append('(%s) %s'%(between_p,a.text))
|
roles[role].append('(%s) %s' % (between_p, a.text))
|
||||||
return roles
|
return roles
|
||||||
|
|
||||||
def iter_movies(self, role_filter=None):
|
def iter_movies(self, role_filter=None):
|
||||||
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
|
||||||
role = self.parser.select(role_div,'h5 a',1).text.replace(':','')
|
role = self.parser.select(role_div, 'h5 a', 1).text.replace(':', '')
|
||||||
if (role_filter is None or (role_filter is not None and role.lower().strip() == role_filter))\
|
if (role_filter is None or (role_filter is not None and role.lower().strip() == role_filter))\
|
||||||
and role != 'In Development':
|
and role != 'In Development':
|
||||||
for a in self.parser.select(role_div,'ol > li > a'):
|
for a in self.parser.select(role_div, 'ol > li > a'):
|
||||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
id = a.attrib.get('href', '').strip('/').split('/')[-1]
|
||||||
if id.startswith('tt'):
|
if id.startswith('tt'):
|
||||||
title = unicode(a.text)
|
title = unicode(a.text)
|
||||||
role_detail = NotAvailable
|
role_detail = NotAvailable
|
||||||
if len(a.tail) > 0:
|
if len(a.tail) > 0:
|
||||||
role_detail = unicode(' '.join(a.tail.replace('..','').split()))
|
role_detail = unicode(' '.join(a.tail.replace('..', '').split()))
|
||||||
movie = Movie(id,title)
|
movie = Movie(id, title)
|
||||||
movie.short_description = role_detail
|
movie.short_description = role_detail
|
||||||
yield movie
|
yield movie
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ class IsohuntBackend(BaseBackend, ICapTorrent):
|
||||||
return self.browser.openurl(torrent.url.encode('utf-8')).read()
|
return self.browser.openurl(torrent.url.encode('utf-8')).read()
|
||||||
|
|
||||||
def iter_torrents(self, pattern):
|
def iter_torrents(self, pattern):
|
||||||
return self.browser.iter_torrents(pattern.replace(' ','+'))
|
return self.browser.iter_torrents(pattern.replace(' ', '+'))
|
||||||
|
|
||||||
def fill_torrent(self, torrent, fields):
|
def fill_torrent(self, torrent, fields):
|
||||||
if 'description' in fields or 'files' in fields:
|
if 'description' in fields or 'files' in fields:
|
||||||
|
|
@ -61,5 +61,5 @@ class IsohuntBackend(BaseBackend, ICapTorrent):
|
||||||
return torrent
|
return torrent
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
Torrent:fill_torrent
|
Torrent: fill_torrent
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -32,9 +32,9 @@ class IsohuntBrowser(BaseBrowser):
|
||||||
ENCODING = 'utf-8'
|
ENCODING = 'utf-8'
|
||||||
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'https://isohunt.com/torrents/.*iht=-1&ihp=1&ihs1=1&iho1=d' : TorrentsPage,
|
'https://isohunt.com/torrents/.*iht=-1&ihp=1&ihs1=1&iho1=d': TorrentsPage,
|
||||||
'https://isohunt.com/torrent_details.*tab=summary' : TorrentPage,
|
'https://isohunt.com/torrent_details.*tab=summary': TorrentPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def home(self):
|
def home(self):
|
||||||
return self.location('https://isohunt.com')
|
return self.location('https://isohunt.com')
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,8 @@ class TorrentPage(BasePage):
|
||||||
title = NotAvailable
|
title = NotAvailable
|
||||||
size = NotAvailable
|
size = NotAvailable
|
||||||
url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id)
|
url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id)
|
||||||
title = unicode(self.parser.select(self.document.getroot(),'head > meta[name=title]',1).attrib.get('content',''))
|
title = unicode(self.parser.select(
|
||||||
|
self.document.getroot(), 'head > meta[name=title]', 1).attrib.get('content', ''))
|
||||||
seed = NotAvailable
|
seed = NotAvailable
|
||||||
leech = NotAvailable
|
leech = NotAvailable
|
||||||
tip_id = "none"
|
tip_id = "none"
|
||||||
|
|
|
||||||
|
|
@ -72,5 +72,5 @@ class KickassBackend(BaseBackend, ICapTorrent):
|
||||||
return torrent
|
return torrent
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
Torrent:fill_torrent
|
Torrent: fill_torrent
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ class KickassBrowser(BaseBrowser):
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://kat.ph/usearch/.*field=seeders&sorder=desc': TorrentsPage,
|
'http://kat.ph/usearch/.*field=seeders&sorder=desc': TorrentsPage,
|
||||||
'http://kat.ph/.*.html': TorrentPage,
|
'http://kat.ph/.*.html': TorrentPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def home(self):
|
def home(self):
|
||||||
return self.location('http://kat.ph')
|
return self.location('http://kat.ph')
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ from weboob.tools.browser import BasePage
|
||||||
from weboob.tools.misc import get_bytes_size
|
from weboob.tools.misc import get_bytes_size
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['TorrentsPage','TorrentPage']
|
__all__ = ['TorrentsPage', 'TorrentPage']
|
||||||
|
|
||||||
|
|
||||||
class TorrentsPage(BasePage):
|
class TorrentsPage(BasePage):
|
||||||
|
|
@ -53,14 +53,14 @@ class TorrentsPage(BasePage):
|
||||||
.replace('.html', '')
|
.replace('.html', '')
|
||||||
|
|
||||||
# look for url
|
# look for url
|
||||||
for a in self.parser.select(tr,'div.iaconbox a'):
|
for a in self.parser.select(tr, 'div.iaconbox a'):
|
||||||
href = a.attrib.get('href', '')
|
href = a.attrib.get('href', '')
|
||||||
if href.startswith('magnet'):
|
if href.startswith('magnet'):
|
||||||
magnet = unicode(href)
|
magnet = unicode(href)
|
||||||
elif href.startswith('http'):
|
elif href.startswith('http'):
|
||||||
url = unicode(href)
|
url = unicode(href)
|
||||||
elif href.startswith('//'):
|
elif href.startswith('//'):
|
||||||
url = u'http:%s'%href
|
url = u'http:%s' % href
|
||||||
|
|
||||||
size = tr.getchildren()[1].text
|
size = tr.getchildren()[1].text
|
||||||
u = tr.getchildren()[1].getchildren()[0].text
|
u = tr.getchildren()[1].getchildren()[0].text
|
||||||
|
|
@ -107,16 +107,16 @@ class TorrentPage(BasePage):
|
||||||
leech = 0
|
leech = 0
|
||||||
|
|
||||||
title = self.parser.select(self.document.getroot(),
|
title = self.parser.select(self.document.getroot(),
|
||||||
'h1.torrentName span', 1)
|
'h1.torrentName span', 1)
|
||||||
title = unicode(title.text)
|
title = unicode(title.text)
|
||||||
|
|
||||||
for a in self.parser.select(self.document.getroot(),
|
for a in self.parser.select(self.document.getroot(),
|
||||||
'div.downloadButtonGroup a'):
|
'div.downloadButtonGroup a'):
|
||||||
href = a.attrib.get('href', '')
|
href = a.attrib.get('href', '')
|
||||||
if href.startswith('magnet'):
|
if href.startswith('magnet'):
|
||||||
magnet = unicode(href)
|
magnet = unicode(href)
|
||||||
elif href.startswith('//'):
|
elif href.startswith('//'):
|
||||||
url = u'http:%s'%href
|
url = u'http:%s' % href
|
||||||
elif href.startswith('http'):
|
elif href.startswith('http'):
|
||||||
url = unicode(href)
|
url = unicode(href)
|
||||||
|
|
||||||
|
|
@ -127,7 +127,7 @@ class TorrentPage(BasePage):
|
||||||
# is enough to know if this is the right span
|
# is enough to know if this is the right span
|
||||||
if (span.attrib.get('class', '') == 'folder'
|
if (span.attrib.get('class', '') == 'folder'
|
||||||
or span.attrib.get('class', '') == 'folderopen') \
|
or span.attrib.get('class', '') == 'folderopen') \
|
||||||
and len(span.getchildren()) > 2:
|
and len(span.getchildren()) > 2:
|
||||||
size = span.getchildren()[1].tail
|
size = span.getchildren()[1].tail
|
||||||
u = span.getchildren()[2].text
|
u = span.getchildren()[2].text
|
||||||
size = float(size.split(': ')[1].replace(',', '.'))
|
size = float(size.split(': ')[1].replace(',', '.'))
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.capabilities.recipe import ICapRecipe,Recipe
|
from weboob.capabilities.recipe import ICapRecipe, Recipe
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend
|
||||||
|
|
||||||
from .browser import MarmitonBrowser
|
from .browser import MarmitonBrowser
|
||||||
|
|
@ -48,16 +48,16 @@ class MarmitonBackend(BaseBackend, ICapRecipe):
|
||||||
def fill_recipe(self, recipe, fields):
|
def fill_recipe(self, recipe, fields):
|
||||||
if 'nb_person' in fields or 'instructions' in fields:
|
if 'nb_person' in fields or 'instructions' in fields:
|
||||||
rec = self.get_recipe(recipe.id)
|
rec = self.get_recipe(recipe.id)
|
||||||
recipe.picture_url = rec.picture_url
|
recipe.picture_url = rec.picture_url
|
||||||
recipe.instructions = rec.instructions
|
recipe.instructions = rec.instructions
|
||||||
recipe.ingredients = rec.ingredients
|
recipe.ingredients = rec.ingredients
|
||||||
recipe.comments = rec.comments
|
recipe.comments = rec.comments
|
||||||
recipe.nb_person = rec.nb_person
|
recipe.nb_person = rec.nb_person
|
||||||
recipe.cooking_time = rec.cooking_time
|
recipe.cooking_time = rec.cooking_time
|
||||||
recipe.preparation_time = rec.preparation_time
|
recipe.preparation_time = rec.preparation_time
|
||||||
|
|
||||||
return recipe
|
return recipe
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
Recipe:fill_recipe,
|
Recipe: fill_recipe,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ class MarmitonBrowser(BaseBrowser):
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage,
|
'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage,
|
||||||
'http://www.marmiton.org/recettes/recette_.*': RecipePage,
|
'http://www.marmiton.org/recettes/recette_.*': RecipePage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_recipes(self, pattern):
|
def iter_recipes(self, pattern):
|
||||||
self.location('http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % (pattern))
|
self.location('http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % (pattern))
|
||||||
|
|
|
||||||
|
|
@ -30,27 +30,28 @@ class ResultsPage(BasePage):
|
||||||
""" Page which contains results as a list of recipies
|
""" Page which contains results as a list of recipies
|
||||||
"""
|
"""
|
||||||
def iter_recipes(self):
|
def iter_recipes(self):
|
||||||
for div in self.parser.select(self.document.getroot(),'div.m_search_result'):
|
for div in self.parser.select(self.document.getroot(), 'div.m_search_result'):
|
||||||
tds = self.parser.select(div,'td')
|
tds = self.parser.select(div, 'td')
|
||||||
if len(tds) == 2:
|
if len(tds) == 2:
|
||||||
title = NotAvailable
|
title = NotAvailable
|
||||||
thumbnail_url = NotAvailable
|
thumbnail_url = NotAvailable
|
||||||
short_description = NotAvailable
|
short_description = NotAvailable
|
||||||
imgs = self.parser.select(tds[0],'img')
|
imgs = self.parser.select(tds[0], 'img')
|
||||||
if len(imgs) > 0:
|
if len(imgs) > 0:
|
||||||
thumbnail_url = unicode(imgs[0].attrib.get('src',''))
|
thumbnail_url = unicode(imgs[0].attrib.get('src', ''))
|
||||||
link = self.parser.select(tds[1],'div.m_search_titre_recette a',1)
|
link = self.parser.select(tds[1], 'div.m_search_titre_recette a', 1)
|
||||||
title = unicode(link.text)
|
title = unicode(link.text)
|
||||||
id = link.attrib.get('href','').replace('.aspx','').replace('/recettes/recette_','')
|
id = link.attrib.get('href', '').replace('.aspx', '').replace('/recettes/recette_', '')
|
||||||
short_description = unicode(' '.join(self.parser.select(tds[1],'div.m_search_result_part4',1).text.strip().split('\n')))
|
short_description = unicode(' '.join(self.parser.select(tds[
|
||||||
|
1], 'div.m_search_result_part4', 1).text.strip().split('\n')))
|
||||||
|
|
||||||
recipe = Recipe(id,title)
|
recipe = Recipe(id, title)
|
||||||
recipe.thumbnail_url = thumbnail_url
|
recipe.thumbnail_url = thumbnail_url
|
||||||
recipe.short_description= short_description
|
recipe.short_description = short_description
|
||||||
recipe.instructions = NotLoaded
|
recipe.instructions = NotLoaded
|
||||||
recipe.ingredients = NotLoaded
|
recipe.ingredients = NotLoaded
|
||||||
recipe.nb_person = NotLoaded
|
recipe.nb_person = NotLoaded
|
||||||
recipe.cooking_time = NotLoaded
|
recipe.cooking_time = NotLoaded
|
||||||
recipe.preparation_time = NotLoaded
|
recipe.preparation_time = NotLoaded
|
||||||
yield recipe
|
yield recipe
|
||||||
|
|
||||||
|
|
@ -68,30 +69,30 @@ class RecipePage(BasePage):
|
||||||
instructions = NotAvailable
|
instructions = NotAvailable
|
||||||
comments = []
|
comments = []
|
||||||
|
|
||||||
title = unicode(self.parser.select(self.document.getroot(),'h1.m_title',1).text_content().strip())
|
title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
|
||||||
main = self.parser.select(self.document.getroot(),'div.m_content_recette_main',1)
|
main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
|
||||||
preparation_time = int(self.parser.select(main,'p.m_content_recette_info span.preptime',1).text_content())
|
preparation_time = int(self.parser.select(main, 'p.m_content_recette_info span.preptime', 1).text_content())
|
||||||
cooking_time = int(self.parser.select(main,'p.m_content_recette_info span.cooktime',1).text_content())
|
cooking_time = int(self.parser.select(main, 'p.m_content_recette_info span.cooktime', 1).text_content())
|
||||||
ing_header_line = self.parser.select(main,'p.m_content_recette_ingredients span',1).text_content()
|
ing_header_line = self.parser.select(main, 'p.m_content_recette_ingredients span', 1).text_content()
|
||||||
if '(pour' in ing_header_line and ')' in ing_header_line:
|
if '(pour' in ing_header_line and ')' in ing_header_line:
|
||||||
nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])
|
nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])
|
||||||
ingredients = self.parser.select(main,'p.m_content_recette_ingredients',1).text_content().strip().split('- ')
|
ingredients = self.parser.select(main, 'p.m_content_recette_ingredients', 1).text_content().strip().split('- ')
|
||||||
ingredients=ingredients[1:]
|
ingredients = ingredients[1:]
|
||||||
rinstructions = self.parser.select(main,'div.m_content_recette_todo',1).text_content().strip()
|
rinstructions = self.parser.select(main, 'div.m_content_recette_todo', 1).text_content().strip()
|
||||||
instructions = u''
|
instructions = u''
|
||||||
for line in rinstructions.split('\n'):
|
for line in rinstructions.split('\n'):
|
||||||
instructions += '%s\n'%line.strip()
|
instructions += '%s\n' % line.strip()
|
||||||
instructions = instructions.strip('\n')
|
instructions = instructions.strip('\n')
|
||||||
imgillu = self.parser.select(self.document.getroot(),'a.m_content_recette_illu img')
|
imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
|
||||||
if len(imgillu) > 0:
|
if len(imgillu) > 0:
|
||||||
picture_url = unicode(imgillu[0].attrib.get('src',''))
|
picture_url = unicode(imgillu[0].attrib.get('src', ''))
|
||||||
for divcom in self.parser.select(self.document.getroot(),'div.m_commentaire_row'):
|
for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
|
||||||
note = self.parser.select(divcom,'div.m_commentaire_note span',1).text.strip()
|
note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
|
||||||
user = self.parser.select(divcom,'div.m_commentaire_content span',1).text.strip()
|
user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
|
||||||
content = self.parser.select(divcom,'div.m_commentaire_content p',1).text.strip()
|
content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
|
||||||
comments.append(u'user: %s, note: %s, comment: %s'%(user,note,content))
|
comments.append(u'user: %s, note: %s, comment: %s' % (user, note, content))
|
||||||
|
|
||||||
recipe = Recipe(id,title)
|
recipe = Recipe(id, title)
|
||||||
recipe.preparation_time = preparation_time
|
recipe.preparation_time = preparation_time
|
||||||
recipe.cooking_time = cooking_time
|
recipe.cooking_time = cooking_time
|
||||||
recipe.nb_person = nb_person
|
recipe.nb_person = nb_person
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported,Subtitle
|
from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported, Subtitle
|
||||||
from weboob.applications.suboob.suboob import LANGUAGE_CONV
|
from weboob.applications.suboob.suboob import LANGUAGE_CONV
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend
|
||||||
|
|
||||||
|
|
@ -53,15 +53,15 @@ class OpensubtitlesBackend(BaseBackend, ICapSubtitle):
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
if language not in LANGUAGE_CONV.keys():
|
if language not in LANGUAGE_CONV.keys():
|
||||||
raise LanguageNotSupported()
|
raise LanguageNotSupported()
|
||||||
return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))
|
return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
|
||||||
|
|
||||||
def fill_subtitle(self, subtitle, fields):
|
def fill_subtitle(self, subtitle, fields):
|
||||||
if 'description' in fields:
|
if 'description' in fields:
|
||||||
sub = self.get_subtitle(subtitle.id)
|
sub = self.get_subtitle(subtitle.id)
|
||||||
subtitle.description = sub.description
|
subtitle.description = sub.description
|
||||||
|
|
||||||
return subtitle
|
return subtitle
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
Subtitle:fill_subtitle,
|
Subtitle: fill_subtitle,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,13 +35,14 @@ class OpensubtitlesBrowser(BaseBrowser):
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://www.opensubtitles.org.*search2/sublanguageid.*moviename.*': SearchPage,
|
'http://www.opensubtitles.org.*search2/sublanguageid.*moviename.*': SearchPage,
|
||||||
'http://www.opensubtitles.org.*search/sublanguageid.*idmovie.*': SubtitlesPage,
|
'http://www.opensubtitles.org.*search/sublanguageid.*idmovie.*': SubtitlesPage,
|
||||||
'http://www.opensubtitles.org.*search/imdbid.*/sublanguageid.*/moviename.*' : SubtitlesPage,
|
'http://www.opensubtitles.org.*search/imdbid.*/sublanguageid.*/moviename.*': SubtitlesPage,
|
||||||
'http://www.opensubtitles.org.*subtitles/[0-9]*/.*' : SubtitlePage
|
'http://www.opensubtitles.org.*subtitles/[0-9]*/.*': SubtitlePage
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
lang = LANGUAGE_CONV[language]
|
lang = LANGUAGE_CONV[language]
|
||||||
self.location('http://www.opensubtitles.org/search2/sublanguageid-%s/moviename-%s' % (lang,pattern.encode('utf-8')))
|
self.location('http://www.opensubtitles.org/search2/sublanguageid-%s/moviename-%s' % (
|
||||||
|
lang, pattern.encode('utf-8')))
|
||||||
assert self.is_on_page(SearchPage) or self.is_on_page(SubtitlesPage) or self.is_on_page(SubtitlePage)
|
assert self.is_on_page(SearchPage) or self.is_on_page(SubtitlesPage) or self.is_on_page(SubtitlePage)
|
||||||
return self.page.iter_subtitles()
|
return self.page.iter_subtitles()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,24 +24,24 @@ from weboob.tools.browser import BasePage
|
||||||
from weboob.applications.suboob.suboob import LANGUAGE_CONV
|
from weboob.applications.suboob.suboob import LANGUAGE_CONV
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['SubtitlesPage','SubtitlePage','SearchPage']
|
__all__ = ['SubtitlesPage', 'SubtitlePage', 'SearchPage']
|
||||||
|
|
||||||
|
|
||||||
class SearchPage(BasePage):
|
class SearchPage(BasePage):
|
||||||
""" Page which contains results as a list of movies
|
""" Page which contains results as a list of movies
|
||||||
"""
|
"""
|
||||||
def iter_subtitles(self):
|
def iter_subtitles(self):
|
||||||
tabresults = self.parser.select(self.document.getroot(),'table#search_results')
|
tabresults = self.parser.select(self.document.getroot(), 'table#search_results')
|
||||||
if len(tabresults) > 0:
|
if len(tabresults) > 0:
|
||||||
table = tabresults[0]
|
table = tabresults[0]
|
||||||
# for each result line, explore the subtitle list page to iter subtitles
|
# for each result line, explore the subtitle list page to iter subtitles
|
||||||
for line in self.parser.select(table,'tr'):
|
for line in self.parser.select(table, 'tr'):
|
||||||
links = self.parser.select(line,'a')
|
links = self.parser.select(line, 'a')
|
||||||
if len(links) > 0:
|
if len(links) > 0:
|
||||||
a = links[0]
|
a = links[0]
|
||||||
url = a.attrib.get('href','')
|
url = a.attrib.get('href', '')
|
||||||
if "ads.opensubtitles" not in url:
|
if "ads.opensubtitles" not in url:
|
||||||
self.browser.location("http://www.opensubtitles.org%s"%url)
|
self.browser.location("http://www.opensubtitles.org%s" % url)
|
||||||
assert self.browser.is_on_page(SubtitlesPage) or self.browser.is_on_page(SubtitlePage)
|
assert self.browser.is_on_page(SubtitlesPage) or self.browser.is_on_page(SubtitlePage)
|
||||||
# subtitles page does the job
|
# subtitles page does the job
|
||||||
for subtitle in self.browser.page.iter_subtitles():
|
for subtitle in self.browser.page.iter_subtitles():
|
||||||
|
|
@ -52,48 +52,48 @@ class SubtitlesPage(BasePage):
|
||||||
""" Page which contains several subtitles for a single movie
|
""" Page which contains several subtitles for a single movie
|
||||||
"""
|
"""
|
||||||
def iter_subtitles(self):
|
def iter_subtitles(self):
|
||||||
tabresults = self.parser.select(self.document.getroot(),'table#search_results')
|
tabresults = self.parser.select(self.document.getroot(), 'table#search_results')
|
||||||
if len(tabresults) > 0:
|
if len(tabresults) > 0:
|
||||||
table = tabresults[0]
|
table = tabresults[0]
|
||||||
# for each result line, get informations
|
# for each result line, get informations
|
||||||
# why following line doesn't work all the time (for example 'search fr sopranos guy walks' ?
|
# why following line doesn't work all the time (for example 'search fr sopranos guy walks' ?
|
||||||
#for line in self.parser.select(table,'tr'):
|
# for line in self.parser.select(table,'tr'):
|
||||||
for line in table.getiterator('tr'):
|
for line in table.getiterator('tr'):
|
||||||
# some tr are useless, specially ads
|
# some tr are useless, specially ads
|
||||||
if line.attrib.get('id','').startswith('name'):
|
if line.attrib.get('id', '').startswith('name'):
|
||||||
yield self.get_subtitle_from_line(line)
|
yield self.get_subtitle_from_line(line)
|
||||||
|
|
||||||
def get_subtitle_from_line(self,line):
|
def get_subtitle_from_line(self, line):
|
||||||
cells = self.parser.select(line,'td')
|
cells = self.parser.select(line, 'td')
|
||||||
if len(cells) > 0:
|
if len(cells) > 0:
|
||||||
links = self.parser.select(line,'a')
|
links = self.parser.select(line, 'a')
|
||||||
a = links[0]
|
a = links[0]
|
||||||
name = u" ".join(a.text.strip().split())
|
name = u" ".join(a.text.strip().split())
|
||||||
first_cell = cells[0]
|
first_cell = cells[0]
|
||||||
spanlist = self.parser.select(first_cell,'span')
|
spanlist = self.parser.select(first_cell, 'span')
|
||||||
if len(spanlist) > 0:
|
if len(spanlist) > 0:
|
||||||
long_name = spanlist[0].attrib.get('title','')
|
long_name = spanlist[0].attrib.get('title', '')
|
||||||
else:
|
else:
|
||||||
texts = first_cell.itertext()
|
texts = first_cell.itertext()
|
||||||
long_name = texts.next()
|
long_name = texts.next()
|
||||||
long_name = texts.next()
|
long_name = texts.next()
|
||||||
if "Download at 25" in long_name:
|
if "Download at 25" in long_name:
|
||||||
long_name = "---"
|
long_name = "---"
|
||||||
name = "%s (%s)"%(name,long_name)
|
name = "%s (%s)" % (name, long_name)
|
||||||
second_cell = cells[1]
|
second_cell = cells[1]
|
||||||
link = self.parser.select(second_cell,'a',1)
|
link = self.parser.select(second_cell, 'a', 1)
|
||||||
lang = link.attrib.get('href','').split('/')[-1].split('-')[-1]
|
lang = link.attrib.get('href', '').split('/')[-1].split('-')[-1]
|
||||||
for lshort,llong in LANGUAGE_CONV.items():
|
for lshort, llong in LANGUAGE_CONV.items():
|
||||||
if lang == llong:
|
if lang == llong:
|
||||||
lang = unicode(lshort)
|
lang = unicode(lshort)
|
||||||
break
|
break
|
||||||
nb_cd = int(cells[2].text.strip().lower().replace('cd',''))
|
nb_cd = int(cells[2].text.strip().lower().replace('cd', ''))
|
||||||
cell_dl = cells[4]
|
cell_dl = cells[4]
|
||||||
href = self.parser.select(cell_dl,'a',1).attrib.get('href','')
|
href = self.parser.select(cell_dl, 'a', 1).attrib.get('href', '')
|
||||||
url = unicode('http://www.opensubtitles.org%s'%href)
|
url = unicode('http://www.opensubtitles.org%s' % href)
|
||||||
id = href.split('/')[-1]
|
id = href.split('/')[-1]
|
||||||
|
|
||||||
subtitle = Subtitle(id,name)
|
subtitle = Subtitle(id, name)
|
||||||
subtitle.url = url
|
subtitle.url = url
|
||||||
subtitle.language = lang
|
subtitle.language = lang
|
||||||
subtitle.nb_cd = nb_cd
|
subtitle.nb_cd = nb_cd
|
||||||
|
|
@ -106,15 +106,15 @@ class SubtitlePage(BasePage):
|
||||||
"""
|
"""
|
||||||
def get_subtitle(self):
|
def get_subtitle(self):
|
||||||
desc = NotAvailable
|
desc = NotAvailable
|
||||||
father = self.parser.select(self.document.getroot(),'a#app_link',1).getparent()
|
father = self.parser.select(self.document.getroot(), 'a#app_link', 1).getparent()
|
||||||
a = self.parser.select(father,'a')[1]
|
a = self.parser.select(father, 'a')[1]
|
||||||
id = a.attrib.get('href','').split('/')[-1]
|
id = a.attrib.get('href', '').split('/')[-1]
|
||||||
url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s'%id)
|
url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s' % id)
|
||||||
link = self.parser.select(self.document.getroot(),'link[rel=bookmark]',1)
|
link = self.parser.select(self.document.getroot(), 'link[rel=bookmark]', 1)
|
||||||
title = unicode(link.attrib.get('title',''))
|
title = unicode(link.attrib.get('title', ''))
|
||||||
nb_cd = int(title.lower().split('cd')[0].split()[-1])
|
nb_cd = int(title.lower().split('cd')[0].split()[-1])
|
||||||
lang = unicode(title.split('(')[1].split(')')[0])
|
lang = unicode(title.split('(')[1].split(')')[0])
|
||||||
file_names = self.parser.select(self.document.getroot(),"img[title~=filename]")
|
file_names = self.parser.select(self.document.getroot(), "img[title~=filename]")
|
||||||
if len(file_names) > 0:
|
if len(file_names) > 0:
|
||||||
file_name = file_names[0].getparent().text_content()
|
file_name = file_names[0].getparent().text_content()
|
||||||
file_name = ' '.join(file_name.split())
|
file_name = ' '.join(file_name.split())
|
||||||
|
|
@ -122,11 +122,11 @@ class SubtitlePage(BasePage):
|
||||||
for f in file_names:
|
for f in file_names:
|
||||||
desc_line = f.getparent().text_content()
|
desc_line = f.getparent().text_content()
|
||||||
desc += '\n'+' '.join(desc_line.split())
|
desc += '\n'+' '.join(desc_line.split())
|
||||||
name = unicode('%s (%s)'%(title,file_name))
|
name = unicode('%s (%s)' % (title, file_name))
|
||||||
|
|
||||||
subtitle = Subtitle(id,name)
|
subtitle = Subtitle(id, name)
|
||||||
subtitle.url = url
|
subtitle.url = url
|
||||||
for lshort,llong in LANGUAGE_CONV.items():
|
for lshort, llong in LANGUAGE_CONV.items():
|
||||||
if lang == llong:
|
if lang == llong:
|
||||||
lang = unicode(lshort)
|
lang = unicode(lshort)
|
||||||
break
|
break
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ class OpensubtitlesTest(BackendTest):
|
||||||
|
|
||||||
def test_subtitle(self):
|
def test_subtitle(self):
|
||||||
lsub = []
|
lsub = []
|
||||||
subtitles = self.backend.iter_subtitles('fr','spiderman')
|
subtitles = self.backend.iter_subtitles('fr', 'spiderman')
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
subtitle = subtitles.next()
|
subtitle = subtitles.next()
|
||||||
lsub.append(subtitle)
|
lsub.append(subtitle)
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ class ParolesmaniaBackend(BaseBackend, ICapLyrics):
|
||||||
return self.browser.get_lyrics(id)
|
return self.browser.get_lyrics(id)
|
||||||
|
|
||||||
def iter_lyrics(self, criteria, pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('utf-8')))
|
return self.browser.iter_lyrics(criteria, quote_plus(pattern.encode('utf-8')))
|
||||||
|
|
||||||
def fill_songlyrics(self, songlyrics, fields):
|
def fill_songlyrics(self, songlyrics, fields):
|
||||||
if 'content' in fields:
|
if 'content' in fields:
|
||||||
|
|
@ -52,5 +52,5 @@ class ParolesmaniaBackend(BaseBackend, ICapLyrics):
|
||||||
return songlyrics
|
return songlyrics
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
SongLyrics:fill_songlyrics
|
SongLyrics: fill_songlyrics
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,13 +36,13 @@ class ParolesmaniaBrowser(BaseBrowser):
|
||||||
'http://www.parolesmania.com/recherche.php\?c=artist.*': ArtistResultsPage,
|
'http://www.parolesmania.com/recherche.php\?c=artist.*': ArtistResultsPage,
|
||||||
'http://www.parolesmania.com/paroles.*[0-9]*/paroles.*': SonglyricsPage,
|
'http://www.parolesmania.com/paroles.*[0-9]*/paroles.*': SonglyricsPage,
|
||||||
'http://www.parolesmania.com/paroles[^/]*.html': ArtistSongsPage,
|
'http://www.parolesmania.com/paroles[^/]*.html': ArtistSongsPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_lyrics(self, criteria, pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
crit = 'artist'
|
crit = 'artist'
|
||||||
if criteria != 'artist':
|
if criteria != 'artist':
|
||||||
crit = 'title'
|
crit = 'title'
|
||||||
self.location('http://www.parolesmania.com/recherche.php?c=%s&k=%s'%(crit,pattern))
|
self.location('http://www.parolesmania.com/recherche.php?c=%s&k=%s' % (crit, pattern))
|
||||||
assert self.is_on_page(SongResultsPage) or self.is_on_page(ArtistResultsPage)\
|
assert self.is_on_page(SongResultsPage) or self.is_on_page(ArtistResultsPage)\
|
||||||
or self.is_on_page(ArtistSongsPage)
|
or self.is_on_page(ArtistSongsPage)
|
||||||
for lyr in self.page.iter_lyrics():
|
for lyr in self.page.iter_lyrics():
|
||||||
|
|
@ -50,6 +50,6 @@ class ParolesmaniaBrowser(BaseBrowser):
|
||||||
|
|
||||||
def get_lyrics(self, id):
|
def get_lyrics(self, id):
|
||||||
ids = id.split('|')
|
ids = id.split('|')
|
||||||
self.location('http://www.parolesmania.com/paroles_%s/paroles_%s.html' % (ids[0],ids[1]))
|
self.location('http://www.parolesmania.com/paroles_%s/paroles_%s.html' % (ids[0], ids[1]))
|
||||||
assert self.is_on_page(SonglyricsPage)
|
assert self.is_on_page(SonglyricsPage)
|
||||||
return self.page.get_lyrics(id)
|
return self.page.get_lyrics(id)
|
||||||
|
|
|
||||||
|
|
@ -23,32 +23,32 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
|
__all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
|
||||||
|
|
||||||
|
|
||||||
class ArtistResultsPage(BasePage):
|
class ArtistResultsPage(BasePage):
|
||||||
def iter_lyrics(self):
|
def iter_lyrics(self):
|
||||||
for link in self.parser.select(self.document.getroot(),'div#albums > h1 a'):
|
for link in self.parser.select(self.document.getroot(), 'div#albums > h1 a'):
|
||||||
artist = unicode(link.text_content())
|
artist = unicode(link.text_content())
|
||||||
href = link.attrib.get('href','')
|
href = link.attrib.get('href', '')
|
||||||
if href.startswith('/paroles'):
|
if href.startswith('/paroles'):
|
||||||
self.browser.location('http://www.parolesmania.com%s'%href)
|
self.browser.location('http://www.parolesmania.com%s' % href)
|
||||||
assert self.browser.is_on_page(ArtistSongsPage)
|
assert self.browser.is_on_page(ArtistSongsPage)
|
||||||
for lyr in self.browser.page.iter_lyrics(artist):
|
for lyr in self.browser.page.iter_lyrics(artist):
|
||||||
yield lyr
|
yield lyr
|
||||||
|
|
||||||
|
|
||||||
class ArtistSongsPage(BasePage):
|
class ArtistSongsPage(BasePage):
|
||||||
def iter_lyrics(self,artist=None):
|
def iter_lyrics(self, artist=None):
|
||||||
if artist is None:
|
if artist is None:
|
||||||
artist = self.parser.select(self.document.getroot(),'head > title',1).text.replace('Paroles ','')
|
artist = self.parser.select(self.document.getroot(), 'head > title', 1).text.replace('Paroles ', '')
|
||||||
for link in self.parser.select(self.document.getroot(),'div#albums a'):
|
for link in self.parser.select(self.document.getroot(), 'div#albums a'):
|
||||||
href = link.attrib.get('href','')
|
href = link.attrib.get('href', '')
|
||||||
titleattrib = link.attrib.get('title','')
|
titleattrib = link.attrib.get('title', '')
|
||||||
if href.startswith('/paroles') and not href.endswith('alpha.html') and titleattrib.startswith('Paroles '):
|
if href.startswith('/paroles') and not href.endswith('alpha.html') and titleattrib.startswith('Paroles '):
|
||||||
title = unicode(link.text)
|
title = unicode(link.text)
|
||||||
ids = href.replace('/','').replace('.html','').split('paroles_')
|
ids = href.replace('/', '').replace('.html', '').split('paroles_')
|
||||||
id = '%s|%s'%(ids[1],ids[2])
|
id = '%s|%s' % (ids[1], ids[2])
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
songlyrics.content = NotLoaded
|
songlyrics.content = NotLoaded
|
||||||
|
|
@ -57,13 +57,13 @@ class ArtistSongsPage(BasePage):
|
||||||
|
|
||||||
class SongResultsPage(BasePage):
|
class SongResultsPage(BasePage):
|
||||||
def iter_lyrics(self):
|
def iter_lyrics(self):
|
||||||
for link in self.parser.select(self.document.getroot(),'div#albums a'):
|
for link in self.parser.select(self.document.getroot(), 'div#albums a'):
|
||||||
artist = NotAvailable
|
artist = NotAvailable
|
||||||
title = unicode(link.text.split(' - ')[0])
|
title = unicode(link.text.split(' - ')[0])
|
||||||
href = link.attrib.get('href','')
|
href = link.attrib.get('href', '')
|
||||||
if href.startswith('/paroles') and not href.endswith('alpha.html'):
|
if href.startswith('/paroles') and not href.endswith('alpha.html'):
|
||||||
ids = href.replace('/','').replace('.html','').split('paroles_')
|
ids = href.replace('/', '').replace('.html', '').split('paroles_')
|
||||||
id = '%s|%s'%(ids[1],ids[2])
|
id = '%s|%s' % (ids[1], ids[2])
|
||||||
artist = unicode(link.text.split(' - ')[1])
|
artist = unicode(link.text.split(' - ')[1])
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
|
|
@ -76,12 +76,12 @@ class SonglyricsPage(BasePage):
|
||||||
content = NotAvailable
|
content = NotAvailable
|
||||||
artist = NotAvailable
|
artist = NotAvailable
|
||||||
title = NotAvailable
|
title = NotAvailable
|
||||||
lyrdiv = self.parser.select(self.document.getroot(),'div#songlyrics_h')
|
lyrdiv = self.parser.select(self.document.getroot(), 'div#songlyrics_h')
|
||||||
if len(lyrdiv) > 0:
|
if len(lyrdiv) > 0:
|
||||||
content = unicode(lyrdiv[0].text_content().strip())
|
content = unicode(lyrdiv[0].text_content().strip())
|
||||||
infos = self.parser.select(self.document.getroot(),'head > title',1).text
|
infos = self.parser.select(self.document.getroot(), 'head > title', 1).text
|
||||||
artist = unicode(infos.split(' - ')[1])
|
artist = unicode(infos.split(' - ')[1])
|
||||||
title = unicode(infos.split(' - ')[0].replace('Paroles ',''))
|
title = unicode(infos.split(' - ')[0].replace('Paroles ', ''))
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
songlyrics.content = content
|
songlyrics.content = content
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class ParolesmaniaTest(BackendTest):
|
||||||
BACKEND = 'parolesmania'
|
BACKEND = 'parolesmania'
|
||||||
|
|
||||||
def test_search_song_n_get(self):
|
def test_search_song_n_get(self):
|
||||||
l_lyrics = list(self.backend.iter_lyrics('song','chien'))
|
l_lyrics = list(self.backend.iter_lyrics('song', 'chien'))
|
||||||
for songlyrics in l_lyrics:
|
for songlyrics in l_lyrics:
|
||||||
assert songlyrics.id
|
assert songlyrics.id
|
||||||
assert songlyrics.title
|
assert songlyrics.title
|
||||||
|
|
@ -38,7 +38,7 @@ class ParolesmaniaTest(BackendTest):
|
||||||
assert full_lyr.content is not NotLoaded
|
assert full_lyr.content is not NotLoaded
|
||||||
|
|
||||||
def test_search_artist(self):
|
def test_search_artist(self):
|
||||||
l_lyrics = list(self.backend.iter_lyrics('artist','boris'))
|
l_lyrics = list(self.backend.iter_lyrics('artist', 'boris'))
|
||||||
for songlyrics in l_lyrics:
|
for songlyrics in l_lyrics:
|
||||||
assert songlyrics.id
|
assert songlyrics.id
|
||||||
assert songlyrics.title
|
assert songlyrics.title
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ class ParolesmusiqueBackend(BaseBackend, ICapLyrics):
|
||||||
return self.browser.get_lyrics(id)
|
return self.browser.get_lyrics(id)
|
||||||
|
|
||||||
def iter_lyrics(self, criteria, pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
return self.browser.iter_lyrics(criteria,pattern.encode('utf-8'))
|
return self.browser.iter_lyrics(criteria, pattern.encode('utf-8'))
|
||||||
|
|
||||||
def fill_songlyrics(self, songlyrics, fields):
|
def fill_songlyrics(self, songlyrics, fields):
|
||||||
if 'content' in fields:
|
if 'content' in fields:
|
||||||
|
|
@ -50,5 +50,5 @@ class ParolesmusiqueBackend(BaseBackend, ICapLyrics):
|
||||||
return songlyrics
|
return songlyrics
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
SongLyrics:fill_songlyrics
|
SongLyrics: fill_songlyrics
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,12 +37,12 @@ class ParolesmusiqueBrowser(BaseBrowser):
|
||||||
'http://www.paroles-musique.com/lyrics-paroles-.*-0,0.php': ArtistResultsPage,
|
'http://www.paroles-musique.com/lyrics-paroles-.*-0,0.php': ArtistResultsPage,
|
||||||
'http://www.paroles-musique.com/paroles-.*p[0-9]*': SonglyricsPage,
|
'http://www.paroles-musique.com/paroles-.*p[0-9]*': SonglyricsPage,
|
||||||
'http://www.paroles-musique.com/paroles-.*-lyrics,a[0-9]*': ArtistSongsPage,
|
'http://www.paroles-musique.com/paroles-.*-lyrics,a[0-9]*': ArtistSongsPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_lyrics(self, criteria, pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
self.location('http://www.paroles-musique.com')
|
self.location('http://www.paroles-musique.com')
|
||||||
assert self.is_on_page(HomePage)
|
assert self.is_on_page(HomePage)
|
||||||
return self.page.iter_lyrics(criteria,pattern)
|
return self.page.iter_lyrics(criteria, pattern)
|
||||||
|
|
||||||
def get_lyrics(self, id):
|
def get_lyrics(self, id):
|
||||||
self.location('http://www.paroles-musique.com/paroles-%s' % id)
|
self.location('http://www.paroles-musique.com/paroles-%s' % id)
|
||||||
|
|
|
||||||
|
|
@ -23,11 +23,11 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage', 'HomePage']
|
__all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage', 'HomePage']
|
||||||
|
|
||||||
|
|
||||||
class HomePage(BasePage):
|
class HomePage(BasePage):
|
||||||
def iter_lyrics(self,criteria,pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
self.browser.select_form(name='rechercher')
|
self.browser.select_form(name='rechercher')
|
||||||
if criteria == 'artist':
|
if criteria == 'artist':
|
||||||
self.browser['termes_a'] = pattern
|
self.browser['termes_a'] = pattern
|
||||||
|
|
@ -41,21 +41,21 @@ class HomePage(BasePage):
|
||||||
|
|
||||||
class ArtistResultsPage(BasePage):
|
class ArtistResultsPage(BasePage):
|
||||||
def iter_lyrics(self):
|
def iter_lyrics(self):
|
||||||
for link in self.parser.select(self.document.getroot(),'div.cont_cat table a.std'):
|
for link in self.parser.select(self.document.getroot(), 'div.cont_cat table a.std'):
|
||||||
artist = unicode(link.text_content())
|
artist = unicode(link.text_content())
|
||||||
self.browser.location('http://www.paroles-musique.com%s'%link.attrib.get('href',''))
|
self.browser.location('http://www.paroles-musique.com%s' % link.attrib.get('href', ''))
|
||||||
assert self.browser.is_on_page(ArtistSongsPage)
|
assert self.browser.is_on_page(ArtistSongsPage)
|
||||||
for lyr in self.browser.page.iter_lyrics(artist):
|
for lyr in self.browser.page.iter_lyrics(artist):
|
||||||
yield lyr
|
yield lyr
|
||||||
|
|
||||||
|
|
||||||
class ArtistSongsPage(BasePage):
|
class ArtistSongsPage(BasePage):
|
||||||
def iter_lyrics(self,artist):
|
def iter_lyrics(self, artist):
|
||||||
for link in self.parser.select(self.document.getroot(),'div.cont_catA div.art_scroll a'):
|
for link in self.parser.select(self.document.getroot(), 'div.cont_catA div.art_scroll a'):
|
||||||
href = link.attrib.get('href','')
|
href = link.attrib.get('href', '')
|
||||||
if href.startswith('./paroles'):
|
if href.startswith('./paroles'):
|
||||||
title = unicode(link.text)
|
title = unicode(link.text)
|
||||||
id = href.replace('./paroles-','')
|
id = href.replace('./paroles-', '')
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
songlyrics.content = NotLoaded
|
songlyrics.content = NotLoaded
|
||||||
|
|
@ -65,14 +65,14 @@ class ArtistSongsPage(BasePage):
|
||||||
class SongResultsPage(BasePage):
|
class SongResultsPage(BasePage):
|
||||||
def iter_lyrics(self):
|
def iter_lyrics(self):
|
||||||
first = True
|
first = True
|
||||||
for tr in self.parser.select(self.document.getroot(),'div.cont_cat table tr'):
|
for tr in self.parser.select(self.document.getroot(), 'div.cont_cat table tr'):
|
||||||
if first:
|
if first:
|
||||||
first = False
|
first = False
|
||||||
continue
|
continue
|
||||||
artist = NotAvailable
|
artist = NotAvailable
|
||||||
links = self.parser.select(tr,'a.std')
|
links = self.parser.select(tr, 'a.std')
|
||||||
title = unicode(links[0].text)
|
title = unicode(links[0].text)
|
||||||
id = links[0].attrib.get('href','').replace('/paroles-','')
|
id = links[0].attrib.get('href', '').replace('/paroles-', '')
|
||||||
artist = unicode(links[1].text)
|
artist = unicode(links[1].text)
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
|
|
@ -84,8 +84,8 @@ class SonglyricsPage(BasePage):
|
||||||
def get_lyrics(self, id):
|
def get_lyrics(self, id):
|
||||||
artist = NotAvailable
|
artist = NotAvailable
|
||||||
title = NotAvailable
|
title = NotAvailable
|
||||||
content = unicode(self.parser.select(self.document.getroot(),'div#lyr_scroll',1).text_content().strip())
|
content = unicode(self.parser.select(self.document.getroot(), 'div#lyr_scroll', 1).text_content().strip())
|
||||||
infos = self.parser.select(self.document.getroot(),'h2.lyrics > font')
|
infos = self.parser.select(self.document.getroot(), 'h2.lyrics > font')
|
||||||
artist = unicode(infos[0].text)
|
artist = unicode(infos[0].text)
|
||||||
title = unicode(infos[1].text)
|
title = unicode(infos[1].text)
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class ParolesmusiqueTest(BackendTest):
|
||||||
BACKEND = 'parolesmusique'
|
BACKEND = 'parolesmusique'
|
||||||
|
|
||||||
def test_search_song_n_get(self):
|
def test_search_song_n_get(self):
|
||||||
l_lyrics = list(self.backend.iter_lyrics('song','chien'))
|
l_lyrics = list(self.backend.iter_lyrics('song', 'chien'))
|
||||||
for songlyrics in l_lyrics:
|
for songlyrics in l_lyrics:
|
||||||
assert songlyrics.id
|
assert songlyrics.id
|
||||||
assert songlyrics.title
|
assert songlyrics.title
|
||||||
|
|
@ -38,7 +38,7 @@ class ParolesmusiqueTest(BackendTest):
|
||||||
assert full_lyr.content is not NotLoaded
|
assert full_lyr.content is not NotLoaded
|
||||||
|
|
||||||
def test_search_artist(self):
|
def test_search_artist(self):
|
||||||
l_lyrics = list(self.backend.iter_lyrics('artist','boris'))
|
l_lyrics = list(self.backend.iter_lyrics('artist', 'boris'))
|
||||||
for songlyrics in l_lyrics:
|
for songlyrics in l_lyrics:
|
||||||
assert songlyrics.id
|
assert songlyrics.id
|
||||||
assert songlyrics.title
|
assert songlyrics.title
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ class SeeklyricsBackend(BaseBackend, ICapLyrics):
|
||||||
return self.browser.get_lyrics(id)
|
return self.browser.get_lyrics(id)
|
||||||
|
|
||||||
def iter_lyrics(self, criteria, pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('iso-8859-1')))
|
return self.browser.iter_lyrics(criteria, quote_plus(pattern.encode('iso-8859-1')))
|
||||||
|
|
||||||
def fill_songlyrics(self, songlyrics, fields):
|
def fill_songlyrics(self, songlyrics, fields):
|
||||||
if 'content' in fields:
|
if 'content' in fields:
|
||||||
|
|
@ -52,5 +52,5 @@ class SeeklyricsBackend(BaseBackend, ICapLyrics):
|
||||||
return songlyrics
|
return songlyrics
|
||||||
|
|
||||||
OBJECTS = {
|
OBJECTS = {
|
||||||
SongLyrics:fill_songlyrics
|
SongLyrics: fill_songlyrics
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,14 +36,14 @@ class SeeklyricsBrowser(BaseBrowser):
|
||||||
'http://www.seeklyrics.com/search.php.*t=2': ArtistResultsPage,
|
'http://www.seeklyrics.com/search.php.*t=2': ArtistResultsPage,
|
||||||
'http://www.seeklyrics.com/lyrics/.*html': SonglyricsPage,
|
'http://www.seeklyrics.com/lyrics/.*html': SonglyricsPage,
|
||||||
'http://www.seeklyrics.com/lyrics/.*/': ArtistSongsPage,
|
'http://www.seeklyrics.com/lyrics/.*/': ArtistSongsPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_lyrics(self, criteria, pattern):
|
def iter_lyrics(self, criteria, pattern):
|
||||||
if criteria == 'artist':
|
if criteria == 'artist':
|
||||||
type = 2
|
type = 2
|
||||||
else:
|
else:
|
||||||
type = 1
|
type = 1
|
||||||
self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern,type))
|
self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern, type))
|
||||||
assert self.is_on_page(ArtistResultsPage) or self.is_on_page(SongResultsPage)
|
assert self.is_on_page(ArtistResultsPage) or self.is_on_page(SongResultsPage)
|
||||||
return self.page.iter_lyrics()
|
return self.page.iter_lyrics()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,27 +23,27 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
|
__all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
|
||||||
|
|
||||||
|
|
||||||
class ArtistResultsPage(BasePage):
|
class ArtistResultsPage(BasePage):
|
||||||
def iter_lyrics(self):
|
def iter_lyrics(self):
|
||||||
for link in self.parser.select(self.document.getroot(),'table[title~=Results] a.tlink'):
|
for link in self.parser.select(self.document.getroot(), 'table[title~=Results] a.tlink'):
|
||||||
artist = unicode(link.text_content())
|
artist = unicode(link.text_content())
|
||||||
self.browser.location('http://www.seeklyrics.com%s'%link.attrib.get('href',''))
|
self.browser.location('http://www.seeklyrics.com%s' % link.attrib.get('href', ''))
|
||||||
assert self.browser.is_on_page(ArtistSongsPage)
|
assert self.browser.is_on_page(ArtistSongsPage)
|
||||||
for lyr in self.browser.page.iter_lyrics(artist):
|
for lyr in self.browser.page.iter_lyrics(artist):
|
||||||
yield lyr
|
yield lyr
|
||||||
|
|
||||||
|
|
||||||
class ArtistSongsPage(BasePage):
|
class ArtistSongsPage(BasePage):
|
||||||
def iter_lyrics(self,artist):
|
def iter_lyrics(self, artist):
|
||||||
for th in self.parser.select(self.document.getroot(),'th.text'):
|
for th in self.parser.select(self.document.getroot(), 'th.text'):
|
||||||
txt = th.text_content()
|
txt = th.text_content()
|
||||||
if txt.startswith('Top') and txt.endswith('Lyrics'):
|
if txt.startswith('Top') and txt.endswith('Lyrics'):
|
||||||
for link in self.parser.select(th.getparent().getparent(),'a.tlink'):
|
for link in self.parser.select(th.getparent().getparent(), 'a.tlink'):
|
||||||
title = unicode(link.attrib.get('title','').replace(' Lyrics',''))
|
title = unicode(link.attrib.get('title', '').replace(' Lyrics', ''))
|
||||||
id = link.attrib.get('href','').replace('/lyrics/','').replace('.html','')
|
id = link.attrib.get('href', '').replace('/lyrics/', '').replace('.html', '')
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
songlyrics.content = NotLoaded
|
songlyrics.content = NotLoaded
|
||||||
|
|
@ -53,15 +53,15 @@ class ArtistSongsPage(BasePage):
|
||||||
class SongResultsPage(BasePage):
|
class SongResultsPage(BasePage):
|
||||||
def iter_lyrics(self):
|
def iter_lyrics(self):
|
||||||
first = True
|
first = True
|
||||||
for tr in self.parser.select(self.document.getroot(),'table[title~=Results] tr'):
|
for tr in self.parser.select(self.document.getroot(), 'table[title~=Results] tr'):
|
||||||
if first:
|
if first:
|
||||||
first = False
|
first = False
|
||||||
continue
|
continue
|
||||||
artist = NotAvailable
|
artist = NotAvailable
|
||||||
ftitle = self.parser.select(tr,'a > font > font',1)
|
ftitle = self.parser.select(tr, 'a > font > font', 1)
|
||||||
title = unicode(ftitle.getparent().getparent().text_content())
|
title = unicode(ftitle.getparent().getparent().text_content())
|
||||||
id = ftitle.getparent().getparent().attrib.get('href','').replace('/lyrics/','').replace('.html','')
|
id = ftitle.getparent().getparent().attrib.get('href', '').replace('/lyrics/', '').replace('.html', '')
|
||||||
aartist = self.parser.select(tr,'a')[-1]
|
aartist = self.parser.select(tr, 'a')[-1]
|
||||||
artist = unicode(aartist.text)
|
artist = unicode(aartist.text)
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
|
|
@ -73,12 +73,12 @@ class SonglyricsPage(BasePage):
|
||||||
def get_lyrics(self, id):
|
def get_lyrics(self, id):
|
||||||
artist = NotAvailable
|
artist = NotAvailable
|
||||||
title = NotAvailable
|
title = NotAvailable
|
||||||
l_artitle = self.parser.select(self.document.getroot(),'table.text td > b > h2')
|
l_artitle = self.parser.select(self.document.getroot(), 'table.text td > b > h2')
|
||||||
if len(l_artitle) > 0:
|
if len(l_artitle) > 0:
|
||||||
artitle = l_artitle[0].text.split(' Lyrics by ')
|
artitle = l_artitle[0].text.split(' Lyrics by ')
|
||||||
artist = unicode(artitle[1])
|
artist = unicode(artitle[1])
|
||||||
title = unicode(artitle[0])
|
title = unicode(artitle[0])
|
||||||
content = unicode(self.parser.select(self.document.getroot(),'div#songlyrics',1).text_content().strip())
|
content = unicode(self.parser.select(self.document.getroot(), 'div#songlyrics', 1).text_content().strip())
|
||||||
songlyrics = SongLyrics(id, title)
|
songlyrics = SongLyrics(id, title)
|
||||||
songlyrics.artist = artist
|
songlyrics.artist = artist
|
||||||
songlyrics.content = content
|
songlyrics.content = content
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ class SeeklyricsTest(BackendTest):
|
||||||
BACKEND = 'seeklyrics'
|
BACKEND = 'seeklyrics'
|
||||||
|
|
||||||
def test_search_song_n_get(self):
|
def test_search_song_n_get(self):
|
||||||
l_lyrics = list(self.backend.iter_lyrics('song','Complainte'))
|
l_lyrics = list(self.backend.iter_lyrics('song', 'Complainte'))
|
||||||
for songlyrics in l_lyrics:
|
for songlyrics in l_lyrics:
|
||||||
assert songlyrics.id
|
assert songlyrics.id
|
||||||
assert songlyrics.title
|
assert songlyrics.title
|
||||||
|
|
@ -38,7 +38,7 @@ class SeeklyricsTest(BackendTest):
|
||||||
assert full_lyr.content is not NotLoaded
|
assert full_lyr.content is not NotLoaded
|
||||||
|
|
||||||
def test_search_artist(self):
|
def test_search_artist(self):
|
||||||
l_lyrics = list(self.backend.iter_lyrics('artist','boris vian'))
|
l_lyrics = list(self.backend.iter_lyrics('artist', 'boris vian'))
|
||||||
for songlyrics in l_lyrics:
|
for songlyrics in l_lyrics:
|
||||||
assert songlyrics.id
|
assert songlyrics.id
|
||||||
assert songlyrics.title
|
assert songlyrics.title
|
||||||
|
|
|
||||||
|
|
@ -17,10 +17,10 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported
|
from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported
|
||||||
from weboob.tools.backend import BaseBackend
|
from weboob.tools.backend import BaseBackend
|
||||||
|
|
||||||
from .browser import TvsubtitlesBrowser,LANGUAGE_LIST
|
from .browser import TvsubtitlesBrowser, LANGUAGE_LIST
|
||||||
|
|
||||||
from urllib import quote_plus
|
from urllib import quote_plus
|
||||||
|
|
||||||
|
|
@ -52,4 +52,4 @@ class TvsubtitlesBackend(BaseBackend, ICapSubtitle):
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
if language not in LANGUAGE_LIST:
|
if language not in LANGUAGE_LIST:
|
||||||
raise LanguageNotSupported()
|
raise LanguageNotSupported()
|
||||||
return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8')))
|
return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))
|
||||||
|
|
|
||||||
|
|
@ -20,14 +20,14 @@
|
||||||
|
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
from .pages import SeriePage, SearchPage, SeasonPage,HomePage
|
from .pages import SeriePage, SearchPage, SeasonPage, HomePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['TvsubtitlesBrowser']
|
__all__ = ['TvsubtitlesBrowser']
|
||||||
|
|
||||||
LANGUAGE_LIST = ['en','es','fr','de','br','ru','ua','it','gr',
|
LANGUAGE_LIST = ['en', 'es', 'fr', 'de', 'br', 'ru', 'ua', 'it', 'gr',
|
||||||
'ar','hu','pl','tr','nl','pt','sv','da','fi',
|
'ar', 'hu', 'pl', 'tr', 'nl', 'pt', 'sv', 'da', 'fi',
|
||||||
'ko','cn','jp','bg','cz','ro']
|
'ko', 'cn', 'jp', 'bg', 'cz', 'ro']
|
||||||
|
|
||||||
|
|
||||||
class TvsubtitlesBrowser(BaseBrowser):
|
class TvsubtitlesBrowser(BaseBrowser):
|
||||||
|
|
@ -39,13 +39,13 @@ class TvsubtitlesBrowser(BaseBrowser):
|
||||||
'http://www.tvsubtitles.net': HomePage,
|
'http://www.tvsubtitles.net': HomePage,
|
||||||
'http://www.tvsubtitles.net/search.php': SearchPage,
|
'http://www.tvsubtitles.net/search.php': SearchPage,
|
||||||
'http://www.tvsubtitles.net/tvshow-.*.html': SeriePage,
|
'http://www.tvsubtitles.net/tvshow-.*.html': SeriePage,
|
||||||
'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html' : SeasonPage
|
'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html': SeasonPage
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_subtitles(self, language, pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
self.location('http://www.tvsubtitles.net')
|
self.location('http://www.tvsubtitles.net')
|
||||||
assert self.is_on_page(HomePage)
|
assert self.is_on_page(HomePage)
|
||||||
return self.page.iter_subtitles(language,pattern)
|
return self.page.iter_subtitles(language, pattern)
|
||||||
|
|
||||||
def get_subtitle(self, id):
|
def get_subtitle(self, id):
|
||||||
self.location('http://www.tvsubtitles.net/subtitle-%s.html' % id)
|
self.location('http://www.tvsubtitles.net/subtitle-%s.html' % id)
|
||||||
|
|
|
||||||
|
|
@ -22,11 +22,11 @@ from weboob.capabilities.subtitle import Subtitle
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['HomePage','SearchPage','SeriePage','SeasonPage']
|
__all__ = ['HomePage', 'SearchPage', 'SeriePage', 'SeasonPage']
|
||||||
|
|
||||||
|
|
||||||
class HomePage(BasePage):
|
class HomePage(BasePage):
|
||||||
def iter_subtitles(self,language,pattern):
|
def iter_subtitles(self, language, pattern):
|
||||||
self.browser.select_form(nr=0)
|
self.browser.select_form(nr=0)
|
||||||
self.browser['q'] = pattern.encode('utf-8')
|
self.browser['q'] = pattern.encode('utf-8')
|
||||||
self.browser.submit()
|
self.browser.submit()
|
||||||
|
|
@ -38,15 +38,15 @@ class HomePage(BasePage):
|
||||||
class SearchPage(BasePage):
|
class SearchPage(BasePage):
|
||||||
""" Page which contains results as a list of series
|
""" Page which contains results as a list of series
|
||||||
"""
|
"""
|
||||||
def iter_subtitles(self,language):
|
def iter_subtitles(self, language):
|
||||||
list_result = self.parser.select(self.document.getroot(),'div.left_articles ul')
|
list_result = self.parser.select(self.document.getroot(), 'div.left_articles ul')
|
||||||
if len(list_result) > 0:
|
if len(list_result) > 0:
|
||||||
li_result = self.parser.select(list_result[0],'li')
|
li_result = self.parser.select(list_result[0], 'li')
|
||||||
for line in li_result:
|
for line in li_result:
|
||||||
if len(self.parser.select(line,'img[alt=%s]'%language)) > 0:
|
if len(self.parser.select(line, 'img[alt=%s]' % language)) > 0:
|
||||||
link = self.parser.select(line,'a',1)
|
link = self.parser.select(line, 'a', 1)
|
||||||
href = link.attrib.get('href','')
|
href = link.attrib.get('href', '')
|
||||||
self.browser.location("http://%s%s"%(self.browser.DOMAIN,href))
|
self.browser.location("http://%s%s" % (self.browser.DOMAIN, href))
|
||||||
assert self.browser.is_on_page(SeriePage)
|
assert self.browser.is_on_page(SeriePage)
|
||||||
for subtitle in self.browser.page.iter_subtitles(language):
|
for subtitle in self.browser.page.iter_subtitles(language):
|
||||||
yield subtitle
|
yield subtitle
|
||||||
|
|
@ -55,26 +55,27 @@ class SearchPage(BasePage):
|
||||||
class SeriePage(BasePage):
|
class SeriePage(BasePage):
|
||||||
""" Page of all seasons
|
""" Page of all seasons
|
||||||
"""
|
"""
|
||||||
def iter_subtitles(self,language,only_one_season=False):
|
def iter_subtitles(self, language, only_one_season=False):
|
||||||
# handle the current season
|
# handle the current season
|
||||||
last_table_line = self.parser.select(self.document.getroot(),'table#table5 tr')[-1]
|
last_table_line = self.parser.select(self.document.getroot(), 'table#table5 tr')[-1]
|
||||||
amount = int(self.parser.select(last_table_line,'td')[2].text_content())
|
amount = int(self.parser.select(last_table_line, 'td')[2].text_content())
|
||||||
if amount > 0:
|
if amount > 0:
|
||||||
my_lang_img = self.parser.select(last_table_line,'img[alt=%s]'%language)
|
my_lang_img = self.parser.select(last_table_line, 'img[alt=%s]' % language)
|
||||||
if len(my_lang_img) > 0:
|
if len(my_lang_img) > 0:
|
||||||
url_current_season = self.browser.geturl().split('/')[-1].replace('tvshow','subtitle').replace('.html','-%s.html'%language)
|
url_current_season = self.browser.geturl().split('/')[-1].replace(
|
||||||
|
'tvshow', 'subtitle').replace('.html', '-%s.html' % language)
|
||||||
self.browser.location(url_current_season)
|
self.browser.location(url_current_season)
|
||||||
assert self.browser.is_on_page(SeasonPage)
|
assert self.browser.is_on_page(SeasonPage)
|
||||||
yield self.browser.page.iter_subtitles()
|
yield self.browser.page.iter_subtitles()
|
||||||
|
|
||||||
if not only_one_season:
|
if not only_one_season:
|
||||||
# handle the other seasons by following top links
|
# handle the other seasons by following top links
|
||||||
other_seasons_links = self.parser.select(self.document.getroot(),'p.description a')
|
other_seasons_links = self.parser.select(self.document.getroot(), 'p.description a')
|
||||||
for link in other_seasons_links:
|
for link in other_seasons_links:
|
||||||
href = link.attrib.get('href','')
|
href = link.attrib.get('href', '')
|
||||||
self.browser.location("http://%s/%s"%(self.browser.DOMAIN,href))
|
self.browser.location("http://%s/%s" % (self.browser.DOMAIN, href))
|
||||||
assert self.browser.is_on_page(SeriePage)
|
assert self.browser.is_on_page(SeriePage)
|
||||||
for subtitle in self.browser.page.iter_subtitles(language,True):
|
for subtitle in self.browser.page.iter_subtitles(language, True):
|
||||||
yield subtitle
|
yield subtitle
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -82,19 +83,19 @@ class SeasonPage(BasePage):
|
||||||
""" Page of a season with the right language
|
""" Page of a season with the right language
|
||||||
"""
|
"""
|
||||||
def get_subtitle(self):
|
def get_subtitle(self):
|
||||||
filename_line = self.parser.select(self.document.getroot(),'img[alt=filename]',1).getparent().getparent()
|
filename_line = self.parser.select(self.document.getroot(), 'img[alt=filename]', 1).getparent().getparent()
|
||||||
name = unicode(self.parser.select(filename_line,'td')[2].text)
|
name = unicode(self.parser.select(filename_line, 'td')[2].text)
|
||||||
id = self.browser.geturl().split('/')[-1].replace('.html','').replace('subtitle-','')
|
id = self.browser.geturl().split('/')[-1].replace('.html', '').replace('subtitle-', '')
|
||||||
url = unicode('http://%s/download-%s.html'%(self.browser.DOMAIN,id))
|
url = unicode('http://%s/download-%s.html' % (self.browser.DOMAIN, id))
|
||||||
amount_line = self.parser.select(self.document.getroot(),'tr[title~=amount]',1)
|
amount_line = self.parser.select(self.document.getroot(), 'tr[title~=amount]', 1)
|
||||||
nb_cd = int(self.parser.select(amount_line,'td')[2].text)
|
nb_cd = int(self.parser.select(amount_line, 'td')[2].text)
|
||||||
lang = unicode(url.split('-')[-1].split('.html')[0])
|
lang = unicode(url.split('-')[-1].split('.html')[0])
|
||||||
filenames_line = self.parser.select(self.document.getroot(),'tr[title~=list]',1)
|
filenames_line = self.parser.select(self.document.getroot(), 'tr[title~=list]', 1)
|
||||||
file_names = self.parser.select(filenames_line,'td')[2].text_content().strip().replace('.srt','.srt\n')
|
file_names = self.parser.select(filenames_line, 'td')[2].text_content().strip().replace('.srt', '.srt\n')
|
||||||
desc = u"files :\n"
|
desc = u"files :\n"
|
||||||
desc += file_names
|
desc += file_names
|
||||||
|
|
||||||
subtitle = Subtitle(id,name)
|
subtitle = Subtitle(id, name)
|
||||||
subtitle.url = url
|
subtitle.url = url
|
||||||
subtitle.language = lang
|
subtitle.language = lang
|
||||||
subtitle.nb_cd = nb_cd
|
subtitle.nb_cd = nb_cd
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ class TvsubtitlesTest(BackendTest):
|
||||||
BACKEND = 'tvsubtitles'
|
BACKEND = 'tvsubtitles'
|
||||||
|
|
||||||
def test_subtitle(self):
|
def test_subtitle(self):
|
||||||
subtitles = list(self.backend.iter_subtitles('fr','sopranos'))
|
subtitles = list(self.backend.iter_subtitles('fr', 'sopranos'))
|
||||||
assert (len(subtitles) > 0)
|
assert (len(subtitles) > 0)
|
||||||
for subtitle in subtitles:
|
for subtitle in subtitles:
|
||||||
assert subtitle.url.startswith('http')
|
assert subtitle.url.startswith('http')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue