autopep8 with 120 chars line length on my modules

This commit is contained in:
Julien Veyssier 2013-03-16 01:39:35 +01:00
commit 5d923bc73b
39 changed files with 434 additions and 426 deletions

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.recipe import ICapRecipe,Recipe from weboob.capabilities.recipe import ICapRecipe, Recipe
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from .browser import SevenFiftyGramsBrowser from .browser import SevenFiftyGramsBrowser
@ -46,16 +46,16 @@ class SevenFiftyGramsBackend(BaseBackend, ICapRecipe):
def fill_recipe(self, recipe, fields): def fill_recipe(self, recipe, fields):
if 'nb_person' in fields or 'instructions' in fields: if 'nb_person' in fields or 'instructions' in fields:
rec = self.get_recipe(recipe.id) rec = self.get_recipe(recipe.id)
recipe.picture_url = rec.picture_url recipe.picture_url = rec.picture_url
recipe.instructions = rec.instructions recipe.instructions = rec.instructions
recipe.ingredients = rec.ingredients recipe.ingredients = rec.ingredients
recipe.comments = rec.comments recipe.comments = rec.comments
recipe.nb_person = rec.nb_person recipe.nb_person = rec.nb_person
recipe.cooking_time = rec.cooking_time recipe.cooking_time = rec.cooking_time
recipe.preparation_time = rec.preparation_time recipe.preparation_time = rec.preparation_time
return recipe return recipe
OBJECTS = { OBJECTS = {
Recipe:fill_recipe, Recipe: fill_recipe,
} }

View file

@ -34,10 +34,10 @@ class SevenFiftyGramsBrowser(BaseBrowser):
PAGES = { PAGES = {
'http://www.750g.com/recettes_.*.htm': ResultsPage, 'http://www.750g.com/recettes_.*.htm': ResultsPage,
'http://www.750g.com/fiche_de_cuisine_complete.htm\?recettes_id=[0-9]*': RecipePage, 'http://www.750g.com/fiche_de_cuisine_complete.htm\?recettes_id=[0-9]*': RecipePage,
} }
def iter_recipes(self, pattern): def iter_recipes(self, pattern):
self.location('http://www.750g.com/recettes_%s.htm' % (pattern.replace(' ','_'))) self.location('http://www.750g.com/recettes_%s.htm' % (pattern.replace(' ', '_')))
assert self.is_on_page(ResultsPage) assert self.is_on_page(ResultsPage)
return self.page.iter_recipes() return self.page.iter_recipes()

View file

@ -30,35 +30,36 @@ class ResultsPage(BasePage):
""" Page which contains results as a list of recipies """ Page which contains results as a list of recipies
""" """
def iter_recipes(self): def iter_recipes(self):
for div in self.parser.select(self.document.getroot(),'div.recette_description > div.data'): for div in self.parser.select(self.document.getroot(), 'div.recette_description > div.data'):
links = self.parser.select(div,'div.info > p.title > a.fn') links = self.parser.select(div, 'div.info > p.title > a.fn')
if len(links) > 0: if len(links) > 0:
link = links[0] link = links[0]
title = unicode(link.text) title = unicode(link.text)
#id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm')) # id = unicode(link.attrib.get('href','').strip('/').replace('.htm','htm'))
id = unicode(self.parser.select(div,'div.carnet-add a',1).attrib.get('href','').split('=')[-1]) id = unicode(self.parser.select(div, 'div.carnet-add a', 1).attrib.get('href', '').split('=')[-1])
thumbnail_url = NotAvailable thumbnail_url = NotAvailable
short_description = NotAvailable short_description = NotAvailable
imgs = self.parser.select(div,'img.recipe-image') imgs = self.parser.select(div, 'img.recipe-image')
if len(imgs) > 0: if len(imgs) > 0:
thumbnail_url = unicode(imgs[0].attrib.get('src','')) thumbnail_url = unicode(imgs[0].attrib.get('src', ''))
short_description = unicode(' '.join(self.parser.select(div,'div.infos_column',1).text_content().split()).strip()) short_description = unicode(' '.join(self.parser.select(
imgs_cost = self.parser.select(div,'div.infos_column img') div, 'div.infos_column', 1).text_content().split()).strip())
imgs_cost = self.parser.select(div, 'div.infos_column img')
cost_tot = len(imgs_cost) cost_tot = len(imgs_cost)
cost_on = 0 cost_on = 0
for img in imgs_cost: for img in imgs_cost:
if img.attrib.get('src','').endswith('euro_on.png'): if img.attrib.get('src', '').endswith('euro_on.png'):
cost_on += 1 cost_on += 1
short_description += u' %s/%s'%(cost_on,cost_tot) short_description += u' %s/%s' % (cost_on, cost_tot)
recipe = Recipe(id,title) recipe = Recipe(id, title)
recipe.thumbnail_url = thumbnail_url recipe.thumbnail_url = thumbnail_url
recipe.short_description= short_description recipe.short_description = short_description
recipe.instructions = NotLoaded recipe.instructions = NotLoaded
recipe.ingredients = NotLoaded recipe.ingredients = NotLoaded
recipe.nb_person = NotLoaded recipe.nb_person = NotLoaded
recipe.cooking_time = NotLoaded recipe.cooking_time = NotLoaded
recipe.preparation_time = NotLoaded recipe.preparation_time = NotLoaded
yield recipe yield recipe
@ -76,10 +77,10 @@ class RecipePage(BasePage):
instructions = NotAvailable instructions = NotAvailable
comments = [] comments = []
title = unicode(self.parser.select(self.document.getroot(),'head > title',1).text.split(' - ')[1]) title = unicode(self.parser.select(self.document.getroot(), 'head > title', 1).text.split(' - ')[1])
main = self.parser.select(self.document.getroot(),'div.recette_description',1) main = self.parser.select(self.document.getroot(), 'div.recette_description', 1)
rec_infos = self.parser.select(self.document.getroot(),'div.recette_infos div.infos_column strong') rec_infos = self.parser.select(self.document.getroot(), 'div.recette_infos div.infos_column strong')
for info_title in rec_infos: for info_title in rec_infos:
if u'Temps de préparation' in unicode(info_title.text): if u'Temps de préparation' in unicode(info_title.text):
if info_title.tail.strip() != '': if info_title.tail.strip() != '':
@ -96,31 +97,31 @@ class RecipePage(BasePage):
nb_person = int(info_title.tail) nb_person = int(info_title.tail)
ingredients = [] ingredients = []
p_ing = self.parser.select(main,'div.data.top.left > div.content p') p_ing = self.parser.select(main, 'div.data.top.left > div.content p')
for ing in p_ing: for ing in p_ing:
ingtxt = unicode(ing.text_content().strip()) ingtxt = unicode(ing.text_content().strip())
if ingtxt != '': if ingtxt != '':
ingredients.append(ingtxt) ingredients.append(ingtxt)
lines_instr = self.parser.select(main,'div.data.top.right div.content li') lines_instr = self.parser.select(main, 'div.data.top.right div.content li')
if len(lines_instr) > 0: if len(lines_instr) > 0:
instructions = u'' instructions = u''
for line in lines_instr: for line in lines_instr:
inst = ' '.join(line.text_content().strip().split()) inst = ' '.join(line.text_content().strip().split())
instructions += '%s\n'% inst instructions += '%s\n' % inst
instructions = instructions.strip('\n') instructions = instructions.strip('\n')
imgillu = self.parser.select(self.document.getroot(),'div.resume_recette_illustree img.photo') imgillu = self.parser.select(self.document.getroot(), 'div.resume_recette_illustree img.photo')
if len(imgillu) > 0: if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src','')) picture_url = unicode(imgillu[0].attrib.get('src', ''))
for divcom in self.parser.select(self.document.getroot(),'div.comment-outer'): for divcom in self.parser.select(self.document.getroot(), 'div.comment-outer'):
comtxt = unicode(' '.join(divcom.text_content().strip().split())) comtxt = unicode(' '.join(divcom.text_content().strip().split()))
if u'| Répondre' in comtxt: if u'| Répondre' in comtxt:
comtxt = comtxt.strip('0123456789').replace(u' | Répondre','') comtxt = comtxt.strip('0123456789').replace(u' | Répondre', '')
comments.append(comtxt) comments.append(comtxt)
recipe = Recipe(id,title) recipe = Recipe(id, title)
recipe.preparation_time = preparation_time recipe.preparation_time = preparation_time
recipe.cooking_time = cooking_time recipe.cooking_time = cooking_time
recipe.nb_person = nb_person recipe.nb_person = nb_person

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from .browser import AttilasubBrowser from .browser import AttilasubBrowser
@ -53,4 +53,4 @@ class AttilasubBackend(BaseBackend, ICapSubtitle):
def iter_subtitles(self, language, pattern): def iter_subtitles(self, language, pattern):
if language not in self.LANGUAGE_LIST: if language not in self.LANGUAGE_LIST:
raise LanguageNotSupported() raise LanguageNotSupported()
return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8'))) return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))

View file

@ -34,12 +34,13 @@ class AttilasubBrowser(BaseBrowser):
PAGES = { PAGES = {
'http://search.freefind.com/find.html.*': SearchPage, 'http://search.freefind.com/find.html.*': SearchPage,
'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage, 'http://davidbillemont3.free.fr/.*.htm': SubtitlesPage,
} }
def iter_subtitles(self, language, pattern): def iter_subtitles(self, language, pattern):
self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' % pattern.encode('utf-8')) self.location('http://search.freefind.com/find.html?id=81131980&_charset_=&bcd=%%F7&scs=1&pageid=r&query=%s&mode=Find%%20pages%%20matching%%20ALL%%20words' %
pattern.encode('utf-8'))
assert self.is_on_page(SearchPage) assert self.is_on_page(SearchPage)
return self.page.iter_subtitles(language,pattern) return self.page.iter_subtitles(language, pattern)
def get_subtitle(self, id): def get_subtitle(self, id):
url_end = id.split('|')[0] url_end = id.split('|')[0]

View file

@ -23,16 +23,16 @@ from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
__all__ = ['SubtitlesPage','SearchPage'] __all__ = ['SubtitlesPage', 'SearchPage']
class SearchPage(BasePage): class SearchPage(BasePage):
def iter_subtitles(self, language, pattern): def iter_subtitles(self, language, pattern):
fontresult = self.parser.select(self.document.getroot(),'div.search-results font.search-results') fontresult = self.parser.select(self.document.getroot(), 'div.search-results font.search-results')
# for each result in freefind, explore the subtitle list page to iter subtitles # for each result in freefind, explore the subtitle list page to iter subtitles
for res in fontresult: for res in fontresult:
a = self.parser.select(res,'a',1) a = self.parser.select(res, 'a', 1)
url = a.attrib.get('href','') url = a.attrib.get('href', '')
self.browser.location(url) self.browser.location(url)
assert self.browser.is_on_page(SubtitlesPage) assert self.browser.is_on_page(SubtitlesPage)
# subtitles page does the job # subtitles page does the job
@ -41,15 +41,15 @@ class SearchPage(BasePage):
class SubtitlesPage(BasePage): class SubtitlesPage(BasePage):
def get_subtitle(self,id): def get_subtitle(self, id):
href = id.split('|')[1] href = id.split('|')[1]
# we have to find the 'tr' which contains the link to this address # we have to find the 'tr' which contains the link to this address
a = self.parser.select(self.document.getroot(),'a[href="%s"]'%href,1) a = self.parser.select(self.document.getroot(), 'a[href="%s"]' % href, 1)
line = a.getparent().getparent().getparent().getparent().getparent() line = a.getparent().getparent().getparent().getparent().getparent()
cols = self.parser.select(line,'td') cols = self.parser.select(line, 'td')
traduced_title = self.parser.select(cols[0],'font',1).text.lower() traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
original_title = self.parser.select(cols[1],'font',1).text.lower() original_title = self.parser.select(cols[1], 'font', 1).text.lower()
nb_cd = self.parser.select(cols[2],'font',1).text.strip() nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
nb_cd = int(nb_cd.split()[0]) nb_cd = int(nb_cd.split()[0])
traduced_title_words = traduced_title.split() traduced_title_words = traduced_title.split()
@ -59,30 +59,30 @@ class SubtitlesPage(BasePage):
traduced_title = " ".join(traduced_title_words) traduced_title = " ".join(traduced_title_words)
original_title = " ".join(original_title_words) original_title = " ".join(original_title_words)
name = unicode('%s (%s)'%(original_title,traduced_title)) name = unicode('%s (%s)' % (original_title, traduced_title))
url = unicode('http://davidbillemont3.free.fr/%s'%href) url = unicode('http://davidbillemont3.free.fr/%s' % href)
subtitle = Subtitle(id,name) subtitle = Subtitle(id, name)
subtitle.url = url subtitle.url = url
subtitle.language = unicode('fr') subtitle.language = unicode('fr')
subtitle.nb_cd = nb_cd subtitle.nb_cd = nb_cd
subtitle.description = NotAvailable subtitle.description = NotAvailable
return subtitle return subtitle
def iter_subtitles(self,language, pattern): def iter_subtitles(self, language, pattern):
pattern = pattern.strip().replace('+',' ').lower() pattern = pattern.strip().replace('+', ' ').lower()
pattern_words = pattern.split() pattern_words = pattern.split()
tab = self.parser.select(self.document.getroot(),'table[bordercolor="#B8C0B2"]') tab = self.parser.select(self.document.getroot(), 'table[bordercolor="#B8C0B2"]')
if len(tab) == 0: if len(tab) == 0:
tab = self.parser.select(self.document.getroot(),'table[bordercolordark="#B8C0B2"]') tab = self.parser.select(self.document.getroot(), 'table[bordercolordark="#B8C0B2"]')
if len(tab) == 0: if len(tab) == 0:
return return
# some results of freefind point on useless pages # some results of freefind point on useless pages
if tab[0].attrib.get('width','') != '100%': if tab[0].attrib.get('width', '') != '100%':
return return
for line in tab[0].getiterator('tr'): for line in tab[0].getiterator('tr'):
cols = self.parser.select(line,'td') cols = self.parser.select(line, 'td')
traduced_title = self.parser.select(cols[0],'font',1).text.lower() traduced_title = self.parser.select(cols[0], 'font', 1).text.lower()
original_title = self.parser.select(cols[1],'font',1).text.lower() original_title = self.parser.select(cols[1], 'font', 1).text.lower()
traduced_title_words = traduced_title.split() traduced_title_words = traduced_title.split()
original_title_words = original_title.split() original_title_words = original_title.split()
@ -98,13 +98,13 @@ class SubtitlesPage(BasePage):
traduced_title = " ".join(traduced_title_words) traduced_title = " ".join(traduced_title_words)
original_title = " ".join(original_title_words) original_title = " ".join(original_title_words)
nb_cd = self.parser.select(cols[2],'font',1).text.strip() nb_cd = self.parser.select(cols[2], 'font', 1).text.strip()
nb_cd = int(nb_cd.strip(' CD')) nb_cd = int(nb_cd.strip(' CD'))
name = unicode('%s (%s)'%(original_title,traduced_title)) name = unicode('%s (%s)' % (original_title, traduced_title))
href = self.parser.select(cols[3],'a',1).attrib.get('href','') href = self.parser.select(cols[3], 'a', 1).attrib.get('href', '')
url = unicode('http://davidbillemont3.free.fr/%s'%href) url = unicode('http://davidbillemont3.free.fr/%s' % href)
id = unicode('%s|%s'%(self.browser.geturl().split('/')[-1],href)) id = unicode('%s|%s' % (self.browser.geturl().split('/')[-1], href))
subtitle = Subtitle(id,name) subtitle = Subtitle(id, name)
subtitle.url = url subtitle.url = url
subtitle.language = unicode('fr') subtitle.language = unicode('fr')
subtitle.nb_cd = nb_cd subtitle.nb_cd = nb_cd

View file

@ -27,7 +27,7 @@ class AttilasubTest(BackendTest):
BACKEND = 'attilasub' BACKEND = 'attilasub'
def test_subtitle(self): def test_subtitle(self):
subtitles = list(self.backend.iter_subtitles('fr','spiderman')) subtitles = list(self.backend.iter_subtitles('fr', 'spiderman'))
assert (len(subtitles) > 0) assert (len(subtitles) > 0)
for subtitle in subtitles: for subtitle in subtitles:
path, qs = urllib.splitquery(subtitle.url) path, qs = urllib.splitquery(subtitle.url)

View file

@ -67,25 +67,25 @@ class ImdbBackend(BaseBackend, ICapCinema):
return self.browser.get_person_biography(id) return self.browser.get_person_biography(id)
def get_movie_releases(self, id, country=None): def get_movie_releases(self, id, country=None):
return self.browser.get_movie_releases(id,country) return self.browser.get_movie_releases(id, country)
def fill_person(self, person, fields): def fill_person(self, person, fields):
if 'real_name' in fields or 'birth_place' in fields\ if 'real_name' in fields or 'birth_place' in fields\
or 'death_date' in fields or 'nationality' in fields\ or 'death_date' in fields or 'nationality' in fields\
or 'short_biography' in fields or 'roles' in fields\ or 'short_biography' in fields or 'roles' in fields\
or 'birth_date' in fields or 'thumbnail_url' in fields\ or 'birth_date' in fields or 'thumbnail_url' in fields\
or 'gender' in fields or fields is None: or 'gender' in fields or fields is None:
per = self.get_person(person.id) per = self.get_person(person.id)
person.real_name = per.real_name person.real_name = per.real_name
person.birth_date = per.birth_date person.birth_date = per.birth_date
person.death_date = per.death_date person.death_date = per.death_date
person.birth_place = per.birth_place person.birth_place = per.birth_place
person.gender = per.gender person.gender = per.gender
person.nationality = per.nationality person.nationality = per.nationality
person.short_biography = per.short_biography person.short_biography = per.short_biography
person.short_description = per.short_description person.short_description = per.short_description
person.roles = per.roles person.roles = per.roles
person.thumbnail_url = per.thumbnail_url person.thumbnail_url = per.thumbnail_url
if 'biography' in fields: if 'biography' in fields:
person.biography = self.get_person_biography(person.id) person.biography = self.get_person_biography(person.id)
@ -94,19 +94,19 @@ class ImdbBackend(BaseBackend, ICapCinema):
def fill_movie(self, movie, fields): def fill_movie(self, movie, fields):
if 'other_titles' in fields or 'release_date' in fields\ if 'other_titles' in fields or 'release_date' in fields\
or 'duration' in fields or 'country' in fields\ or 'duration' in fields or 'country' in fields\
or 'roles' in fields or 'note' in fields\ or 'roles' in fields or 'note' in fields\
or 'thumbnail_url' in fields: or 'thumbnail_url' in fields:
mov = self.get_movie(movie.id) mov = self.get_movie(movie.id)
movie.other_titles = mov.other_titles movie.other_titles = mov.other_titles
movie.release_date = mov.release_date movie.release_date = mov.release_date
movie.duration = mov.duration movie.duration = mov.duration
movie.pitch = mov.pitch movie.pitch = mov.pitch
movie.country = mov.country movie.country = mov.country
movie.note = mov.note movie.note = mov.note
movie.roles = mov.roles movie.roles = mov.roles
movie.short_description= mov.short_description movie.short_description = mov.short_description
movie.thumbnail_url = mov.thumbnail_url movie.thumbnail_url = mov.thumbnail_url
if 'all_release_dates' in fields: if 'all_release_dates' in fields:
movie.all_release_dates = self.get_movie_releases(movie.id) movie.all_release_dates = self.get_movie_releases(movie.id)
@ -114,6 +114,6 @@ class ImdbBackend(BaseBackend, ICapCinema):
return movie return movie
OBJECTS = { OBJECTS = {
Person:fill_person, Person: fill_person,
Movie:fill_movie Movie: fill_movie
} }

View file

@ -42,53 +42,55 @@ class ImdbBrowser(BaseBrowser):
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage, 'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage, 'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
'http://www.imdb.com/name/nm[0-9]*/filmo.*': FilmographyPage, 'http://www.imdb.com/name/nm[0-9]*/filmo.*': FilmographyPage,
} }
def iter_movies(self, pattern): def iter_movies(self, pattern):
res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8')) res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8'))
jres = json.loads(res) jres = json.loads(res)
for cat in ['title_popular','title_exact','title_approx']: for cat in ['title_popular', 'title_exact', 'title_approx']:
if cat in jres: if cat in jres:
for m in jres[cat]: for m in jres[cat]:
tdesc = unicode(m['title_description']) tdesc = unicode(m['title_description'])
if '<a' in tdesc and '>' in tdesc: if '<a' in tdesc and '>' in tdesc:
short_description = u'%s %s'%(tdesc.split('<')[0].strip(', '), tdesc.split('>')[1].split('<')[0]) short_description = u'%s %s' % (tdesc.split('<')[
0].strip(', '), tdesc.split('>')[1].split('<')[0])
else: else:
short_description = tdesc.strip(', ') short_description = tdesc.strip(', ')
movie = Movie(m['id'],latin2unicode(m['title'])) movie = Movie(m['id'], latin2unicode(m['title']))
movie.other_titles = NotLoaded movie.other_titles = NotLoaded
movie.release_date = NotLoaded movie.release_date = NotLoaded
movie.duration = NotLoaded movie.duration = NotLoaded
movie.short_description = latin2unicode(short_description) movie.short_description = latin2unicode(short_description)
movie.pitch = NotLoaded movie.pitch = NotLoaded
movie.country = NotLoaded movie.country = NotLoaded
movie.note = NotLoaded movie.note = NotLoaded
movie.roles = NotLoaded movie.roles = NotLoaded
movie.all_release_dates= NotLoaded movie.all_release_dates = NotLoaded
movie.thumbnail_url = NotLoaded movie.thumbnail_url = NotLoaded
yield movie yield movie
def iter_persons(self, pattern): def iter_persons(self, pattern):
res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s' % pattern.encode('utf-8')) res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s' % pattern.encode('utf-8'))
jres = json.loads(res) jres = json.loads(res)
for cat in ['name_popular','name_exact','name_approx']: for cat in ['name_popular', 'name_exact', 'name_approx']:
if cat in jres: if cat in jres:
for p in jres[cat]: for p in jres[cat]:
person = Person(p['id'],latin2unicode(p['name'])) person = Person(p['id'], latin2unicode(p['name']))
person.real_name = NotLoaded person.real_name = NotLoaded
person.birth_place = NotLoaded person.birth_place = NotLoaded
person.birth_date = NotLoaded person.birth_date = NotLoaded
person.death_date = NotLoaded person.death_date = NotLoaded
person.gender = NotLoaded person.gender = NotLoaded
person.nationality = NotLoaded person.nationality = NotLoaded
person.short_biography= NotLoaded person.short_biography = NotLoaded
person.short_description= latin2unicode(p['description']) person.short_description = latin2unicode(p['description'])
person.roles = NotLoaded person.roles = NotLoaded
person.thumbnail_url = NotLoaded person.thumbnail_url = NotLoaded
yield person yield person
def get_movie(self, id): def get_movie(self, id):
res = self.readurl('http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id ) res = self.readurl(
'http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id)
if res is not None: if res is not None:
jres = json.loads(res) jres = json.loads(res)
else: else:
@ -122,7 +124,7 @@ class ImdbBrowser(BaseBrowser):
if 'also_known_as' in jres: if 'also_known_as' in jres:
for other_t in jres['also_known_as']: for other_t in jres['also_known_as']:
if 'country' in other_t and 'title' in other_t: if 'country' in other_t and 'title' in other_t:
other_titles.append('%s : %s' % (other_t['country'],htmlparser.unescape(other_t['title']))) other_titles.append('%s : %s' % (other_t['country'], htmlparser.unescape(other_t['title'])))
if 'release_date' in jres: if 'release_date' in jres:
dstr = str(jres['release_date']) dstr = str(jres['release_date'])
year = int(dstr[:4]) year = int(dstr[:4])
@ -134,31 +136,31 @@ class ImdbBrowser(BaseBrowser):
day = int(dstr[-2:]) day = int(dstr[-2:])
if day == 0: if day == 0:
day = 1 day = 1
release_date = datetime(year,month,day) release_date = datetime(year, month, day)
if 'country' in jres: if 'country' in jres:
country = u'' country = u''
for c in jres['country']: for c in jres['country']:
country += '%s, '%c country += '%s, ' % c
country = country[:-2] country = country[:-2]
if 'plot_simple' in jres: if 'plot_simple' in jres:
pitch = unicode(jres['plot_simple']) pitch = unicode(jres['plot_simple'])
if 'rating' in jres and 'rating_count' in jres: if 'rating' in jres and 'rating_count' in jres:
note = u'%s/10 (%s votes)'%(jres['rating'],jres['rating_count']) note = u'%s/10 (%s votes)' % (jres['rating'], jres['rating_count'])
for r in ['actor','director','writer']: for r in ['actor', 'director', 'writer']:
if '%ss'%r in jres: if '%ss' % r in jres:
roles['%s'%r] = list(jres['%ss'%r]) roles['%s' % r] = list(jres['%ss' % r])
movie = Movie(id,title) movie = Movie(id, title)
movie.other_titles = other_titles movie.other_titles = other_titles
movie.release_date = release_date movie.release_date = release_date
movie.duration = duration movie.duration = duration
movie.pitch = pitch movie.pitch = pitch
movie.country = country movie.country = country
movie.note = note movie.note = note
movie.roles = roles movie.roles = roles
movie.short_description= short_description movie.short_description = short_description
movie.all_release_dates= NotLoaded movie.all_release_dates = NotLoaded
movie.thumbnail_url = thumbnail_url movie.thumbnail_url = thumbnail_url
return movie return movie
def get_person(self, id): def get_person(self, id):
@ -175,7 +177,7 @@ class ImdbBrowser(BaseBrowser):
return self.page.get_biography() return self.page.get_biography()
def iter_movie_persons(self, movie_id, role): def iter_movie_persons(self, movie_id, role):
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id) self.location('http://www.imdb.com/title/%s/fullcredits' % movie_id)
assert self.is_on_page(MovieCrewPage) assert self.is_on_page(MovieCrewPage)
for p in self.page.iter_persons(role): for p in self.page.iter_persons(role):
yield p yield p
@ -192,13 +194,13 @@ class ImdbBrowser(BaseBrowser):
yield movie yield movie
def iter_movie_persons_ids(self, movie_id): def iter_movie_persons_ids(self, movie_id):
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id) self.location('http://www.imdb.com/title/%s/fullcredits' % movie_id)
assert self.is_on_page(MovieCrewPage) assert self.is_on_page(MovieCrewPage)
for person in self.page.iter_persons_ids(): for person in self.page.iter_persons_ids():
yield person yield person
def get_movie_releases(self,id, country): def get_movie_releases(self, id, country):
self.location('http://www.imdb.com/title/%s/releaseinfo'%id) self.location('http://www.imdb.com/title/%s/releaseinfo' % id)
assert self.is_on_page(ReleasePage) assert self.is_on_page(ReleasePage)
return self.page.get_movie_releases(country) return self.page.get_movie_releases(country)
@ -222,5 +224,5 @@ dict_hex = {'&#xE1;': u'á',
def latin2unicode(word): def latin2unicode(word):
for key in dict_hex.keys(): for key in dict_hex.keys():
word = word.replace(key,dict_hex[key]) word = word.replace(key, dict_hex[key])
return unicode(word) return unicode(word)

View file

@ -25,28 +25,28 @@ from weboob.tools.browser import BasePage
from datetime import datetime from datetime import datetime
__all__ = ['PersonPage','MovieCrewPage','BiographyPage','FilmographyPage','ReleasePage'] __all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
class ReleasePage(BasePage): class ReleasePage(BasePage):
''' Page containing releases of a movie ''' Page containing releases of a movie
''' '''
def get_movie_releases(self,country_filter): def get_movie_releases(self, country_filter):
result = unicode() result = unicode()
links = self.parser.select(self.document.getroot(),'b a') links = self.parser.select(self.document.getroot(), 'b a')
for a in links: for a in links:
href = a.attrib.get('href','') href = a.attrib.get('href', '')
if href.strip('/').split('/')[0] == 'calendar' and\ if href.strip('/').split('/')[0] == 'calendar' and\
(country_filter is None or href.split('region=')[-1].lower() == country_filter): (country_filter is None or href.split('region=')[-1].lower() == country_filter):
country = a.text country = a.text
td_date = self.parser.select(a.getparent().getparent().getparent(),'td')[1] td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
date_links = self.parser.select(td_date,'a') date_links = self.parser.select(td_date, 'a')
if len(date_links) > 1: if len(date_links) > 1:
date = date_links[1].attrib.get('href','').strip('/').split('/')[-1] date = date_links[1].attrib.get('href', '').strip('/').split('/')[-1]
date += '-'+date_links[0].attrib.get('href','').strip('/').split('/')[-1] date += '-'+date_links[0].attrib.get('href', '').strip('/').split('/')[-1]
else: else:
date = unicode(self.parser.select(a.getparent().getparent().getparent(),'td')[1].text_content()) date = unicode(self.parser.select(a.getparent().getparent().getparent(), 'td')[1].text_content())
result += '%s : %s\n' % (country,date) result += '%s : %s\n' % (country, date)
if result == u'': if result == u'':
result = NotAvailable result = NotAvailable
else: else:
@ -59,11 +59,11 @@ class BiographyPage(BasePage):
''' '''
def get_biography(self): def get_biography(self):
bio = unicode() bio = unicode()
tn = self.parser.select(self.document.getroot(),'div#tn15content',1) tn = self.parser.select(self.document.getroot(), 'div#tn15content', 1)
# we only read paragraphs, titles and links # we only read paragraphs, titles and links
for ch in tn.getchildren(): for ch in tn.getchildren():
if ch.tag in ['p','h5','a']: if ch.tag in ['p', 'h5', 'a']:
bio += '%s\n\n'%ch.text_content().strip() bio += '%s\n\n' % ch.text_content().strip()
if bio == u'': if bio == u'':
bio = NotAvailable bio = NotAvailable
return bio return bio
@ -74,52 +74,52 @@ class MovieCrewPage(BasePage):
''' '''
def iter_persons(self, role_filter=None):
    '''Yield a Person for every crew member listed on the page.

    When ``role_filter`` is ``None`` or ``'actor'``, the entries of the first
    ``table.cast`` are yielded first, with the character name as
    ``short_description`` and every other detail left as ``NotLoaded``.
    Then each remaining role section is walked; a section is kept when
    ``role_filter`` is ``None`` or equals the section name (the anchor
    ``name`` attribute with trailing ``s`` characters removed).

    NOTE(review): the nesting of the second loop was reconstructed from a
    flattened rendering of the file -- confirm it against the repository.
    '''
    root = self.document.getroot()

    if role_filter is None or role_filter == 'actor':
        cast_tables = self.parser.select(root, 'table.cast')
        if len(cast_tables) > 0:
            for cell in self.parser.select(cast_tables[0], 'td.nm'):
                anchor = cell.find('a')
                person_id = anchor.attrib.get('href', '').strip('/').split('/')[-1]
                person_name = unicode(anchor.text)
                character = self.parser.select(cell.getparent(), 'td.char', 1)
                person = Person(person_id, person_name)
                person.short_description = unicode(character.text_content())
                # details that require loading the person page itself
                for field in ('real_name', 'birth_place', 'birth_date',
                              'death_date', 'gender', 'nationality',
                              'short_biography', 'roles', 'thumbnail_url'):
                    setattr(person, field, NotLoaded)
                yield person

    for gloss_link in self.parser.select(root, 'table[cellspacing=1] h5 a'):
        role = gloss_link.attrib.get('name', '').rstrip('s')
        if role_filter is not None and role != role_filter:
            continue
        tbody = gloss_link.getparent().getparent().getparent().getparent()
        # the first row of the section is skipped
        for row in self.parser.select(tbody, 'tr')[1:]:
            for link in self.parser.select(row, 'a'):
                href = link.attrib.get('href', '')
                if '/name/nm' in href:
                    person_id = href.strip('/').split('/')[-1]
                    person_name = unicode(link.text)
                if 'glossary' in href:
                    role_detail = unicode(link.text)
                else:
                    role_detail = NotAvailable
                # a person is emitted for every link, reusing the most
                # recently seen id and name
                person = Person(person_id, person_name)
                person.short_description = role_detail
                yield person
def iter_persons_ids(self):
    '''Yield the id of every person listed in the first ``table.cast``.

    The id is the last path component of the link found in each ``td.nm``
    cell. Nothing is yielded when the page has no cast table.
    '''
    cast_tables = self.parser.select(self.document.getroot(), 'table.cast')
    if not len(cast_tables) > 0:
        return
    for cell in self.parser.select(cast_tables[0], 'td.nm'):
        target = cell.find('a').attrib.get('href', '')
        yield target.strip('/').split('/')[-1]
@ -127,7 +127,7 @@ class PersonPage(BasePage):
''' Page giving informations about a person ''' Page giving informations about a person
It is used to build a Person instance and to get the movie list related to a person It is used to build a Person instance and to get the movie list related to a person
''' '''
def get_person(self,id): def get_person(self, id):
name = NotAvailable name = NotAvailable
short_biography = NotAvailable short_biography = NotAvailable
short_description = NotAvailable short_description = NotAvailable
@ -139,52 +139,52 @@ class PersonPage(BasePage):
thumbnail_url = NotAvailable thumbnail_url = NotAvailable
roles = {} roles = {}
nationality = NotAvailable nationality = NotAvailable
td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1) td_overview = self.parser.select(self.document.getroot(), 'td#overview-top', 1)
descs = self.parser.select(td_overview,'span[itemprop=description]') descs = self.parser.select(td_overview, 'span[itemprop=description]')
if len(descs) > 0: if len(descs) > 0:
short_biography = unicode(descs[0].text) short_biography = unicode(descs[0].text)
rname_block = self.parser.select(td_overview,'div.txt-block h4.inline') rname_block = self.parser.select(td_overview, 'div.txt-block h4.inline')
if len(rname_block) > 0 and "born" in rname_block[0].text.lower(): if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
links = self.parser.select(rname_block[0].getparent(),'a') links = self.parser.select(rname_block[0].getparent(), 'a')
for a in links: for a in links:
href = a.attrib.get('href','').strip() href = a.attrib.get('href', '').strip()
if href == 'bio': if href == 'bio':
real_name = unicode(a.text.strip()) real_name = unicode(a.text.strip())
elif 'birth_place' in href: elif 'birth_place' in href:
birth_place = unicode(a.text.lower().strip()) birth_place = unicode(a.text.lower().strip())
names = self.parser.select(td_overview,'h1[itemprop=name]') names = self.parser.select(td_overview, 'h1[itemprop=name]')
if len(names) > 0: if len(names) > 0:
name = unicode(names[0].text.strip()) name = unicode(names[0].text.strip())
times = self.parser.select(td_overview,'time[itemprop=birthDate]') times = self.parser.select(td_overview, 'time[itemprop=birthDate]')
if len(times) > 0: if len(times) > 0:
time = times[0].attrib.get('datetime','').split('-') time = times[0].attrib.get('datetime', '').split('-')
if len(time) == 3 and int(time[0]) >= 1900: if len(time) == 3 and int(time[0]) >= 1900:
birth_date = datetime(int(time[0]),int(time[1]),int(time[2])) birth_date = datetime(int(time[0]), int(time[1]), int(time[2]))
dtimes = self.parser.select(td_overview,'time[itemprop=deathDate]') dtimes = self.parser.select(td_overview, 'time[itemprop=deathDate]')
if len(dtimes) > 0: if len(dtimes) > 0:
dtime = dtimes[0].attrib.get('datetime','').split('-') dtime = dtimes[0].attrib.get('datetime', '').split('-')
if len(dtime) == 3 and int(dtime[0]) >= 1900: if len(dtime) == 3 and int(dtime[0]) >= 1900:
death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2])) death_date = datetime(int(dtime[0]), int(dtime[1]), int(dtime[2]))
img_thumbnail = self.parser.select(self.document.getroot(),'td#img_primary img') img_thumbnail = self.parser.select(self.document.getroot(), 'td#img_primary img')
if len(img_thumbnail) > 0: if len(img_thumbnail) > 0:
thumbnail_url = unicode(img_thumbnail[0].attrib.get('src','')) thumbnail_url = unicode(img_thumbnail[0].attrib.get('src', ''))
# go to the filmography page # go to the filmography page
self.browser.location('http://www.imdb.com/name/%s/filmotype'%id) self.browser.location('http://www.imdb.com/name/%s/filmotype' % id)
assert self.browser.is_on_page(FilmographyPage) assert self.browser.is_on_page(FilmographyPage)
roles = self.browser.page.get_roles() roles = self.browser.page.get_roles()
person = Person(id,name) person = Person(id, name)
person.real_name = real_name person.real_name = real_name
person.birth_date = birth_date person.birth_date = birth_date
person.death_date = death_date person.death_date = death_date
person.birth_place = birth_place person.birth_place = birth_place
person.gender = gender person.gender = gender
person.nationality = nationality person.nationality = nationality
person.short_biography = short_biography person.short_biography = short_biography
person.short_description = short_description person.short_description = short_description
person.roles = roles person.roles = roles
person.thumbnail_url = thumbnail_url person.thumbnail_url = thumbnail_url
return person return person
@ -193,39 +193,39 @@ class FilmographyPage(BasePage):
This page is easier to parse than the main person page filmography This page is easier to parse than the main person page filmography
''' '''
def iter_movies_ids(self):
    '''Yield the id of every movie linked from the ``div.filmo`` blocks.

    The id is the last path component of each ``ol > li > a`` link; links
    whose id does not start with ``tt`` are ignored.
    '''
    for section in self.parser.select(self.document.getroot(), 'div.filmo'):
        for link in self.parser.select(section, 'ol > li > a'):
            movie_id = link.attrib.get('href', '').strip('/').split('/')[-1]
            if not movie_id.startswith('tt'):
                continue
            yield movie_id
def get_roles(self):
    '''Return a dict mapping each role name to a list of movie labels.

    The role name is the text of the ``h5 a`` element of each ``div.filmo``
    block, with colons removed. Every ``ol > li > a`` link whose id starts
    with ``tt`` adds a ``'(<text between parentheses>) <link text>'`` label,
    where the parenthesised text is read from what follows the link.
    ``'????'`` is used when no parenthesised text is found.
    '''
    roles = {}
    for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
        role = self.parser.select(role_div, 'h5 a', 1).text.replace(':', '')
        roles[role] = []
        for a in self.parser.select(role_div, 'ol > li > a'):
            movie_id = a.attrib.get('href', '').strip('/').split('/')[-1]
            if not movie_id.startswith('tt'):
                continue
            # the tail is None (not '') when nothing follows the link;
            # normalise it so the membership tests below cannot raise
            tail = a.tail or ''
            if '(' in tail and ')' in tail:
                between_p = tail.split(')')[0].split('(')[1]
            else:
                between_p = '????'
            roles[role].append('(%s) %s' % (between_p, a.text))
    return roles
def iter_movies(self, role_filter=None):
    '''Yield a Movie for every film listed in the ``div.filmo`` blocks.

    A block is used when ``role_filter`` is ``None`` or equals the block's
    role name (lower-cased and stripped); the ``'In Development'`` block is
    always skipped. Each movie's ``short_description`` is the
    whitespace-normalised text following its link with ``'..'`` sequences
    removed, or ``NotAvailable`` when there is no such text.
    '''
    for role_div in self.parser.select(self.document.getroot(), 'div.filmo'):
        role = self.parser.select(role_div, 'h5 a', 1).text.replace(':', '')
        if (role_filter is None or role.lower().strip() == role_filter)\
                and role != 'In Development':
            for a in self.parser.select(role_div, 'ol > li > a'):
                movie_id = a.attrib.get('href', '').strip('/').split('/')[-1]
                if not movie_id.startswith('tt'):
                    continue
                title = unicode(a.text)
                role_detail = NotAvailable
                # the tail is None (not '') when nothing follows the link;
                # calling len() on it directly would raise TypeError
                tail = a.tail or ''
                if len(tail) > 0:
                    role_detail = unicode(' '.join(tail.replace('..', '').split()))
                movie = Movie(movie_id, title)
                movie.short_description = role_detail
                yield movie

View file

@ -49,7 +49,7 @@ class IsohuntBackend(BaseBackend, ICapTorrent):
return self.browser.openurl(torrent.url.encode('utf-8')).read() return self.browser.openurl(torrent.url.encode('utf-8')).read()
def iter_torrents(self, pattern):
    """Return the browser's torrent search results for ``pattern``.

    Every space of the pattern is replaced by ``+`` before the query is
    handed to the browser.
    """
    query = '+'.join(pattern.split(' '))
    return self.browser.iter_torrents(query)
def fill_torrent(self, torrent, fields): def fill_torrent(self, torrent, fields):
if 'description' in fields or 'files' in fields: if 'description' in fields or 'files' in fields:
@ -61,5 +61,5 @@ class IsohuntBackend(BaseBackend, ICapTorrent):
return torrent return torrent
OBJECTS = { OBJECTS = {
Torrent:fill_torrent Torrent: fill_torrent
} }

View file

@ -32,9 +32,9 @@ class IsohuntBrowser(BaseBrowser):
ENCODING = 'utf-8' ENCODING = 'utf-8'
USER_AGENT = BaseBrowser.USER_AGENTS['wget'] USER_AGENT = BaseBrowser.USER_AGENTS['wget']
PAGES = { PAGES = {
'https://isohunt.com/torrents/.*iht=-1&ihp=1&ihs1=1&iho1=d' : TorrentsPage, 'https://isohunt.com/torrents/.*iht=-1&ihp=1&ihs1=1&iho1=d': TorrentsPage,
'https://isohunt.com/torrent_details.*tab=summary' : TorrentPage, 'https://isohunt.com/torrent_details.*tab=summary': TorrentPage,
} }
def home(self): def home(self):
return self.location('https://isohunt.com') return self.location('https://isohunt.com')

View file

@ -73,7 +73,8 @@ class TorrentPage(BasePage):
title = NotAvailable title = NotAvailable
size = NotAvailable size = NotAvailable
url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id) url = 'https://isohunt.com/download/%s/%s.torrent' % (id, id)
title = unicode(self.parser.select(self.document.getroot(),'head > meta[name=title]',1).attrib.get('content','')) title = unicode(self.parser.select(
self.document.getroot(), 'head > meta[name=title]', 1).attrib.get('content', ''))
seed = NotAvailable seed = NotAvailable
leech = NotAvailable leech = NotAvailable
tip_id = "none" tip_id = "none"

View file

@ -72,5 +72,5 @@ class KickassBackend(BaseBackend, ICapTorrent):
return torrent return torrent
OBJECTS = { OBJECTS = {
Torrent:fill_torrent Torrent: fill_torrent
} }

View file

@ -34,7 +34,7 @@ class KickassBrowser(BaseBrowser):
PAGES = { PAGES = {
'http://kat.ph/usearch/.*field=seeders&sorder=desc': TorrentsPage, 'http://kat.ph/usearch/.*field=seeders&sorder=desc': TorrentsPage,
'http://kat.ph/.*.html': TorrentPage, 'http://kat.ph/.*.html': TorrentPage,
} }
def home(self): def home(self):
return self.location('http://kat.ph') return self.location('http://kat.ph')

View file

@ -31,7 +31,7 @@ from weboob.tools.browser import BasePage
from weboob.tools.misc import get_bytes_size from weboob.tools.misc import get_bytes_size
__all__ = ['TorrentsPage','TorrentPage'] __all__ = ['TorrentsPage', 'TorrentPage']
class TorrentsPage(BasePage): class TorrentsPage(BasePage):
@ -53,14 +53,14 @@ class TorrentsPage(BasePage):
.replace('.html', '') .replace('.html', '')
# look for url # look for url
for a in self.parser.select(tr,'div.iaconbox a'): for a in self.parser.select(tr, 'div.iaconbox a'):
href = a.attrib.get('href', '') href = a.attrib.get('href', '')
if href.startswith('magnet'): if href.startswith('magnet'):
magnet = unicode(href) magnet = unicode(href)
elif href.startswith('http'): elif href.startswith('http'):
url = unicode(href) url = unicode(href)
elif href.startswith('//'): elif href.startswith('//'):
url = u'http:%s'%href url = u'http:%s' % href
size = tr.getchildren()[1].text size = tr.getchildren()[1].text
u = tr.getchildren()[1].getchildren()[0].text u = tr.getchildren()[1].getchildren()[0].text
@ -107,16 +107,16 @@ class TorrentPage(BasePage):
leech = 0 leech = 0
title = self.parser.select(self.document.getroot(), title = self.parser.select(self.document.getroot(),
'h1.torrentName span', 1) 'h1.torrentName span', 1)
title = unicode(title.text) title = unicode(title.text)
for a in self.parser.select(self.document.getroot(), for a in self.parser.select(self.document.getroot(),
'div.downloadButtonGroup a'): 'div.downloadButtonGroup a'):
href = a.attrib.get('href', '') href = a.attrib.get('href', '')
if href.startswith('magnet'): if href.startswith('magnet'):
magnet = unicode(href) magnet = unicode(href)
elif href.startswith('//'): elif href.startswith('//'):
url = u'http:%s'%href url = u'http:%s' % href
elif href.startswith('http'): elif href.startswith('http'):
url = unicode(href) url = unicode(href)
@ -127,7 +127,7 @@ class TorrentPage(BasePage):
# is enough to know if this is the right span # is enough to know if this is the right span
if (span.attrib.get('class', '') == 'folder' if (span.attrib.get('class', '') == 'folder'
or span.attrib.get('class', '') == 'folderopen') \ or span.attrib.get('class', '') == 'folderopen') \
and len(span.getchildren()) > 2: and len(span.getchildren()) > 2:
size = span.getchildren()[1].tail size = span.getchildren()[1].tail
u = span.getchildren()[2].text u = span.getchildren()[2].text
size = float(size.split(': ')[1].replace(',', '.')) size = float(size.split(': ')[1].replace(',', '.'))

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.recipe import ICapRecipe,Recipe from weboob.capabilities.recipe import ICapRecipe, Recipe
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from .browser import MarmitonBrowser from .browser import MarmitonBrowser
@ -48,16 +48,16 @@ class MarmitonBackend(BaseBackend, ICapRecipe):
def fill_recipe(self, recipe, fields):
    """Complete ``recipe`` with the details of its full version.

    When ``'nb_person'`` or ``'instructions'`` is requested in ``fields``,
    the recipe is fetched again by id and its detail attributes are copied
    onto ``recipe``. The same ``recipe`` object is returned in every case.
    """
    if 'nb_person' not in fields and 'instructions' not in fields:
        return recipe
    rec = self.get_recipe(recipe.id)
    copied = ('picture_url', 'instructions', 'ingredients', 'comments',
              'nb_person', 'cooking_time', 'preparation_time')
    for attr in copied:
        setattr(recipe, attr, getattr(rec, attr))
    return recipe
OBJECTS = { OBJECTS = {
Recipe:fill_recipe, Recipe: fill_recipe,
} }

View file

@ -34,7 +34,7 @@ class MarmitonBrowser(BaseBrowser):
PAGES = { PAGES = {
'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage, 'http://www.marmiton.org/recettes/recherche.aspx.*': ResultsPage,
'http://www.marmiton.org/recettes/recette_.*': RecipePage, 'http://www.marmiton.org/recettes/recette_.*': RecipePage,
} }
def iter_recipes(self, pattern): def iter_recipes(self, pattern):
self.location('http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % (pattern)) self.location('http://www.marmiton.org/recettes/recherche.aspx?st=5&cli=1&aqt=%s' % (pattern))

View file

@ -30,27 +30,28 @@ class ResultsPage(BasePage):
""" Page which contains results as a list of recipies """ Page which contains results as a list of recipies
""" """
def iter_recipes(self):
    """Yield a Recipe for every search result that has exactly two cells.

    The first cell provides the thumbnail (``NotAvailable`` when it holds
    no image), the second one the title, the id (taken from the link
    target) and the short description. Detailed fields are left as
    ``NotLoaded``.
    """
    for result in self.parser.select(self.document.getroot(), 'div.m_search_result'):
        cells = self.parser.select(result, 'td')
        if len(cells) != 2:
            continue
        picture_cell = cells[0]
        text_cell = cells[1]

        thumbnail_url = NotAvailable
        pictures = self.parser.select(picture_cell, 'img')
        if len(pictures) > 0:
            thumbnail_url = unicode(pictures[0].attrib.get('src', ''))

        link = self.parser.select(text_cell, 'div.m_search_titre_recette a', 1)
        title = unicode(link.text)
        recipe_id = link.attrib.get('href', '').replace('.aspx', '').replace('/recettes/recette_', '')
        summary = self.parser.select(text_cell, 'div.m_search_result_part4', 1)
        # newlines of the summary are turned into single spaces
        short_description = unicode(' '.join(summary.text.strip().split('\n')))

        recipe = Recipe(recipe_id, title)
        recipe.thumbnail_url = thumbnail_url
        recipe.short_description = short_description
        for field in ('instructions', 'ingredients', 'nb_person',
                      'cooking_time', 'preparation_time'):
            setattr(recipe, field, NotLoaded)
        yield recipe
@ -68,30 +69,30 @@ class RecipePage(BasePage):
instructions = NotAvailable instructions = NotAvailable
comments = [] comments = []
title = unicode(self.parser.select(self.document.getroot(),'h1.m_title',1).text_content().strip()) title = unicode(self.parser.select(self.document.getroot(), 'h1.m_title', 1).text_content().strip())
main = self.parser.select(self.document.getroot(),'div.m_content_recette_main',1) main = self.parser.select(self.document.getroot(), 'div.m_content_recette_main', 1)
preparation_time = int(self.parser.select(main,'p.m_content_recette_info span.preptime',1).text_content()) preparation_time = int(self.parser.select(main, 'p.m_content_recette_info span.preptime', 1).text_content())
cooking_time = int(self.parser.select(main,'p.m_content_recette_info span.cooktime',1).text_content()) cooking_time = int(self.parser.select(main, 'p.m_content_recette_info span.cooktime', 1).text_content())
ing_header_line = self.parser.select(main,'p.m_content_recette_ingredients span',1).text_content() ing_header_line = self.parser.select(main, 'p.m_content_recette_ingredients span', 1).text_content()
if '(pour' in ing_header_line and ')' in ing_header_line: if '(pour' in ing_header_line and ')' in ing_header_line:
nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0]) nb_person = int(ing_header_line.split('pour ')[-1].split('personnes)')[0].split()[0])
ingredients = self.parser.select(main,'p.m_content_recette_ingredients',1).text_content().strip().split('- ') ingredients = self.parser.select(main, 'p.m_content_recette_ingredients', 1).text_content().strip().split('- ')
ingredients=ingredients[1:] ingredients = ingredients[1:]
rinstructions = self.parser.select(main,'div.m_content_recette_todo',1).text_content().strip() rinstructions = self.parser.select(main, 'div.m_content_recette_todo', 1).text_content().strip()
instructions = u'' instructions = u''
for line in rinstructions.split('\n'): for line in rinstructions.split('\n'):
instructions += '%s\n'%line.strip() instructions += '%s\n' % line.strip()
instructions = instructions.strip('\n') instructions = instructions.strip('\n')
imgillu = self.parser.select(self.document.getroot(),'a.m_content_recette_illu img') imgillu = self.parser.select(self.document.getroot(), 'a.m_content_recette_illu img')
if len(imgillu) > 0: if len(imgillu) > 0:
picture_url = unicode(imgillu[0].attrib.get('src','')) picture_url = unicode(imgillu[0].attrib.get('src', ''))
for divcom in self.parser.select(self.document.getroot(),'div.m_commentaire_row'): for divcom in self.parser.select(self.document.getroot(), 'div.m_commentaire_row'):
note = self.parser.select(divcom,'div.m_commentaire_note span',1).text.strip() note = self.parser.select(divcom, 'div.m_commentaire_note span', 1).text.strip()
user = self.parser.select(divcom,'div.m_commentaire_content span',1).text.strip() user = self.parser.select(divcom, 'div.m_commentaire_content span', 1).text.strip()
content = self.parser.select(divcom,'div.m_commentaire_content p',1).text.strip() content = self.parser.select(divcom, 'div.m_commentaire_content p', 1).text.strip()
comments.append(u'user: %s, note: %s, comment: %s'%(user,note,content)) comments.append(u'user: %s, note: %s, comment: %s' % (user, note, content))
recipe = Recipe(id,title) recipe = Recipe(id, title)
recipe.preparation_time = preparation_time recipe.preparation_time = preparation_time
recipe.cooking_time = cooking_time recipe.cooking_time = cooking_time
recipe.nb_person = nb_person recipe.nb_person = nb_person

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported,Subtitle from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported, Subtitle
from weboob.applications.suboob.suboob import LANGUAGE_CONV from weboob.applications.suboob.suboob import LANGUAGE_CONV
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
@ -53,15 +53,15 @@ class OpensubtitlesBackend(BaseBackend, ICapSubtitle):
def iter_subtitles(self, language, pattern):
    """Search subtitles matching ``pattern`` in the given ``language``.

    Raises ``LanguageNotSupported`` when ``language`` is not a key of
    ``LANGUAGE_CONV``. The pattern is UTF-8 encoded and URL-quoted before
    it is passed to the browser.
    """
    if language not in LANGUAGE_CONV:
        raise LanguageNotSupported()
    quoted_pattern = quote_plus(pattern.encode('utf-8'))
    return self.browser.iter_subtitles(language, quoted_pattern)
def fill_subtitle(self, subtitle, fields):
    """Complete ``subtitle`` with its description when requested.

    When ``'description'`` is in ``fields``, the subtitle is fetched again
    by id and its description is copied onto ``subtitle``. The same
    ``subtitle`` object is returned in every case.
    """
    if 'description' not in fields:
        return subtitle
    subtitle.description = self.get_subtitle(subtitle.id).description
    return subtitle
OBJECTS = { OBJECTS = {
Subtitle:fill_subtitle, Subtitle: fill_subtitle,
} }

View file

@ -35,13 +35,14 @@ class OpensubtitlesBrowser(BaseBrowser):
PAGES = { PAGES = {
'http://www.opensubtitles.org.*search2/sublanguageid.*moviename.*': SearchPage, 'http://www.opensubtitles.org.*search2/sublanguageid.*moviename.*': SearchPage,
'http://www.opensubtitles.org.*search/sublanguageid.*idmovie.*': SubtitlesPage, 'http://www.opensubtitles.org.*search/sublanguageid.*idmovie.*': SubtitlesPage,
'http://www.opensubtitles.org.*search/imdbid.*/sublanguageid.*/moviename.*' : SubtitlesPage, 'http://www.opensubtitles.org.*search/imdbid.*/sublanguageid.*/moviename.*': SubtitlesPage,
'http://www.opensubtitles.org.*subtitles/[0-9]*/.*' : SubtitlePage 'http://www.opensubtitles.org.*subtitles/[0-9]*/.*': SubtitlePage
} }
def iter_subtitles(self, language, pattern):
    """Open the search page for ``pattern`` and iterate over its subtitles.

    ``language`` is converted through ``LANGUAGE_CONV`` to build the search
    URL. The page reached must be a search page, a subtitle list page or a
    single subtitle page; iteration is delegated to that page.
    """
    lang = LANGUAGE_CONV[language]
    search_url = 'http://www.opensubtitles.org/search2/sublanguageid-%s/moviename-%s' % (
        lang, pattern.encode('utf-8'))
    self.location(search_url)
    accepted_pages = (SearchPage, SubtitlesPage, SubtitlePage)
    assert any(self.is_on_page(page) for page in accepted_pages)
    return self.page.iter_subtitles()

View file

@ -24,24 +24,24 @@ from weboob.tools.browser import BasePage
from weboob.applications.suboob.suboob import LANGUAGE_CONV from weboob.applications.suboob.suboob import LANGUAGE_CONV
__all__ = ['SubtitlesPage','SubtitlePage','SearchPage'] __all__ = ['SubtitlesPage', 'SubtitlePage', 'SearchPage']
class SearchPage(BasePage): class SearchPage(BasePage):
""" Page which contains results as a list of movies """ Page which contains results as a list of movies
""" """
def iter_subtitles(self): def iter_subtitles(self):
tabresults = self.parser.select(self.document.getroot(),'table#search_results') tabresults = self.parser.select(self.document.getroot(), 'table#search_results')
if len(tabresults) > 0: if len(tabresults) > 0:
table = tabresults[0] table = tabresults[0]
# for each result line, explore the subtitle list page to iter subtitles # for each result line, explore the subtitle list page to iter subtitles
for line in self.parser.select(table,'tr'): for line in self.parser.select(table, 'tr'):
links = self.parser.select(line,'a') links = self.parser.select(line, 'a')
if len(links) > 0: if len(links) > 0:
a = links[0] a = links[0]
url = a.attrib.get('href','') url = a.attrib.get('href', '')
if "ads.opensubtitles" not in url: if "ads.opensubtitles" not in url:
self.browser.location("http://www.opensubtitles.org%s"%url) self.browser.location("http://www.opensubtitles.org%s" % url)
assert self.browser.is_on_page(SubtitlesPage) or self.browser.is_on_page(SubtitlePage) assert self.browser.is_on_page(SubtitlesPage) or self.browser.is_on_page(SubtitlePage)
# subtitles page does the job # subtitles page does the job
for subtitle in self.browser.page.iter_subtitles(): for subtitle in self.browser.page.iter_subtitles():
@ -52,48 +52,48 @@ class SubtitlesPage(BasePage):
""" Page which contains several subtitles for a single movie """ Page which contains several subtitles for a single movie
""" """
def iter_subtitles(self):
    """Yield a subtitle for every relevant row of the results table.

    Only the first ``table#search_results`` is read, and only the ``tr``
    elements whose ``id`` starts with ``name`` are converted; the other
    rows (ads in particular) are skipped. Nothing is yielded when the page
    has no results table.
    """
    result_tables = self.parser.select(self.document.getroot(), 'table#search_results')
    if not len(result_tables) > 0:
        return
    # Rows are walked with getiterator() because selecting 'tr' through the
    # parser was observed not to work all the time
    # (for example 'search fr sopranos guy walks').
    for row in result_tables[0].getiterator('tr'):
        if not row.attrib.get('id', '').startswith('name'):
            continue
        yield self.get_subtitle_from_line(row)
def get_subtitle_from_line(self,line): def get_subtitle_from_line(self, line):
cells = self.parser.select(line,'td') cells = self.parser.select(line, 'td')
if len(cells) > 0: if len(cells) > 0:
links = self.parser.select(line,'a') links = self.parser.select(line, 'a')
a = links[0] a = links[0]
name = u" ".join(a.text.strip().split()) name = u" ".join(a.text.strip().split())
first_cell = cells[0] first_cell = cells[0]
spanlist = self.parser.select(first_cell,'span') spanlist = self.parser.select(first_cell, 'span')
if len(spanlist) > 0: if len(spanlist) > 0:
long_name = spanlist[0].attrib.get('title','') long_name = spanlist[0].attrib.get('title', '')
else: else:
texts = first_cell.itertext() texts = first_cell.itertext()
long_name = texts.next() long_name = texts.next()
long_name = texts.next() long_name = texts.next()
if "Download at 25" in long_name: if "Download at 25" in long_name:
long_name = "---" long_name = "---"
name = "%s (%s)"%(name,long_name) name = "%s (%s)" % (name, long_name)
second_cell = cells[1] second_cell = cells[1]
link = self.parser.select(second_cell,'a',1) link = self.parser.select(second_cell, 'a', 1)
lang = link.attrib.get('href','').split('/')[-1].split('-')[-1] lang = link.attrib.get('href', '').split('/')[-1].split('-')[-1]
for lshort,llong in LANGUAGE_CONV.items(): for lshort, llong in LANGUAGE_CONV.items():
if lang == llong: if lang == llong:
lang = unicode(lshort) lang = unicode(lshort)
break break
nb_cd = int(cells[2].text.strip().lower().replace('cd','')) nb_cd = int(cells[2].text.strip().lower().replace('cd', ''))
cell_dl = cells[4] cell_dl = cells[4]
href = self.parser.select(cell_dl,'a',1).attrib.get('href','') href = self.parser.select(cell_dl, 'a', 1).attrib.get('href', '')
url = unicode('http://www.opensubtitles.org%s'%href) url = unicode('http://www.opensubtitles.org%s' % href)
id = href.split('/')[-1] id = href.split('/')[-1]
subtitle = Subtitle(id,name) subtitle = Subtitle(id, name)
subtitle.url = url subtitle.url = url
subtitle.language = lang subtitle.language = lang
subtitle.nb_cd = nb_cd subtitle.nb_cd = nb_cd
@ -106,15 +106,15 @@ class SubtitlePage(BasePage):
""" """
def get_subtitle(self): def get_subtitle(self):
desc = NotAvailable desc = NotAvailable
father = self.parser.select(self.document.getroot(),'a#app_link',1).getparent() father = self.parser.select(self.document.getroot(), 'a#app_link', 1).getparent()
a = self.parser.select(father,'a')[1] a = self.parser.select(father, 'a')[1]
id = a.attrib.get('href','').split('/')[-1] id = a.attrib.get('href', '').split('/')[-1]
url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s'%id) url = unicode('http://www.opensubtitles.org/subtitleserve/sub/%s' % id)
link = self.parser.select(self.document.getroot(),'link[rel=bookmark]',1) link = self.parser.select(self.document.getroot(), 'link[rel=bookmark]', 1)
title = unicode(link.attrib.get('title','')) title = unicode(link.attrib.get('title', ''))
nb_cd = int(title.lower().split('cd')[0].split()[-1]) nb_cd = int(title.lower().split('cd')[0].split()[-1])
lang = unicode(title.split('(')[1].split(')')[0]) lang = unicode(title.split('(')[1].split(')')[0])
file_names = self.parser.select(self.document.getroot(),"img[title~=filename]") file_names = self.parser.select(self.document.getroot(), "img[title~=filename]")
if len(file_names) > 0: if len(file_names) > 0:
file_name = file_names[0].getparent().text_content() file_name = file_names[0].getparent().text_content()
file_name = ' '.join(file_name.split()) file_name = ' '.join(file_name.split())
@ -122,11 +122,11 @@ class SubtitlePage(BasePage):
for f in file_names: for f in file_names:
desc_line = f.getparent().text_content() desc_line = f.getparent().text_content()
desc += '\n'+' '.join(desc_line.split()) desc += '\n'+' '.join(desc_line.split())
name = unicode('%s (%s)'%(title,file_name)) name = unicode('%s (%s)' % (title, file_name))
subtitle = Subtitle(id,name) subtitle = Subtitle(id, name)
subtitle.url = url subtitle.url = url
for lshort,llong in LANGUAGE_CONV.items(): for lshort, llong in LANGUAGE_CONV.items():
if lang == llong: if lang == llong:
lang = unicode(lshort) lang = unicode(lshort)
break break

View file

@ -27,7 +27,7 @@ class OpensubtitlesTest(BackendTest):
def test_subtitle(self): def test_subtitle(self):
lsub = [] lsub = []
subtitles = self.backend.iter_subtitles('fr','spiderman') subtitles = self.backend.iter_subtitles('fr', 'spiderman')
for i in range(5): for i in range(5):
subtitle = subtitles.next() subtitle = subtitles.next()
lsub.append(subtitle) lsub.append(subtitle)

View file

@ -43,7 +43,7 @@ class ParolesmaniaBackend(BaseBackend, ICapLyrics):
return self.browser.get_lyrics(id) return self.browser.get_lyrics(id)
def iter_lyrics(self, criteria, pattern): def iter_lyrics(self, criteria, pattern):
return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('utf-8'))) return self.browser.iter_lyrics(criteria, quote_plus(pattern.encode('utf-8')))
def fill_songlyrics(self, songlyrics, fields): def fill_songlyrics(self, songlyrics, fields):
if 'content' in fields: if 'content' in fields:
@ -52,5 +52,5 @@ class ParolesmaniaBackend(BaseBackend, ICapLyrics):
return songlyrics return songlyrics
OBJECTS = { OBJECTS = {
SongLyrics:fill_songlyrics SongLyrics: fill_songlyrics
} }

View file

@ -36,13 +36,13 @@ class ParolesmaniaBrowser(BaseBrowser):
'http://www.parolesmania.com/recherche.php\?c=artist.*': ArtistResultsPage, 'http://www.parolesmania.com/recherche.php\?c=artist.*': ArtistResultsPage,
'http://www.parolesmania.com/paroles.*[0-9]*/paroles.*': SonglyricsPage, 'http://www.parolesmania.com/paroles.*[0-9]*/paroles.*': SonglyricsPage,
'http://www.parolesmania.com/paroles[^/]*.html': ArtistSongsPage, 'http://www.parolesmania.com/paroles[^/]*.html': ArtistSongsPage,
} }
def iter_lyrics(self, criteria, pattern): def iter_lyrics(self, criteria, pattern):
crit = 'artist' crit = 'artist'
if criteria != 'artist': if criteria != 'artist':
crit = 'title' crit = 'title'
self.location('http://www.parolesmania.com/recherche.php?c=%s&k=%s'%(crit,pattern)) self.location('http://www.parolesmania.com/recherche.php?c=%s&k=%s' % (crit, pattern))
assert self.is_on_page(SongResultsPage) or self.is_on_page(ArtistResultsPage)\ assert self.is_on_page(SongResultsPage) or self.is_on_page(ArtistResultsPage)\
or self.is_on_page(ArtistSongsPage) or self.is_on_page(ArtistSongsPage)
for lyr in self.page.iter_lyrics(): for lyr in self.page.iter_lyrics():
@ -50,6 +50,6 @@ class ParolesmaniaBrowser(BaseBrowser):
def get_lyrics(self, id): def get_lyrics(self, id):
ids = id.split('|') ids = id.split('|')
self.location('http://www.parolesmania.com/paroles_%s/paroles_%s.html' % (ids[0],ids[1])) self.location('http://www.parolesmania.com/paroles_%s/paroles_%s.html' % (ids[0], ids[1]))
assert self.is_on_page(SonglyricsPage) assert self.is_on_page(SonglyricsPage)
return self.page.get_lyrics(id) return self.page.get_lyrics(id)

View file

@ -23,32 +23,32 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage'] __all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
class ArtistResultsPage(BasePage): class ArtistResultsPage(BasePage):
def iter_lyrics(self): def iter_lyrics(self):
for link in self.parser.select(self.document.getroot(),'div#albums > h1 a'): for link in self.parser.select(self.document.getroot(), 'div#albums > h1 a'):
artist = unicode(link.text_content()) artist = unicode(link.text_content())
href = link.attrib.get('href','') href = link.attrib.get('href', '')
if href.startswith('/paroles'): if href.startswith('/paroles'):
self.browser.location('http://www.parolesmania.com%s'%href) self.browser.location('http://www.parolesmania.com%s' % href)
assert self.browser.is_on_page(ArtistSongsPage) assert self.browser.is_on_page(ArtistSongsPage)
for lyr in self.browser.page.iter_lyrics(artist): for lyr in self.browser.page.iter_lyrics(artist):
yield lyr yield lyr
class ArtistSongsPage(BasePage): class ArtistSongsPage(BasePage):
def iter_lyrics(self,artist=None): def iter_lyrics(self, artist=None):
if artist is None: if artist is None:
artist = self.parser.select(self.document.getroot(),'head > title',1).text.replace('Paroles ','') artist = self.parser.select(self.document.getroot(), 'head > title', 1).text.replace('Paroles ', '')
for link in self.parser.select(self.document.getroot(),'div#albums a'): for link in self.parser.select(self.document.getroot(), 'div#albums a'):
href = link.attrib.get('href','') href = link.attrib.get('href', '')
titleattrib = link.attrib.get('title','') titleattrib = link.attrib.get('title', '')
if href.startswith('/paroles') and not href.endswith('alpha.html') and titleattrib.startswith('Paroles '): if href.startswith('/paroles') and not href.endswith('alpha.html') and titleattrib.startswith('Paroles '):
title = unicode(link.text) title = unicode(link.text)
ids = href.replace('/','').replace('.html','').split('paroles_') ids = href.replace('/', '').replace('.html', '').split('paroles_')
id = '%s|%s'%(ids[1],ids[2]) id = '%s|%s' % (ids[1], ids[2])
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
songlyrics.content = NotLoaded songlyrics.content = NotLoaded
@ -57,13 +57,13 @@ class ArtistSongsPage(BasePage):
class SongResultsPage(BasePage): class SongResultsPage(BasePage):
def iter_lyrics(self): def iter_lyrics(self):
for link in self.parser.select(self.document.getroot(),'div#albums a'): for link in self.parser.select(self.document.getroot(), 'div#albums a'):
artist = NotAvailable artist = NotAvailable
title = unicode(link.text.split(' - ')[0]) title = unicode(link.text.split(' - ')[0])
href = link.attrib.get('href','') href = link.attrib.get('href', '')
if href.startswith('/paroles') and not href.endswith('alpha.html'): if href.startswith('/paroles') and not href.endswith('alpha.html'):
ids = href.replace('/','').replace('.html','').split('paroles_') ids = href.replace('/', '').replace('.html', '').split('paroles_')
id = '%s|%s'%(ids[1],ids[2]) id = '%s|%s' % (ids[1], ids[2])
artist = unicode(link.text.split(' - ')[1]) artist = unicode(link.text.split(' - ')[1])
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
@ -76,12 +76,12 @@ class SonglyricsPage(BasePage):
content = NotAvailable content = NotAvailable
artist = NotAvailable artist = NotAvailable
title = NotAvailable title = NotAvailable
lyrdiv = self.parser.select(self.document.getroot(),'div#songlyrics_h') lyrdiv = self.parser.select(self.document.getroot(), 'div#songlyrics_h')
if len(lyrdiv) > 0: if len(lyrdiv) > 0:
content = unicode(lyrdiv[0].text_content().strip()) content = unicode(lyrdiv[0].text_content().strip())
infos = self.parser.select(self.document.getroot(),'head > title',1).text infos = self.parser.select(self.document.getroot(), 'head > title', 1).text
artist = unicode(infos.split(' - ')[1]) artist = unicode(infos.split(' - ')[1])
title = unicode(infos.split(' - ')[0].replace('Paroles ','')) title = unicode(infos.split(' - ')[0].replace('Paroles ', ''))
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
songlyrics.content = content songlyrics.content = content

View file

@ -25,7 +25,7 @@ class ParolesmaniaTest(BackendTest):
BACKEND = 'parolesmania' BACKEND = 'parolesmania'
def test_search_song_n_get(self): def test_search_song_n_get(self):
l_lyrics = list(self.backend.iter_lyrics('song','chien')) l_lyrics = list(self.backend.iter_lyrics('song', 'chien'))
for songlyrics in l_lyrics: for songlyrics in l_lyrics:
assert songlyrics.id assert songlyrics.id
assert songlyrics.title assert songlyrics.title
@ -38,7 +38,7 @@ class ParolesmaniaTest(BackendTest):
assert full_lyr.content is not NotLoaded assert full_lyr.content is not NotLoaded
def test_search_artist(self): def test_search_artist(self):
l_lyrics = list(self.backend.iter_lyrics('artist','boris')) l_lyrics = list(self.backend.iter_lyrics('artist', 'boris'))
for songlyrics in l_lyrics: for songlyrics in l_lyrics:
assert songlyrics.id assert songlyrics.id
assert songlyrics.title assert songlyrics.title

View file

@ -41,7 +41,7 @@ class ParolesmusiqueBackend(BaseBackend, ICapLyrics):
return self.browser.get_lyrics(id) return self.browser.get_lyrics(id)
def iter_lyrics(self, criteria, pattern): def iter_lyrics(self, criteria, pattern):
return self.browser.iter_lyrics(criteria,pattern.encode('utf-8')) return self.browser.iter_lyrics(criteria, pattern.encode('utf-8'))
def fill_songlyrics(self, songlyrics, fields): def fill_songlyrics(self, songlyrics, fields):
if 'content' in fields: if 'content' in fields:
@ -50,5 +50,5 @@ class ParolesmusiqueBackend(BaseBackend, ICapLyrics):
return songlyrics return songlyrics
OBJECTS = { OBJECTS = {
SongLyrics:fill_songlyrics SongLyrics: fill_songlyrics
} }

View file

@ -37,12 +37,12 @@ class ParolesmusiqueBrowser(BaseBrowser):
'http://www.paroles-musique.com/lyrics-paroles-.*-0,0.php': ArtistResultsPage, 'http://www.paroles-musique.com/lyrics-paroles-.*-0,0.php': ArtistResultsPage,
'http://www.paroles-musique.com/paroles-.*p[0-9]*': SonglyricsPage, 'http://www.paroles-musique.com/paroles-.*p[0-9]*': SonglyricsPage,
'http://www.paroles-musique.com/paroles-.*-lyrics,a[0-9]*': ArtistSongsPage, 'http://www.paroles-musique.com/paroles-.*-lyrics,a[0-9]*': ArtistSongsPage,
} }
def iter_lyrics(self, criteria, pattern): def iter_lyrics(self, criteria, pattern):
self.location('http://www.paroles-musique.com') self.location('http://www.paroles-musique.com')
assert self.is_on_page(HomePage) assert self.is_on_page(HomePage)
return self.page.iter_lyrics(criteria,pattern) return self.page.iter_lyrics(criteria, pattern)
def get_lyrics(self, id): def get_lyrics(self, id):
self.location('http://www.paroles-musique.com/paroles-%s' % id) self.location('http://www.paroles-musique.com/paroles-%s' % id)

View file

@ -23,11 +23,11 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage', 'HomePage'] __all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage', 'HomePage']
class HomePage(BasePage): class HomePage(BasePage):
def iter_lyrics(self,criteria,pattern): def iter_lyrics(self, criteria, pattern):
self.browser.select_form(name='rechercher') self.browser.select_form(name='rechercher')
if criteria == 'artist': if criteria == 'artist':
self.browser['termes_a'] = pattern self.browser['termes_a'] = pattern
@ -41,21 +41,21 @@ class HomePage(BasePage):
class ArtistResultsPage(BasePage): class ArtistResultsPage(BasePage):
def iter_lyrics(self): def iter_lyrics(self):
for link in self.parser.select(self.document.getroot(),'div.cont_cat table a.std'): for link in self.parser.select(self.document.getroot(), 'div.cont_cat table a.std'):
artist = unicode(link.text_content()) artist = unicode(link.text_content())
self.browser.location('http://www.paroles-musique.com%s'%link.attrib.get('href','')) self.browser.location('http://www.paroles-musique.com%s' % link.attrib.get('href', ''))
assert self.browser.is_on_page(ArtistSongsPage) assert self.browser.is_on_page(ArtistSongsPage)
for lyr in self.browser.page.iter_lyrics(artist): for lyr in self.browser.page.iter_lyrics(artist):
yield lyr yield lyr
class ArtistSongsPage(BasePage): class ArtistSongsPage(BasePage):
def iter_lyrics(self,artist): def iter_lyrics(self, artist):
for link in self.parser.select(self.document.getroot(),'div.cont_catA div.art_scroll a'): for link in self.parser.select(self.document.getroot(), 'div.cont_catA div.art_scroll a'):
href = link.attrib.get('href','') href = link.attrib.get('href', '')
if href.startswith('./paroles'): if href.startswith('./paroles'):
title = unicode(link.text) title = unicode(link.text)
id = href.replace('./paroles-','') id = href.replace('./paroles-', '')
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
songlyrics.content = NotLoaded songlyrics.content = NotLoaded
@ -65,14 +65,14 @@ class ArtistSongsPage(BasePage):
class SongResultsPage(BasePage): class SongResultsPage(BasePage):
def iter_lyrics(self): def iter_lyrics(self):
first = True first = True
for tr in self.parser.select(self.document.getroot(),'div.cont_cat table tr'): for tr in self.parser.select(self.document.getroot(), 'div.cont_cat table tr'):
if first: if first:
first = False first = False
continue continue
artist = NotAvailable artist = NotAvailable
links = self.parser.select(tr,'a.std') links = self.parser.select(tr, 'a.std')
title = unicode(links[0].text) title = unicode(links[0].text)
id = links[0].attrib.get('href','').replace('/paroles-','') id = links[0].attrib.get('href', '').replace('/paroles-', '')
artist = unicode(links[1].text) artist = unicode(links[1].text)
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
@ -84,8 +84,8 @@ class SonglyricsPage(BasePage):
def get_lyrics(self, id): def get_lyrics(self, id):
artist = NotAvailable artist = NotAvailable
title = NotAvailable title = NotAvailable
content = unicode(self.parser.select(self.document.getroot(),'div#lyr_scroll',1).text_content().strip()) content = unicode(self.parser.select(self.document.getroot(), 'div#lyr_scroll', 1).text_content().strip())
infos = self.parser.select(self.document.getroot(),'h2.lyrics > font') infos = self.parser.select(self.document.getroot(), 'h2.lyrics > font')
artist = unicode(infos[0].text) artist = unicode(infos[0].text)
title = unicode(infos[1].text) title = unicode(infos[1].text)
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)

View file

@ -25,7 +25,7 @@ class ParolesmusiqueTest(BackendTest):
BACKEND = 'parolesmusique' BACKEND = 'parolesmusique'
def test_search_song_n_get(self): def test_search_song_n_get(self):
l_lyrics = list(self.backend.iter_lyrics('song','chien')) l_lyrics = list(self.backend.iter_lyrics('song', 'chien'))
for songlyrics in l_lyrics: for songlyrics in l_lyrics:
assert songlyrics.id assert songlyrics.id
assert songlyrics.title assert songlyrics.title
@ -38,7 +38,7 @@ class ParolesmusiqueTest(BackendTest):
assert full_lyr.content is not NotLoaded assert full_lyr.content is not NotLoaded
def test_search_artist(self): def test_search_artist(self):
l_lyrics = list(self.backend.iter_lyrics('artist','boris')) l_lyrics = list(self.backend.iter_lyrics('artist', 'boris'))
for songlyrics in l_lyrics: for songlyrics in l_lyrics:
assert songlyrics.id assert songlyrics.id
assert songlyrics.title assert songlyrics.title

View file

@ -43,7 +43,7 @@ class SeeklyricsBackend(BaseBackend, ICapLyrics):
return self.browser.get_lyrics(id) return self.browser.get_lyrics(id)
def iter_lyrics(self, criteria, pattern): def iter_lyrics(self, criteria, pattern):
return self.browser.iter_lyrics(criteria,quote_plus(pattern.encode('iso-8859-1'))) return self.browser.iter_lyrics(criteria, quote_plus(pattern.encode('iso-8859-1')))
def fill_songlyrics(self, songlyrics, fields): def fill_songlyrics(self, songlyrics, fields):
if 'content' in fields: if 'content' in fields:
@ -52,5 +52,5 @@ class SeeklyricsBackend(BaseBackend, ICapLyrics):
return songlyrics return songlyrics
OBJECTS = { OBJECTS = {
SongLyrics:fill_songlyrics SongLyrics: fill_songlyrics
} }

View file

@ -36,14 +36,14 @@ class SeeklyricsBrowser(BaseBrowser):
'http://www.seeklyrics.com/search.php.*t=2': ArtistResultsPage, 'http://www.seeklyrics.com/search.php.*t=2': ArtistResultsPage,
'http://www.seeklyrics.com/lyrics/.*html': SonglyricsPage, 'http://www.seeklyrics.com/lyrics/.*html': SonglyricsPage,
'http://www.seeklyrics.com/lyrics/.*/': ArtistSongsPage, 'http://www.seeklyrics.com/lyrics/.*/': ArtistSongsPage,
} }
def iter_lyrics(self, criteria, pattern): def iter_lyrics(self, criteria, pattern):
if criteria == 'artist': if criteria == 'artist':
type = 2 type = 2
else: else:
type = 1 type = 1
self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern,type)) self.location('http://www.seeklyrics.com/search.php?q=%s&t=%s' % (pattern, type))
assert self.is_on_page(ArtistResultsPage) or self.is_on_page(SongResultsPage) assert self.is_on_page(ArtistResultsPage) or self.is_on_page(SongResultsPage)
return self.page.iter_lyrics() return self.page.iter_lyrics()

View file

@ -23,27 +23,27 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
__all__ = ['SongResultsPage','SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage'] __all__ = ['SongResultsPage', 'SonglyricsPage', 'ArtistResultsPage', 'ArtistSongsPage']
class ArtistResultsPage(BasePage): class ArtistResultsPage(BasePage):
def iter_lyrics(self): def iter_lyrics(self):
for link in self.parser.select(self.document.getroot(),'table[title~=Results] a.tlink'): for link in self.parser.select(self.document.getroot(), 'table[title~=Results] a.tlink'):
artist = unicode(link.text_content()) artist = unicode(link.text_content())
self.browser.location('http://www.seeklyrics.com%s'%link.attrib.get('href','')) self.browser.location('http://www.seeklyrics.com%s' % link.attrib.get('href', ''))
assert self.browser.is_on_page(ArtistSongsPage) assert self.browser.is_on_page(ArtistSongsPage)
for lyr in self.browser.page.iter_lyrics(artist): for lyr in self.browser.page.iter_lyrics(artist):
yield lyr yield lyr
class ArtistSongsPage(BasePage): class ArtistSongsPage(BasePage):
def iter_lyrics(self,artist): def iter_lyrics(self, artist):
for th in self.parser.select(self.document.getroot(),'th.text'): for th in self.parser.select(self.document.getroot(), 'th.text'):
txt = th.text_content() txt = th.text_content()
if txt.startswith('Top') and txt.endswith('Lyrics'): if txt.startswith('Top') and txt.endswith('Lyrics'):
for link in self.parser.select(th.getparent().getparent(),'a.tlink'): for link in self.parser.select(th.getparent().getparent(), 'a.tlink'):
title = unicode(link.attrib.get('title','').replace(' Lyrics','')) title = unicode(link.attrib.get('title', '').replace(' Lyrics', ''))
id = link.attrib.get('href','').replace('/lyrics/','').replace('.html','') id = link.attrib.get('href', '').replace('/lyrics/', '').replace('.html', '')
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
songlyrics.content = NotLoaded songlyrics.content = NotLoaded
@ -53,15 +53,15 @@ class ArtistSongsPage(BasePage):
class SongResultsPage(BasePage): class SongResultsPage(BasePage):
def iter_lyrics(self): def iter_lyrics(self):
first = True first = True
for tr in self.parser.select(self.document.getroot(),'table[title~=Results] tr'): for tr in self.parser.select(self.document.getroot(), 'table[title~=Results] tr'):
if first: if first:
first = False first = False
continue continue
artist = NotAvailable artist = NotAvailable
ftitle = self.parser.select(tr,'a > font > font',1) ftitle = self.parser.select(tr, 'a > font > font', 1)
title = unicode(ftitle.getparent().getparent().text_content()) title = unicode(ftitle.getparent().getparent().text_content())
id = ftitle.getparent().getparent().attrib.get('href','').replace('/lyrics/','').replace('.html','') id = ftitle.getparent().getparent().attrib.get('href', '').replace('/lyrics/', '').replace('.html', '')
aartist = self.parser.select(tr,'a')[-1] aartist = self.parser.select(tr, 'a')[-1]
artist = unicode(aartist.text) artist = unicode(aartist.text)
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
@ -73,12 +73,12 @@ class SonglyricsPage(BasePage):
def get_lyrics(self, id): def get_lyrics(self, id):
artist = NotAvailable artist = NotAvailable
title = NotAvailable title = NotAvailable
l_artitle = self.parser.select(self.document.getroot(),'table.text td > b > h2') l_artitle = self.parser.select(self.document.getroot(), 'table.text td > b > h2')
if len(l_artitle) > 0: if len(l_artitle) > 0:
artitle = l_artitle[0].text.split(' Lyrics by ') artitle = l_artitle[0].text.split(' Lyrics by ')
artist = unicode(artitle[1]) artist = unicode(artitle[1])
title = unicode(artitle[0]) title = unicode(artitle[0])
content = unicode(self.parser.select(self.document.getroot(),'div#songlyrics',1).text_content().strip()) content = unicode(self.parser.select(self.document.getroot(), 'div#songlyrics', 1).text_content().strip())
songlyrics = SongLyrics(id, title) songlyrics = SongLyrics(id, title)
songlyrics.artist = artist songlyrics.artist = artist
songlyrics.content = content songlyrics.content = content

View file

@ -25,7 +25,7 @@ class SeeklyricsTest(BackendTest):
BACKEND = 'seeklyrics' BACKEND = 'seeklyrics'
def test_search_song_n_get(self): def test_search_song_n_get(self):
l_lyrics = list(self.backend.iter_lyrics('song','Complainte')) l_lyrics = list(self.backend.iter_lyrics('song', 'Complainte'))
for songlyrics in l_lyrics: for songlyrics in l_lyrics:
assert songlyrics.id assert songlyrics.id
assert songlyrics.title assert songlyrics.title
@ -38,7 +38,7 @@ class SeeklyricsTest(BackendTest):
assert full_lyr.content is not NotLoaded assert full_lyr.content is not NotLoaded
def test_search_artist(self): def test_search_artist(self):
l_lyrics = list(self.backend.iter_lyrics('artist','boris vian')) l_lyrics = list(self.backend.iter_lyrics('artist', 'boris vian'))
for songlyrics in l_lyrics: for songlyrics in l_lyrics:
assert songlyrics.id assert songlyrics.id
assert songlyrics.title assert songlyrics.title

View file

@ -17,10 +17,10 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.subtitle import ICapSubtitle,LanguageNotSupported from weboob.capabilities.subtitle import ICapSubtitle, LanguageNotSupported
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from .browser import TvsubtitlesBrowser,LANGUAGE_LIST from .browser import TvsubtitlesBrowser, LANGUAGE_LIST
from urllib import quote_plus from urllib import quote_plus
@ -52,4 +52,4 @@ class TvsubtitlesBackend(BaseBackend, ICapSubtitle):
def iter_subtitles(self, language, pattern): def iter_subtitles(self, language, pattern):
if language not in LANGUAGE_LIST: if language not in LANGUAGE_LIST:
raise LanguageNotSupported() raise LanguageNotSupported()
return self.browser.iter_subtitles(language,quote_plus(pattern.encode('utf-8'))) return self.browser.iter_subtitles(language, quote_plus(pattern.encode('utf-8')))

View file

@ -20,14 +20,14 @@
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from .pages import SeriePage, SearchPage, SeasonPage,HomePage from .pages import SeriePage, SearchPage, SeasonPage, HomePage
__all__ = ['TvsubtitlesBrowser'] __all__ = ['TvsubtitlesBrowser']
LANGUAGE_LIST = ['en','es','fr','de','br','ru','ua','it','gr', LANGUAGE_LIST = ['en', 'es', 'fr', 'de', 'br', 'ru', 'ua', 'it', 'gr',
'ar','hu','pl','tr','nl','pt','sv','da','fi', 'ar', 'hu', 'pl', 'tr', 'nl', 'pt', 'sv', 'da', 'fi',
'ko','cn','jp','bg','cz','ro'] 'ko', 'cn', 'jp', 'bg', 'cz', 'ro']
class TvsubtitlesBrowser(BaseBrowser): class TvsubtitlesBrowser(BaseBrowser):
@ -39,13 +39,13 @@ class TvsubtitlesBrowser(BaseBrowser):
'http://www.tvsubtitles.net': HomePage, 'http://www.tvsubtitles.net': HomePage,
'http://www.tvsubtitles.net/search.php': SearchPage, 'http://www.tvsubtitles.net/search.php': SearchPage,
'http://www.tvsubtitles.net/tvshow-.*.html': SeriePage, 'http://www.tvsubtitles.net/tvshow-.*.html': SeriePage,
'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html' : SeasonPage 'http://www.tvsubtitles.net/subtitle-[0-9]*-[0-9]*-.*.html': SeasonPage
} }
def iter_subtitles(self, language, pattern): def iter_subtitles(self, language, pattern):
self.location('http://www.tvsubtitles.net') self.location('http://www.tvsubtitles.net')
assert self.is_on_page(HomePage) assert self.is_on_page(HomePage)
return self.page.iter_subtitles(language,pattern) return self.page.iter_subtitles(language, pattern)
def get_subtitle(self, id): def get_subtitle(self, id):
self.location('http://www.tvsubtitles.net/subtitle-%s.html' % id) self.location('http://www.tvsubtitles.net/subtitle-%s.html' % id)

View file

@ -22,11 +22,11 @@ from weboob.capabilities.subtitle import Subtitle
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
__all__ = ['HomePage','SearchPage','SeriePage','SeasonPage'] __all__ = ['HomePage', 'SearchPage', 'SeriePage', 'SeasonPage']
class HomePage(BasePage): class HomePage(BasePage):
def iter_subtitles(self,language,pattern): def iter_subtitles(self, language, pattern):
self.browser.select_form(nr=0) self.browser.select_form(nr=0)
self.browser['q'] = pattern.encode('utf-8') self.browser['q'] = pattern.encode('utf-8')
self.browser.submit() self.browser.submit()
@ -38,15 +38,15 @@ class HomePage(BasePage):
class SearchPage(BasePage): class SearchPage(BasePage):
""" Page which contains results as a list of series """ Page which contains results as a list of series
""" """
def iter_subtitles(self,language): def iter_subtitles(self, language):
list_result = self.parser.select(self.document.getroot(),'div.left_articles ul') list_result = self.parser.select(self.document.getroot(), 'div.left_articles ul')
if len(list_result) > 0: if len(list_result) > 0:
li_result = self.parser.select(list_result[0],'li') li_result = self.parser.select(list_result[0], 'li')
for line in li_result: for line in li_result:
if len(self.parser.select(line,'img[alt=%s]'%language)) > 0: if len(self.parser.select(line, 'img[alt=%s]' % language)) > 0:
link = self.parser.select(line,'a',1) link = self.parser.select(line, 'a', 1)
href = link.attrib.get('href','') href = link.attrib.get('href', '')
self.browser.location("http://%s%s"%(self.browser.DOMAIN,href)) self.browser.location("http://%s%s" % (self.browser.DOMAIN, href))
assert self.browser.is_on_page(SeriePage) assert self.browser.is_on_page(SeriePage)
for subtitle in self.browser.page.iter_subtitles(language): for subtitle in self.browser.page.iter_subtitles(language):
yield subtitle yield subtitle
@ -55,26 +55,27 @@ class SearchPage(BasePage):
class SeriePage(BasePage): class SeriePage(BasePage):
""" Page of all seasons """ Page of all seasons
""" """
def iter_subtitles(self,language,only_one_season=False): def iter_subtitles(self, language, only_one_season=False):
# handle the current season # handle the current season
last_table_line = self.parser.select(self.document.getroot(),'table#table5 tr')[-1] last_table_line = self.parser.select(self.document.getroot(), 'table#table5 tr')[-1]
amount = int(self.parser.select(last_table_line,'td')[2].text_content()) amount = int(self.parser.select(last_table_line, 'td')[2].text_content())
if amount > 0: if amount > 0:
my_lang_img = self.parser.select(last_table_line,'img[alt=%s]'%language) my_lang_img = self.parser.select(last_table_line, 'img[alt=%s]' % language)
if len(my_lang_img) > 0: if len(my_lang_img) > 0:
url_current_season = self.browser.geturl().split('/')[-1].replace('tvshow','subtitle').replace('.html','-%s.html'%language) url_current_season = self.browser.geturl().split('/')[-1].replace(
'tvshow', 'subtitle').replace('.html', '-%s.html' % language)
self.browser.location(url_current_season) self.browser.location(url_current_season)
assert self.browser.is_on_page(SeasonPage) assert self.browser.is_on_page(SeasonPage)
yield self.browser.page.iter_subtitles() yield self.browser.page.iter_subtitles()
if not only_one_season: if not only_one_season:
# handle the other seasons by following top links # handle the other seasons by following top links
other_seasons_links = self.parser.select(self.document.getroot(),'p.description a') other_seasons_links = self.parser.select(self.document.getroot(), 'p.description a')
for link in other_seasons_links: for link in other_seasons_links:
href = link.attrib.get('href','') href = link.attrib.get('href', '')
self.browser.location("http://%s/%s"%(self.browser.DOMAIN,href)) self.browser.location("http://%s/%s" % (self.browser.DOMAIN, href))
assert self.browser.is_on_page(SeriePage) assert self.browser.is_on_page(SeriePage)
for subtitle in self.browser.page.iter_subtitles(language,True): for subtitle in self.browser.page.iter_subtitles(language, True):
yield subtitle yield subtitle
@ -82,19 +83,19 @@ class SeasonPage(BasePage):
""" Page of a season with the right language """ Page of a season with the right language
""" """
def get_subtitle(self): def get_subtitle(self):
filename_line = self.parser.select(self.document.getroot(),'img[alt=filename]',1).getparent().getparent() filename_line = self.parser.select(self.document.getroot(), 'img[alt=filename]', 1).getparent().getparent()
name = unicode(self.parser.select(filename_line,'td')[2].text) name = unicode(self.parser.select(filename_line, 'td')[2].text)
id = self.browser.geturl().split('/')[-1].replace('.html','').replace('subtitle-','') id = self.browser.geturl().split('/')[-1].replace('.html', '').replace('subtitle-', '')
url = unicode('http://%s/download-%s.html'%(self.browser.DOMAIN,id)) url = unicode('http://%s/download-%s.html' % (self.browser.DOMAIN, id))
amount_line = self.parser.select(self.document.getroot(),'tr[title~=amount]',1) amount_line = self.parser.select(self.document.getroot(), 'tr[title~=amount]', 1)
nb_cd = int(self.parser.select(amount_line,'td')[2].text) nb_cd = int(self.parser.select(amount_line, 'td')[2].text)
lang = unicode(url.split('-')[-1].split('.html')[0]) lang = unicode(url.split('-')[-1].split('.html')[0])
filenames_line = self.parser.select(self.document.getroot(),'tr[title~=list]',1) filenames_line = self.parser.select(self.document.getroot(), 'tr[title~=list]', 1)
file_names = self.parser.select(filenames_line,'td')[2].text_content().strip().replace('.srt','.srt\n') file_names = self.parser.select(filenames_line, 'td')[2].text_content().strip().replace('.srt', '.srt\n')
desc = u"files :\n" desc = u"files :\n"
desc += file_names desc += file_names
subtitle = Subtitle(id,name) subtitle = Subtitle(id, name)
subtitle.url = url subtitle.url = url
subtitle.language = lang subtitle.language = lang
subtitle.nb_cd = nb_cd subtitle.nb_cd = nb_cd

View file

@ -26,7 +26,7 @@ class TvsubtitlesTest(BackendTest):
BACKEND = 'tvsubtitles' BACKEND = 'tvsubtitles'
def test_subtitle(self): def test_subtitle(self):
subtitles = list(self.backend.iter_subtitles('fr','sopranos')) subtitles = list(self.backend.iter_subtitles('fr', 'sopranos'))
assert (len(subtitles) > 0) assert (len(subtitles) > 0)
for subtitle in subtitles: for subtitle in subtitles:
assert subtitle.url.startswith('http') assert subtitle.url.startswith('http')