[imdb] simplify iterators and fill short description in all cases
This commit is contained in:
parent
5e8e4690da
commit
19b418e6d0
3 changed files with 31 additions and 36 deletions
|
|
@ -23,7 +23,7 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
|
|||
from weboob.capabilities.cinema import Movie, Person
|
||||
from weboob.tools.json import json
|
||||
|
||||
from .pages import MoviePage, PersonPage, MovieCrewPage, BiographyPage, FilmographyPage
|
||||
from .pages import PersonPage, MovieCrewPage, BiographyPage, FilmographyPage
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
|
|
@ -36,7 +36,6 @@ class ImdbBrowser(BaseBrowser):
|
|||
ENCODING = 'utf-8'
|
||||
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
||||
PAGES = {
|
||||
'http://www.imdb.com/title/tt[0-9]*/*': MoviePage,
|
||||
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
|
||||
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
||||
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
||||
|
|
@ -98,10 +97,13 @@ class ImdbBrowser(BaseBrowser):
|
|||
pitch = NotAvailable
|
||||
country = NotAvailable
|
||||
note = NotAvailable
|
||||
short_description = NotAvailable
|
||||
other_titles = []
|
||||
roles = {}
|
||||
|
||||
title = unicode(jres['title'].strip())
|
||||
if jres.has_key('directors'):
|
||||
short_description = ', '.join(jres['directors'])
|
||||
if jres.has_key('runtime'):
|
||||
dur_str = jres['runtime'][0].split(':')
|
||||
if len(dur_str) == 1:
|
||||
|
|
@ -145,6 +147,7 @@ class ImdbBrowser(BaseBrowser):
|
|||
movie.country = country
|
||||
movie.note = note
|
||||
movie.roles = roles
|
||||
movie.short_description= short_description
|
||||
return movie
|
||||
|
||||
def get_person(self, id):
|
||||
|
|
@ -158,9 +161,10 @@ class ImdbBrowser(BaseBrowser):
|
|||
return self.page.get_biography()
|
||||
|
||||
def iter_movie_persons(self, movie_id, role):
|
||||
self.location('http://www.imdb.com/title/%s' % movie_id)
|
||||
assert self.is_on_page(MoviePage)
|
||||
return self.page.iter_persons(movie_id, role)
|
||||
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
|
||||
assert self.is_on_page(MovieCrewPage)
|
||||
for p in self.page.iter_persons(role):
|
||||
yield p
|
||||
|
||||
def iter_person_movies(self, person_id, role):
|
||||
self.location('http://www.imdb.com/name/%s/filmotype' % person_id)
|
||||
|
|
@ -168,11 +172,13 @@ class ImdbBrowser(BaseBrowser):
|
|||
return self.page.iter_movies(role)
|
||||
|
||||
def iter_person_movies_ids(self, person_id):
|
||||
self.location('http://www.imdb.com/name/%s' % person_id)
|
||||
assert self.is_on_page(PersonPage)
|
||||
return self.page.iter_movies_ids(person_id)
|
||||
self.location('http://www.imdb.com/name/%s/filmotype' % person_id)
|
||||
assert self.is_on_page(FilmographyPage)
|
||||
for movie in self.page.iter_movies_ids():
|
||||
yield movie
|
||||
|
||||
def iter_movie_persons_ids(self, movie_id):
|
||||
self.location('http://www.imdb.com/title/%s' % movie_id)
|
||||
assert self.is_on_page(MoviePage)
|
||||
return self.page.iter_persons_ids(movie_id)
|
||||
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
|
||||
assert self.is_on_page(MovieCrewPage)
|
||||
for person in self.page.iter_persons_ids():
|
||||
yield person
|
||||
|
|
|
|||
|
|
@ -25,23 +25,7 @@ from weboob.tools.browser import BasePage
|
|||
from datetime import datetime
|
||||
|
||||
|
||||
__all__ = ['MoviePage','PersonPage','MovieCrewPage','BiographyPage','FilmographyPage']
|
||||
|
||||
|
||||
class MoviePage(BasePage):
|
||||
''' Page describing a movie, only used to go on the MovieCrewPage
|
||||
'''
|
||||
def iter_persons(self, id, role=None):
|
||||
self.browser.location('http://www.imdb.com/title/%s/fullcredits'%id)
|
||||
assert self.browser.is_on_page(MovieCrewPage)
|
||||
for p in self.browser.page.iter_persons(role):
|
||||
yield p
|
||||
|
||||
def iter_persons_ids(self,id):
|
||||
self.browser.location('http://www.imdb.com/title/%s/fullcredits'%id)
|
||||
assert self.browser.is_on_page(MovieCrewPage)
|
||||
for p in self.browser.page.iter_persons_ids():
|
||||
yield p
|
||||
__all__ = ['PersonPage','MovieCrewPage','BiographyPage','FilmographyPage']
|
||||
|
||||
|
||||
class BiographyPage(BasePage):
|
||||
|
|
@ -171,16 +155,17 @@ class PersonPage(BasePage):
|
|||
person.roles = roles
|
||||
return person
|
||||
|
||||
def iter_movies_ids(self,person_id):
|
||||
for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
||||
a = self.parser.select(movie_div,'b a',1)
|
||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
||||
yield id
|
||||
|
||||
class FilmographyPage(BasePage):
|
||||
''' Page of detailed filmography of a person, sorted by type of role
|
||||
This page is easier to parse than the main person page filmography
|
||||
'''
|
||||
def iter_movies_ids(self):
|
||||
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
||||
for a in self.parser.select(role_div,'ol > li > a'):
|
||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
||||
if id.startswith('tt'):
|
||||
yield id
|
||||
|
||||
def get_roles(self):
|
||||
roles = {}
|
||||
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
||||
|
|
@ -205,6 +190,9 @@ class FilmographyPage(BasePage):
|
|||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
||||
if id.startswith('tt'):
|
||||
title = a.text
|
||||
#movie = self.browser.get_movie(id)
|
||||
role_detail = NotAvailable
|
||||
if len(a.tail) > 0:
|
||||
role_detail = unicode(' '.join(a.tail.replace('..','').split()))
|
||||
movie = Movie(id,title)
|
||||
movie.short_description = role_detail
|
||||
yield movie
|
||||
|
|
|
|||
|
|
@ -203,7 +203,8 @@ class Cineoob(ReplApplication):
|
|||
inter = list(set(lid1) & set(lid2))
|
||||
for common in inter:
|
||||
movie = self.get_object(common, 'get_movie')
|
||||
self.cached_format(movie)
|
||||
if movie:
|
||||
self.cached_format(movie)
|
||||
self.flush()
|
||||
|
||||
def do_persons_in_common(self, line):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue