[imdb] simplify iterators and fill short description in all cases
This commit is contained in:
parent
5e8e4690da
commit
19b418e6d0
3 changed files with 31 additions and 36 deletions
|
|
@ -23,7 +23,7 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
|
||||||
from weboob.capabilities.cinema import Movie, Person
|
from weboob.capabilities.cinema import Movie, Person
|
||||||
from weboob.tools.json import json
|
from weboob.tools.json import json
|
||||||
|
|
||||||
from .pages import MoviePage, PersonPage, MovieCrewPage, BiographyPage, FilmographyPage
|
from .pages import PersonPage, MovieCrewPage, BiographyPage, FilmographyPage
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
@ -36,7 +36,6 @@ class ImdbBrowser(BaseBrowser):
|
||||||
ENCODING = 'utf-8'
|
ENCODING = 'utf-8'
|
||||||
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://www.imdb.com/title/tt[0-9]*/*': MoviePage,
|
|
||||||
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
|
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
|
||||||
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
||||||
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
||||||
|
|
@ -98,10 +97,13 @@ class ImdbBrowser(BaseBrowser):
|
||||||
pitch = NotAvailable
|
pitch = NotAvailable
|
||||||
country = NotAvailable
|
country = NotAvailable
|
||||||
note = NotAvailable
|
note = NotAvailable
|
||||||
|
short_description = NotAvailable
|
||||||
other_titles = []
|
other_titles = []
|
||||||
roles = {}
|
roles = {}
|
||||||
|
|
||||||
title = unicode(jres['title'].strip())
|
title = unicode(jres['title'].strip())
|
||||||
|
if jres.has_key('directors'):
|
||||||
|
short_description = ', '.join(jres['directors'])
|
||||||
if jres.has_key('runtime'):
|
if jres.has_key('runtime'):
|
||||||
dur_str = jres['runtime'][0].split(':')
|
dur_str = jres['runtime'][0].split(':')
|
||||||
if len(dur_str) == 1:
|
if len(dur_str) == 1:
|
||||||
|
|
@ -145,6 +147,7 @@ class ImdbBrowser(BaseBrowser):
|
||||||
movie.country = country
|
movie.country = country
|
||||||
movie.note = note
|
movie.note = note
|
||||||
movie.roles = roles
|
movie.roles = roles
|
||||||
|
movie.short_description= short_description
|
||||||
return movie
|
return movie
|
||||||
|
|
||||||
def get_person(self, id):
|
def get_person(self, id):
|
||||||
|
|
@ -158,9 +161,10 @@ class ImdbBrowser(BaseBrowser):
|
||||||
return self.page.get_biography()
|
return self.page.get_biography()
|
||||||
|
|
||||||
def iter_movie_persons(self, movie_id, role):
|
def iter_movie_persons(self, movie_id, role):
|
||||||
self.location('http://www.imdb.com/title/%s' % movie_id)
|
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
|
||||||
assert self.is_on_page(MoviePage)
|
assert self.is_on_page(MovieCrewPage)
|
||||||
return self.page.iter_persons(movie_id, role)
|
for p in self.page.iter_persons(role):
|
||||||
|
yield p
|
||||||
|
|
||||||
def iter_person_movies(self, person_id, role):
|
def iter_person_movies(self, person_id, role):
|
||||||
self.location('http://www.imdb.com/name/%s/filmotype' % person_id)
|
self.location('http://www.imdb.com/name/%s/filmotype' % person_id)
|
||||||
|
|
@ -168,11 +172,13 @@ class ImdbBrowser(BaseBrowser):
|
||||||
return self.page.iter_movies(role)
|
return self.page.iter_movies(role)
|
||||||
|
|
||||||
def iter_person_movies_ids(self, person_id):
|
def iter_person_movies_ids(self, person_id):
|
||||||
self.location('http://www.imdb.com/name/%s' % person_id)
|
self.location('http://www.imdb.com/name/%s/filmotype' % person_id)
|
||||||
assert self.is_on_page(PersonPage)
|
assert self.is_on_page(FilmographyPage)
|
||||||
return self.page.iter_movies_ids(person_id)
|
for movie in self.page.iter_movies_ids():
|
||||||
|
yield movie
|
||||||
|
|
||||||
def iter_movie_persons_ids(self, movie_id):
|
def iter_movie_persons_ids(self, movie_id):
|
||||||
self.location('http://www.imdb.com/title/%s' % movie_id)
|
self.location('http://www.imdb.com/title/%s/fullcredits'%movie_id)
|
||||||
assert self.is_on_page(MoviePage)
|
assert self.is_on_page(MovieCrewPage)
|
||||||
return self.page.iter_persons_ids(movie_id)
|
for person in self.page.iter_persons_ids():
|
||||||
|
yield person
|
||||||
|
|
|
||||||
|
|
@ -25,23 +25,7 @@ from weboob.tools.browser import BasePage
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['MoviePage','PersonPage','MovieCrewPage','BiographyPage','FilmographyPage']
|
__all__ = ['PersonPage','MovieCrewPage','BiographyPage','FilmographyPage']
|
||||||
|
|
||||||
|
|
||||||
class MoviePage(BasePage):
|
|
||||||
''' Page describing a movie, only used to go on the MovieCrewPage
|
|
||||||
'''
|
|
||||||
def iter_persons(self, id, role=None):
|
|
||||||
self.browser.location('http://www.imdb.com/title/%s/fullcredits'%id)
|
|
||||||
assert self.browser.is_on_page(MovieCrewPage)
|
|
||||||
for p in self.browser.page.iter_persons(role):
|
|
||||||
yield p
|
|
||||||
|
|
||||||
def iter_persons_ids(self,id):
|
|
||||||
self.browser.location('http://www.imdb.com/title/%s/fullcredits'%id)
|
|
||||||
assert self.browser.is_on_page(MovieCrewPage)
|
|
||||||
for p in self.browser.page.iter_persons_ids():
|
|
||||||
yield p
|
|
||||||
|
|
||||||
|
|
||||||
class BiographyPage(BasePage):
|
class BiographyPage(BasePage):
|
||||||
|
|
@ -171,16 +155,17 @@ class PersonPage(BasePage):
|
||||||
person.roles = roles
|
person.roles = roles
|
||||||
return person
|
return person
|
||||||
|
|
||||||
def iter_movies_ids(self,person_id):
|
|
||||||
for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
|
||||||
a = self.parser.select(movie_div,'b a',1)
|
|
||||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
|
||||||
yield id
|
|
||||||
|
|
||||||
class FilmographyPage(BasePage):
|
class FilmographyPage(BasePage):
|
||||||
''' Page of detailed filmography of a person, sorted by type of role
|
''' Page of detailed filmography of a person, sorted by type of role
|
||||||
This page is easier to parse than the main person page filmography
|
This page is easier to parse than the main person page filmography
|
||||||
'''
|
'''
|
||||||
|
def iter_movies_ids(self):
|
||||||
|
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
||||||
|
for a in self.parser.select(role_div,'ol > li > a'):
|
||||||
|
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
||||||
|
if id.startswith('tt'):
|
||||||
|
yield id
|
||||||
|
|
||||||
def get_roles(self):
|
def get_roles(self):
|
||||||
roles = {}
|
roles = {}
|
||||||
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
for role_div in self.parser.select(self.document.getroot(),'div.filmo'):
|
||||||
|
|
@ -205,6 +190,9 @@ class FilmographyPage(BasePage):
|
||||||
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
id = a.attrib.get('href','').strip('/').split('/')[-1]
|
||||||
if id.startswith('tt'):
|
if id.startswith('tt'):
|
||||||
title = a.text
|
title = a.text
|
||||||
#movie = self.browser.get_movie(id)
|
role_detail = NotAvailable
|
||||||
|
if len(a.tail) > 0:
|
||||||
|
role_detail = unicode(' '.join(a.tail.replace('..','').split()))
|
||||||
movie = Movie(id,title)
|
movie = Movie(id,title)
|
||||||
|
movie.short_description = role_detail
|
||||||
yield movie
|
yield movie
|
||||||
|
|
|
||||||
|
|
@ -203,7 +203,8 @@ class Cineoob(ReplApplication):
|
||||||
inter = list(set(lid1) & set(lid2))
|
inter = list(set(lid1) & set(lid2))
|
||||||
for common in inter:
|
for common in inter:
|
||||||
movie = self.get_object(common, 'get_movie')
|
movie = self.get_object(common, 'get_movie')
|
||||||
self.cached_format(movie)
|
if movie:
|
||||||
|
self.cached_format(movie)
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
def do_persons_in_common(self, line):
|
def do_persons_in_common(self, line):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue