[cineoob] new command : biography
This commit is contained in:
parent
393800e7fd
commit
27c36d412b
5 changed files with 67 additions and 17 deletions
|
|
@ -62,3 +62,6 @@ class ImdbBackend(BaseBackend, ICapCinema):
|
||||||
|
|
||||||
def iter_movie_persons_ids(self, id):
|
def iter_movie_persons_ids(self, id):
|
||||||
return self.browser.iter_movie_persons_ids(id)
|
return self.browser.iter_movie_persons_ids(id)
|
||||||
|
|
||||||
|
def get_person_biography(self, id):
    """Return the biography of the person ``id``.

    The backend does no work of its own here: the call is forwarded
    unchanged to ``self.browser.get_person_biography``.
    """
    browser = self.browser
    return browser.get_person_biography(id)
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ from weboob.capabilities.base import NotAvailable
|
||||||
from weboob.capabilities.cinema import Movie
|
from weboob.capabilities.cinema import Movie
|
||||||
from weboob.tools.json import json
|
from weboob.tools.json import json
|
||||||
|
|
||||||
from .pages import MoviePage, PersonPage, MovieCrewPage
|
from .pages import MoviePage, PersonPage, MovieCrewPage, BiographyPage
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
@ -38,7 +38,8 @@ class ImdbBrowser(BaseBrowser):
|
||||||
PAGES = {
|
PAGES = {
|
||||||
'http://www.imdb.com/title/tt[0-9]*/*': MoviePage,
|
'http://www.imdb.com/title/tt[0-9]*/*': MoviePage,
|
||||||
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
|
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
|
||||||
'http://www.imdb.com/name/nm.*': PersonPage,
|
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
||||||
|
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
||||||
}
|
}
|
||||||
|
|
||||||
def iter_movies(self, pattern):
|
def iter_movies(self, pattern):
|
||||||
|
|
@ -121,6 +122,11 @@ class ImdbBrowser(BaseBrowser):
|
||||||
assert self.is_on_page(PersonPage)
|
assert self.is_on_page(PersonPage)
|
||||||
return self.page.get_person(id)
|
return self.page.get_person(id)
|
||||||
|
|
||||||
|
def get_person_biography(self, id):
    """Fetch the biography page of person ``id`` and return its text.

    Navigates to the person's ``/bio`` URL, checks that the browser
    ended up on a ``BiographyPage`` and returns whatever that page's
    ``get_biography()`` produces.
    """
    bio_url = 'http://www.imdb.com/name/%s/bio' % id
    self.location(bio_url)
    # Same page-type check as the other getters of this browser.
    assert self.is_on_page(BiographyPage)
    biography = self.page.get_biography()
    return biography
|
||||||
|
|
||||||
def iter_movie_persons(self, movie_id):
|
def iter_movie_persons(self, movie_id):
|
||||||
self.location('http://www.imdb.com/title/%s' % movie_id)
|
self.location('http://www.imdb.com/title/%s' % movie_id)
|
||||||
assert self.is_on_page(MoviePage)
|
assert self.is_on_page(MoviePage)
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,17 @@ class MoviePage(BasePage):
|
||||||
yield p
|
yield p
|
||||||
|
|
||||||
|
|
||||||
|
class BiographyPage(BasePage):
    """Page holding the full biography of a person."""

    def get_biography(self):
        """Return the biography text found on the page.

        Every ``<p>`` element under ``div#tn15content`` contributes its
        stripped text content, each one preceded by a blank-line
        separator (so a non-empty result starts with two newlines).
        An empty string is returned when there is no paragraph.
        """
        # The trailing 1 is passed through to parser.select as-is;
        # presumably it requests exactly one match -- confirm in the parser.
        content = self.parser.select(self.document.getroot(), 'div#tn15content', 1)
        paragraphs = self.parser.select(content, 'p')
        return ''.join(
            '\n\n%s' % paragraph.text_content().strip()
            for paragraph in paragraphs
        )
|
||||||
|
|
||||||
|
|
||||||
class MovieCrewPage(BasePage):
|
class MovieCrewPage(BasePage):
|
||||||
''' Page listing all the persons related to a movie
|
''' Page listing all the persons related to a movie
|
||||||
'''
|
'''
|
||||||
|
|
@ -72,7 +83,7 @@ class PersonPage(BasePage):
|
||||||
'''
|
'''
|
||||||
def get_person(self,id):
|
def get_person(self,id):
|
||||||
name = NotAvailable
|
name = NotAvailable
|
||||||
biography = NotAvailable
|
short_biography = NotAvailable
|
||||||
birth_place = NotAvailable
|
birth_place = NotAvailable
|
||||||
birth_date = NotAvailable
|
birth_date = NotAvailable
|
||||||
death_date = NotAvailable
|
death_date = NotAvailable
|
||||||
|
|
@ -83,7 +94,7 @@ class PersonPage(BasePage):
|
||||||
td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1)
|
td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1)
|
||||||
descs = self.parser.select(td_overview,'span[itemprop=description]')
|
descs = self.parser.select(td_overview,'span[itemprop=description]')
|
||||||
if len(descs) > 0:
|
if len(descs) > 0:
|
||||||
biography = descs[0].text
|
short_biography = descs[0].text
|
||||||
rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
|
rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
|
||||||
if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
|
if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
|
||||||
links = self.parser.select(rname_block[0].getparent(),'a')
|
links = self.parser.select(rname_block[0].getparent(),'a')
|
||||||
|
|
@ -114,17 +125,31 @@ class PersonPage(BasePage):
|
||||||
dtime.append('1')
|
dtime.append('1')
|
||||||
dtime.append('1')
|
dtime.append('1')
|
||||||
death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2]))
|
death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2]))
|
||||||
# TODO IMPROVE THIS -----------
|
# TODO IMPROVE THIS, apparently there's an error in parsing, quite hard to handle -----------
|
||||||
#for role in ['Actor','Composer']:
|
|
||||||
# show_span = self.parser.select(self.document.getroot(),'span[id=show-%s]' % role)
|
#filmo_block = self.parser.select(self.document.getroot(),'div#filmography',1)
|
||||||
# if len(show_span) > 0:
|
#role_list = []
|
||||||
# roles[role] = []
|
#for span in self.parser.select(self.document.getroot(),'span.show-link'):
|
||||||
# filmo_block = show_span[0].getparent()
|
# role_list.append(span.attrib.get('id','').replace('show-',''))
|
||||||
# filmo_block = filmo_block.getnext()
|
#role_index = -1
|
||||||
roles['actor'] = []
|
#current_parent = None
|
||||||
|
##for sp in self.parser.select(filmo_block[0],'span.show-link'):
|
||||||
|
#for divmovie in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
||||||
|
# divhead = divmovie.getparent()
|
||||||
|
# print "-- %s"%(self.document.getpath(divhead))
|
||||||
|
# print divmovie.attrib.get('class','')
|
||||||
|
# if current_parent != self.document.getpath(divhead):
|
||||||
|
# role_index += 1
|
||||||
|
# current_parent = self.document.getpath(divhead)
|
||||||
|
# role = role_list[role_index]
|
||||||
|
# a = self.parser.select(divmovie,'b a',1)
|
||||||
|
# roles[role].append(a.text)
|
||||||
|
#print roles
|
||||||
|
|
||||||
|
roles['any activity'] = []
|
||||||
for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
||||||
a = self.parser.select(movie_div,'b a',1)
|
a = self.parser.select(movie_div,'b a',1)
|
||||||
roles['actor'].append(a.text)
|
roles['any activity'].append(a.text)
|
||||||
|
|
||||||
person = Person(id,name)
|
person = Person(id,name)
|
||||||
person.real_name = real_name
|
person.real_name = real_name
|
||||||
|
|
@ -133,7 +158,7 @@ class PersonPage(BasePage):
|
||||||
person.birth_place = birth_place
|
person.birth_place = birth_place
|
||||||
person.gender = gender
|
person.gender = gender
|
||||||
person.nationality = nationality
|
person.nationality = nationality
|
||||||
person.biography = biography
|
person.short_biography = short_biography
|
||||||
person.roles = roles
|
person.roles = roles
|
||||||
return person
|
return person
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,7 @@ def num_years(begin, end=None):
|
||||||
return num_years
|
return num_years
|
||||||
|
|
||||||
class PersonInfoFormatter(IFormatter):
|
class PersonInfoFormatter(IFormatter):
|
||||||
MANDATORY_FIELDS = ('id', 'name', 'real_name', 'birth_date', 'birth_place', 'gender', 'nationality', 'biography', 'roles')
|
MANDATORY_FIELDS = ('id', 'name', 'real_name', 'birth_date', 'birth_place', 'gender', 'nationality', 'short_biography', 'roles')
|
||||||
|
|
||||||
def format_obj(self, obj, alias):
|
def format_obj(self, obj, alias):
|
||||||
result = u'%s%s%s\n' % (self.BOLD, obj.name, self.NC)
|
result = u'%s%s%s\n' % (self.BOLD, obj.name, self.NC)
|
||||||
|
|
@ -124,7 +124,7 @@ class PersonInfoFormatter(IFormatter):
|
||||||
for movie in lmovies:
|
for movie in lmovies:
|
||||||
result += ' * %s\n' % movie
|
result += ' * %s\n' % movie
|
||||||
result += '\n%sBiography%s\n' % (self.BOLD, self.NC)
|
result += '\n%sBiography%s\n' % (self.BOLD, self.NC)
|
||||||
result += '%s'%obj.biography
|
result += '%s'%obj.short_biography
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -335,3 +335,19 @@ class Cineoob(ReplApplication):
|
||||||
for backend, movie in self.do('iter_person_movies', person.id):
|
for backend, movie in self.do('iter_person_movies', person.id):
|
||||||
self.cached_format(movie)
|
self.cached_format(movie)
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
||||||
|
def do_biography(self, person_id):
    """
    biography person_ID

    Show the complete biography of a person.
    """
    # Resolve the identifier typed by the user into a person object first.
    person = self.get_object(person_id, 'get_person')
    if not person:
        # Report the identifier that was actually requested. The builtin
        # ``id`` must not be interpolated here: it would render as
        # "<built-in function id>" instead of the missing person's ID.
        sys.stderr.write('Person not found: %s\n' % person_id)
        return 3

    self.change_path([u'biography'])
    # self.do yields (backend, result) pairs; only the biography text is shown.
    for backend, bio in self.do('get_person_biography', person.id):
        print(bio)
    self.flush()
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ class Person(CapBaseObject):
|
||||||
birth_place = StringField('City and country of birth of a person')
|
birth_place = StringField('City and country of birth of a person')
|
||||||
gender = StringField('Gender of a person')
|
gender = StringField('Gender of a person')
|
||||||
nationality = StringField('Nationality of a person')
|
nationality = StringField('Nationality of a person')
|
||||||
biography = StringField('Short biography of a person')
|
short_biography = StringField('Short biography of a person')
|
||||||
roles = Field('Lists of movies related to the person indexed by roles',dict)
|
roles = Field('Lists of movies related to the person indexed by roles',dict)
|
||||||
|
|
||||||
def __init__(self, id, name):
|
def __init__(self, id, name):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue