[cineoob] new command : biography
This commit is contained in:
parent
393800e7fd
commit
27c36d412b
5 changed files with 67 additions and 17 deletions
|
|
@ -62,3 +62,6 @@ class ImdbBackend(BaseBackend, ICapCinema):
|
|||
|
||||
def iter_movie_persons_ids(self, id):
    """Forward *id* to the browser's ``iter_movie_persons_ids``.

    Whatever the browser call returns is handed back unchanged.
    """
    browser = self.browser
    return browser.iter_movie_persons_ids(id)
|
||||
|
||||
def get_person_biography(self, id):
    """Forward *id* to the browser's ``get_person_biography``.

    Whatever the browser call returns is handed back unchanged.
    """
    browser = self.browser
    return browser.get_person_biography(id)
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from weboob.capabilities.base import NotAvailable
|
|||
from weboob.capabilities.cinema import Movie
|
||||
from weboob.tools.json import json
|
||||
|
||||
from .pages import MoviePage, PersonPage, MovieCrewPage
|
||||
from .pages import MoviePage, PersonPage, MovieCrewPage, BiographyPage
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
|
|
@ -38,7 +38,8 @@ class ImdbBrowser(BaseBrowser):
|
|||
PAGES = {
|
||||
'http://www.imdb.com/title/tt[0-9]*/*': MoviePage,
|
||||
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
|
||||
'http://www.imdb.com/name/nm.*': PersonPage,
|
||||
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
|
||||
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
|
||||
}
|
||||
|
||||
def iter_movies(self, pattern):
|
||||
|
|
@ -121,6 +122,11 @@ class ImdbBrowser(BaseBrowser):
|
|||
assert self.is_on_page(PersonPage)
|
||||
return self.page.get_person(id)
|
||||
|
||||
def get_person_biography(self, id):
    """Open the IMDb ``/bio`` page of person *id* and return its biography.

    The URL is built by substituting *id* into
    ``http://www.imdb.com/name/%s/bio``. After navigating, the current
    page must be a ``BiographyPage`` (checked with ``assert``, like the
    neighbouring browser methods); the result of its ``get_biography()``
    is returned.
    """
    bio_url = 'http://www.imdb.com/name/%s/bio' % id
    self.location(bio_url)
    assert self.is_on_page(BiographyPage)
    current_page = self.page
    return current_page.get_biography()
|
||||
|
||||
def iter_movie_persons(self, movie_id):
|
||||
self.location('http://www.imdb.com/title/%s' % movie_id)
|
||||
assert self.is_on_page(MoviePage)
|
||||
|
|
|
|||
|
|
@ -44,6 +44,17 @@ class MoviePage(BasePage):
|
|||
yield p
|
||||
|
||||
|
||||
class BiographyPage(BasePage):
    ''' Page holding the full biography text of a person
    '''
    def get_biography(self):
        """Return the concatenated text of the page's biography paragraphs.

        Every ``<p>`` selected under ``div#tn15content`` contributes its
        stripped text content, each one prefixed with two newlines. The
        result is therefore ``''`` when no paragraph is found, and starts
        with ``'\\n\\n'`` otherwise.
        """
        # The trailing ``1`` is passed through to the parser as-is;
        # presumably it requests a single matching element -- TODO confirm
        # against the parser's select() contract.
        content = self.parser.select(self.document.getroot(), 'div#tn15content', 1)
        paragraphs = self.parser.select(content, 'p')
        return ''.join('\n\n%s' % par.text_content().strip() for par in paragraphs)
|
||||
|
||||
|
||||
class MovieCrewPage(BasePage):
|
||||
''' Page listing all the persons related to a movie
|
||||
'''
|
||||
|
|
@ -72,7 +83,7 @@ class PersonPage(BasePage):
|
|||
'''
|
||||
def get_person(self,id):
|
||||
name = NotAvailable
|
||||
biography = NotAvailable
|
||||
short_biography = NotAvailable
|
||||
birth_place = NotAvailable
|
||||
birth_date = NotAvailable
|
||||
death_date = NotAvailable
|
||||
|
|
@ -83,7 +94,7 @@ class PersonPage(BasePage):
|
|||
td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1)
|
||||
descs = self.parser.select(td_overview,'span[itemprop=description]')
|
||||
if len(descs) > 0:
|
||||
biography = descs[0].text
|
||||
short_biography = descs[0].text
|
||||
rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
|
||||
if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
|
||||
links = self.parser.select(rname_block[0].getparent(),'a')
|
||||
|
|
@ -114,17 +125,31 @@ class PersonPage(BasePage):
|
|||
dtime.append('1')
|
||||
dtime.append('1')
|
||||
death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2]))
|
||||
# TODO IMPROVE THIS -----------
|
||||
#for role in ['Actor','Composer']:
|
||||
# show_span = self.parser.select(self.document.getroot(),'span[id=show-%s]' % role)
|
||||
# if len(show_span) > 0:
|
||||
# roles[role] = []
|
||||
# filmo_block = show_span[0].getparent()
|
||||
# filmo_block = filmo_block.getnext()
|
||||
roles['actor'] = []
|
||||
# TODO IMPROVE THIS, apparently there's an error in parsing, quite hard to handle -----------
|
||||
|
||||
#filmo_block = self.parser.select(self.document.getroot(),'div#filmography',1)
|
||||
#role_list = []
|
||||
#for span in self.parser.select(self.document.getroot(),'span.show-link'):
|
||||
# role_list.append(span.attrib.get('id','').replace('show-',''))
|
||||
#role_index = -1
|
||||
#current_parent = None
|
||||
##for sp in self.parser.select(filmo_block[0],'span.show-link'):
|
||||
#for divmovie in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
||||
# divhead = divmovie.getparent()
|
||||
# print "-- %s"%(self.document.getpath(divhead))
|
||||
# print divmovie.attrib.get('class','')
|
||||
# if current_parent != self.document.getpath(divhead):
|
||||
# role_index += 1
|
||||
# current_parent = self.document.getpath(divhead)
|
||||
# role = role_list[role_index]
|
||||
# a = self.parser.select(divmovie,'b a',1)
|
||||
# roles[role].append(a.text)
|
||||
#print roles
|
||||
|
||||
roles['any activity'] = []
|
||||
for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
|
||||
a = self.parser.select(movie_div,'b a',1)
|
||||
roles['actor'].append(a.text)
|
||||
roles['any activity'].append(a.text)
|
||||
|
||||
person = Person(id,name)
|
||||
person.real_name = real_name
|
||||
|
|
@ -133,7 +158,7 @@ class PersonPage(BasePage):
|
|||
person.birth_place = birth_place
|
||||
person.gender = gender
|
||||
person.nationality = nationality
|
||||
person.biography = biography
|
||||
person.short_biography = short_biography
|
||||
person.roles = roles
|
||||
return person
|
||||
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ def num_years(begin, end=None):
|
|||
return num_years
|
||||
|
||||
class PersonInfoFormatter(IFormatter):
|
||||
MANDATORY_FIELDS = ('id', 'name', 'real_name', 'birth_date', 'birth_place', 'gender', 'nationality', 'biography', 'roles')
|
||||
MANDATORY_FIELDS = ('id', 'name', 'real_name', 'birth_date', 'birth_place', 'gender', 'nationality', 'short_biography', 'roles')
|
||||
|
||||
def format_obj(self, obj, alias):
|
||||
result = u'%s%s%s\n' % (self.BOLD, obj.name, self.NC)
|
||||
|
|
@ -124,7 +124,7 @@ class PersonInfoFormatter(IFormatter):
|
|||
for movie in lmovies:
|
||||
result += ' * %s\n' % movie
|
||||
result += '\n%sBiography%s\n' % (self.BOLD, self.NC)
|
||||
result += '%s'%obj.biography
|
||||
result += '%s'%obj.short_biography
|
||||
return result
|
||||
|
||||
|
||||
|
|
@ -335,3 +335,19 @@ class Cineoob(ReplApplication):
|
|||
for backend, movie in self.do('iter_person_movies', person.id):
|
||||
self.cached_format(movie)
|
||||
self.flush()
|
||||
|
||||
def do_biography(self, person_id):
    """
    biography person_ID

    Show the complete biography of a person.
    """
    # NOTE(review): the docstring above is kept verbatim; do_* docstrings
    # are presumably shown as command help by the REPL -- confirm before
    # rewording it.

    # Resolve the person first so an unknown ID is reported before any
    # backend is asked for a biography.
    person = self.get_object(person_id, 'get_person')
    if not person:
        # Report the ID the user actually typed. The previous code
        # formatted the builtin ``id`` function here, which printed
        # "<built-in function id>" instead of the requested identifier.
        sys.stderr.write('Person not found: %s\n' % person_id)
        # 3 is the value returned when the lookup fails.
        return 3

    self.change_path([u'biography'])
    # self.do() yields (backend, biography) pairs; only the text is shown.
    for backend, bio in self.do('get_person_biography', person.id):
        # Single-argument parenthesised form: same output under the
        # Python 2 print statement and the Python 3 print function.
        print(bio)
    self.flush()
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class Person(CapBaseObject):
|
|||
birth_place = StringField('City and country of birth of a person')
|
||||
gender = StringField('Gender of a person')
|
||||
nationality = StringField('Nationality of a person')
|
||||
biography = StringField('Short biography of a person')
|
||||
short_biography = StringField('Short biography of a person')
|
||||
roles = Field('Lists of movies related to the person indexed by roles',dict)
|
||||
|
||||
def __init__(self, id, name):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue