[cineoob] new command : biography

This commit is contained in:
Julien Veyssier 2013-03-05 16:24:10 +01:00
commit 27c36d412b
5 changed files with 67 additions and 17 deletions

View file

@ -62,3 +62,6 @@ class ImdbBackend(BaseBackend, ICapCinema):
def iter_movie_persons_ids(self, id):
    """Delegate to ``self.browser.iter_movie_persons_ids``.

    :param id: identifier forwarded unchanged to the browser
    :returns: whatever the browser call returns
    """
    browser = self.browser
    return browser.iter_movie_persons_ids(id)
def get_person_biography(self, id):
    """Delegate to ``self.browser.get_person_biography``.

    :param id: identifier forwarded unchanged to the browser
    :returns: whatever the browser call returns
    """
    browser = self.browser
    return browser.get_person_biography(id)

View file

@ -23,7 +23,7 @@ from weboob.capabilities.base import NotAvailable
from weboob.capabilities.cinema import Movie
from weboob.tools.json import json
from .pages import MoviePage, PersonPage, MovieCrewPage
from .pages import MoviePage, PersonPage, MovieCrewPage, BiographyPage
from datetime import datetime
@ -38,7 +38,8 @@ class ImdbBrowser(BaseBrowser):
# Associates IMDb URL patterns with the page class imported from .pages.
# NOTE(review): the keys look like regular expressions matched against the
# visited URL -- confirm against BaseBrowser how overlapping patterns
# (e.g. the generic name pattern vs. the /bio one) are resolved.
PAGES = {
'http://www.imdb.com/title/tt[0-9]*/*': MoviePage,
'http://www.imdb.com/title/tt[0-9]*/fullcredits.*': MovieCrewPage,
'http://www.imdb.com/name/nm.*': PersonPage,
'http://www.imdb.com/name/nm[0-9]*/*': PersonPage,
'http://www.imdb.com/name/nm[0-9]*/bio.*': BiographyPage,
}
def iter_movies(self, pattern):
@ -121,6 +122,11 @@ class ImdbBrowser(BaseBrowser):
assert self.is_on_page(PersonPage)
return self.page.get_person(id)
def get_person_biography(self, id):
    """Open the IMDb ``/bio`` page of a person and return its biography.

    :param id: value interpolated into the ``/name/<id>/bio`` URL
    :returns: the result of ``get_biography()`` on the loaded page
    :raises AssertionError: if the loaded page is not a ``BiographyPage``
    """
    bio_url = 'http://www.imdb.com/name/%s/bio' % id
    self.location(bio_url)
    # Same page-type guard the neighbouring browser methods use.
    assert self.is_on_page(BiographyPage)
    biography_page = self.page
    return biography_page.get_biography()
def iter_movie_persons(self, movie_id):
self.location('http://www.imdb.com/title/%s' % movie_id)
assert self.is_on_page(MoviePage)

View file

@ -44,6 +44,17 @@ class MoviePage(BasePage):
yield p
class BiographyPage(BasePage):
    ''' Page holding the full biography text of a person
    '''
    def get_biography(self):
        ''' Concatenate the text of every <p> under div#tn15content.

        Each paragraph is stripped and prefixed with two newlines, so a
        non-empty result starts with a blank-line separator. Returns an
        empty string when the content block has no paragraph.
        '''
        root = self.document.getroot()
        content = self.parser.select(root, 'div#tn15content', 1)
        paragraphs = self.parser.select(content, 'p')
        return ''.join('\n\n%s' % par.text_content().strip()
                       for par in paragraphs)
class MovieCrewPage(BasePage):
''' Page listing all the persons related to a movie
'''
@ -72,7 +83,7 @@ class PersonPage(BasePage):
'''
def get_person(self,id):
name = NotAvailable
biography = NotAvailable
short_biography = NotAvailable
birth_place = NotAvailable
birth_date = NotAvailable
death_date = NotAvailable
@ -83,7 +94,7 @@ class PersonPage(BasePage):
td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1)
descs = self.parser.select(td_overview,'span[itemprop=description]')
if len(descs) > 0:
biography = descs[0].text
short_biography = descs[0].text
rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
links = self.parser.select(rname_block[0].getparent(),'a')
@ -114,17 +125,31 @@ class PersonPage(BasePage):
dtime.append('1')
dtime.append('1')
death_date = datetime(int(dtime[0]),int(dtime[1]),int(dtime[2]))
# TODO IMPROVE THIS -----------
#for role in ['Actor','Composer']:
# show_span = self.parser.select(self.document.getroot(),'span[id=show-%s]' % role)
# if len(show_span) > 0:
# roles[role] = []
# filmo_block = show_span[0].getparent()
# filmo_block = filmo_block.getnext()
roles['actor'] = []
# TODO IMPROVE THIS, apparently there's an error in parsing, quite hard to handle -----------
#filmo_block = self.parser.select(self.document.getroot(),'div#filmography',1)
#role_list = []
#for span in self.parser.select(self.document.getroot(),'span.show-link'):
# role_list.append(span.attrib.get('id','').replace('show-',''))
#role_index = -1
#current_parent = None
##for sp in self.parser.select(filmo_block[0],'span.show-link'):
#for divmovie in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
# divhead = divmovie.getparent()
# print "-- %s"%(self.document.getpath(divhead))
# print divmovie.attrib.get('class','')
# if current_parent != self.document.getpath(divhead):
# role_index += 1
# current_parent = self.document.getpath(divhead)
# role = role_list[role_index]
# a = self.parser.select(divmovie,'b a',1)
# roles[role].append(a.text)
#print roles
roles['any activity'] = []
for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'):
a = self.parser.select(movie_div,'b a',1)
roles['actor'].append(a.text)
roles['any activity'].append(a.text)
person = Person(id,name)
person.real_name = real_name
@ -133,7 +158,7 @@ class PersonPage(BasePage):
person.birth_place = birth_place
person.gender = gender
person.nationality = nationality
person.biography = biography
person.short_biography = short_biography
person.roles = roles
return person

View file

@ -98,7 +98,7 @@ def num_years(begin, end=None):
return num_years
class PersonInfoFormatter(IFormatter):
MANDATORY_FIELDS = ('id', 'name', 'real_name', 'birth_date', 'birth_place', 'gender', 'nationality', 'biography', 'roles')
MANDATORY_FIELDS = ('id', 'name', 'real_name', 'birth_date', 'birth_place', 'gender', 'nationality', 'short_biography', 'roles')
def format_obj(self, obj, alias):
result = u'%s%s%s\n' % (self.BOLD, obj.name, self.NC)
@ -124,7 +124,7 @@ class PersonInfoFormatter(IFormatter):
for movie in lmovies:
result += ' * %s\n' % movie
result += '\n%sBiography%s\n' % (self.BOLD, self.NC)
result += '%s'%obj.biography
result += '%s'%obj.short_biography
return result
@ -335,3 +335,19 @@ class Cineoob(ReplApplication):
for backend, movie in self.do('iter_person_movies', person.id):
self.cached_format(movie)
self.flush()
def do_biography(self, person_id):
    """
    biography person_ID
    Show the complete biography of a person.
    """
    # Resolve the person first so an unknown id is reported before any
    # backend is asked for a biography.
    person = self.get_object(person_id, 'get_person')
    if not person:
        # Report the id that was actually requested. The previous code
        # interpolated the builtin ``id`` function, which printed
        # "<built-in function id>" instead of the user's input.
        sys.stderr.write('Person not found: %s\n' % person_id)
        return 3

    self.change_path([u'biography'])
    for backend, bio in self.do('get_person_biography', person.id):
        # Single-argument print(...) emits the same output as the
        # Python 2 print statement used elsewhere in this file.
        print(bio)
    self.flush()

View file

@ -53,7 +53,7 @@ class Person(CapBaseObject):
birth_place = StringField('City and country of birth of a person')
gender = StringField('Gender of a person')
nationality = StringField('Nationality of a person')
biography = StringField('Short biography of a person')
short_biography = StringField('Short biography of a person')
roles = Field('Lists of movies related to the person indexed by roles',dict)
def __init__(self, id, name):