[imdb] biography purified
This commit is contained in:
parent
2d0d76a6ad
commit
c23352fc22
2 changed files with 7 additions and 7 deletions
|
|
@@ -48,13 +48,13 @@ class BiographyPage(BasePage):
|
|||
''' Page containing biography of a person
|
||||
'''
|
||||
def get_biography(self):
|
||||
bio = ''
|
||||
bio = unicode()
|
||||
tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
|
||||
#for p in self.parser.select(tn,'p'):
|
||||
# bio += '\n\n%s'%p.text_content().strip()
|
||||
# get children, append if label or tag = a,p,h...
|
||||
bio = tn.text_content().strip()
|
||||
if bio == "":
|
||||
# we only read paragraphs, titles and links
|
||||
for ch in tn.getchildren():
|
||||
if ch.tag in ['p','h5','a']:
|
||||
bio += '%s\n\n'%ch.text_content().strip()
|
||||
if bio == u'':
|
||||
bio = NotAvailable
|
||||
return bio
|
||||
|
||||
|
|
|
|||
|
|
@@ -367,6 +367,6 @@ class Cineoob(ReplApplication):
|
|||
return 3
|
||||
|
||||
for backend, bio in self.do('get_person_biography', person.id):
|
||||
print bio
|
||||
print '%s :\n\n%s' % (person.name,bio)
|
||||
if bio != NotAvailable:
|
||||
self.flush()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue