[imdb] biography purified

This commit is contained in:
Julien Veyssier 2013-03-06 15:29:13 +01:00
commit c23352fc22
2 changed files with 7 additions and 7 deletions

View file

@@ -48,13 +48,13 @@ class BiographyPage(BasePage):
''' Page containing biography of a person
'''
def get_biography(self):
bio = ''
bio = unicode()
tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
#for p in self.parser.select(tn,'p'):
# bio += '\n\n%s'%p.text_content().strip()
# get children, append if label or tag = a,p,h...
bio = tn.text_content().strip()
if bio == "":
# we only read paragraphs, titles and links
for ch in tn.getchildren():
if ch.tag in ['p','h5','a']:
bio += '%s\n\n'%ch.text_content().strip()
if bio == u'':
bio = NotAvailable
return bio

View file

@@ -367,6 +367,6 @@ class Cineoob(ReplApplication):
return 3
for backend, bio in self.do('get_person_biography', person.id):
print bio
print '%s :\n\n%s' % (person.name,bio)
if bio != NotAvailable:
self.flush()