[imdb] biography purified

This commit is contained in:
Julien Veyssier 2013-03-06 15:29:13 +01:00
commit c23352fc22
2 changed files with 7 additions and 7 deletions

View file

@@ -48,13 +48,13 @@ class BiographyPage(BasePage):
     ''' Page containing biography of a person
     '''
     def get_biography(self):
-        bio = ''
+        bio = unicode()
         tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
-        #for p in self.parser.select(tn,'p'):
-        #    bio += '\n\n%s'%p.text_content().strip()
-        # get children, append if label or tag = a,p,h...
-        bio = tn.text_content().strip()
+        # we only read paragraphs, titles and links
+        for ch in tn.getchildren():
+            if ch.tag in ['p','h5','a']:
+                bio += '%s\n\n'%ch.text_content().strip()
-        if bio == "":
+        if bio == u'':
             bio = NotAvailable
         return bio

View file

@@ -367,6 +367,6 @@ class Cineoob(ReplApplication):
             return 3
         for backend, bio in self.do('get_person_biography', person.id):
-            print bio
+            print '%s :\n\n%s' % (person.name,bio)
             if bio != NotAvailable:
                 self.flush()