[imdb] biography purified
This commit is contained in:
parent
2d0d76a6ad
commit
c23352fc22
2 changed files with 7 additions and 7 deletions
|
|
@ -48,13 +48,13 @@ class BiographyPage(BasePage):
|
||||||
''' Page containing biography of a person
|
''' Page containing biography of a person
|
||||||
'''
|
'''
|
||||||
def get_biography(self):
|
def get_biography(self):
|
||||||
bio = ''
|
bio = unicode()
|
||||||
tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
|
tn = self.parser.select(self.document.getroot(),'div#tn15content',1)
|
||||||
#for p in self.parser.select(tn,'p'):
|
# we only read paragraphs, titles and links
|
||||||
# bio += '\n\n%s'%p.text_content().strip()
|
for ch in tn.getchildren():
|
||||||
# get children, append if label or tag = a,p,h...
|
if ch.tag in ['p','h5','a']:
|
||||||
bio = tn.text_content().strip()
|
bio += '%s\n\n'%ch.text_content().strip()
|
||||||
if bio == "":
|
if bio == u'':
|
||||||
bio = NotAvailable
|
bio = NotAvailable
|
||||||
return bio
|
return bio
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -367,6 +367,6 @@ class Cineoob(ReplApplication):
|
||||||
return 3
|
return 3
|
||||||
|
|
||||||
for backend, bio in self.do('get_person_biography', person.id):
|
for backend, bio in self.do('get_person_biography', person.id):
|
||||||
print bio
|
print '%s :\n\n%s' % (person.name,bio)
|
||||||
if bio != NotAvailable:
|
if bio != NotAvailable:
|
||||||
self.flush()
|
self.flush()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue