From c23352fc224e7ad2c2bb09bcb72cc4cbb867377c Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Wed, 6 Mar 2013 15:29:13 +0100 Subject: [PATCH] [imdb] biography purified --- modules/imdb/pages.py | 12 ++++++------ weboob/applications/cineoob/cineoob.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/imdb/pages.py b/modules/imdb/pages.py index 2b9c96dc..3e9dd7f4 100644 --- a/modules/imdb/pages.py +++ b/modules/imdb/pages.py @@ -48,13 +48,13 @@ class BiographyPage(BasePage): ''' Page containing biography of a person ''' def get_biography(self): - bio = '' + bio = unicode() tn = self.parser.select(self.document.getroot(),'div#tn15content',1) - #for p in self.parser.select(tn,'p'): - # bio += '\n\n%s'%p.text_content().strip() - # get children, append if label or tag = a,p,h... - bio = tn.text_content().strip() - if bio == "": + # we only read paragraphs, titles and links + for ch in tn.getchildren(): + if ch.tag in ['p','h5','a']: + bio += '%s\n\n'%ch.text_content().strip() + if bio == u'': bio = NotAvailable return bio diff --git a/weboob/applications/cineoob/cineoob.py b/weboob/applications/cineoob/cineoob.py index fbebe677..1ab5e0bf 100644 --- a/weboob/applications/cineoob/cineoob.py +++ b/weboob/applications/cineoob/cineoob.py @@ -367,6 +367,6 @@ class Cineoob(ReplApplication): return 3 for backend, bio in self.do('get_person_biography', person.id): - print bio + print '%s :\n\n%s' % (person.name,bio) if bio != NotAvailable: self.flush()