From a82483727bdbcddfbaf40c2350b5b96e98b2651a Mon Sep 17 00:00:00 2001 From: Julien Veyssier Date: Mon, 4 Mar 2013 05:06:31 +0100 Subject: [PATCH] imdb in progress... --- modules/imdb/browser.py | 8 ++-- modules/imdb/pages.py | 64 ++++++++++++++++++++------ weboob/applications/cineoob/cineoob.py | 9 ++-- weboob/applications/suboob/suboob.py | 2 +- 4 files changed, 62 insertions(+), 21 deletions(-) diff --git a/modules/imdb/browser.py b/modules/imdb/browser.py index 2bfec383..692441ee 100644 --- a/modules/imdb/browser.py +++ b/modules/imdb/browser.py @@ -42,7 +42,8 @@ class ImdbBrowser(BaseBrowser): # the api leads to a json result or the html movie page if there is only one result self.location('http://www.imdb.com/xml/find?json=1&tt=on&q=%s' % pattern.encode('utf-8')) if self.is_on_page(MoviePage): - yield self.page.get_movie() + id = 'tt'+self.geturl().split('/tt')[1].split('/')[0] + yield self.page.get_movie(id) else: res = self.readurl('http://www.imdb.com/xml/find?json=1&tt=on&q=%s' % pattern.encode('utf-8')) jres = json.loads(res) @@ -51,10 +52,11 @@ class ImdbBrowser(BaseBrowser): yield self.get_movie(m['id']) def iter_persons(self, pattern): - # the api leads to a json result or the html movie page if there is only one result + # the api leads to a json result or the html person page if there is only one result self.location('http://www.imdb.com/xml/find?json=1&nm=on&q=%s' % pattern.encode('utf-8')) if self.is_on_page(PersonPage): - yield self.page.get_person() + id = 'nm'+self.geturl().split('/nm')[1].split('/')[0] + yield self.page.get_person(id) else: res = self.readurl('http://www.imdb.com/xml/find?json=1&nm=on&q=%s' % pattern.encode('utf-8')) jres = json.loads(res) diff --git a/modules/imdb/pages.py b/modules/imdb/pages.py index ae23bf9a..9ca4922c 100644 --- a/modules/imdb/pages.py +++ b/modules/imdb/pages.py @@ -33,7 +33,8 @@ class MoviePage(BasePage): def get_movie(self,id): title = NotAvailable duration = NotAvailable - description = NotAvailable.__unicode__() + release_date = NotAvailable + description = NotAvailable td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1) for span in self.parser.select(td_overview,'h1.header span[itemprop=name]'): if span.attrib.get('class','') == 'itemprop': @@ -42,10 +43,12 @@ class MoviePage(BasePage): title = other_titles elif span.attrib.get('class','') == 'title-extra': title = span.text - meta = self.parser.select(td_overview,'meta[itemprop=datePublished]',1) - datestrings = meta.attrib.get('content','').split('-') - if len(datestrings) == 2: - datestrings.append('1') + metas = self.parser.select(td_overview,'meta[itemprop=datePublished]') + if len(metas) > 0: + datestrings = metas[0].attrib.get('content','').split('-') + if len(datestrings) == 2: + datestrings.append('1') + release_date = datetime(int(datestrings[0]),int(datestrings[1]),int(datestrings[2])) time = self.parser.select(td_overview,'time[itemprop=duration]') if len(time) > 0: duration = int(time[0].attrib.get('datetime','').strip(string.letters)) @@ -54,7 +57,7 @@ class MoviePage(BasePage): description = desc[0].text movie = Movie(id,title.strip()) movie.other_titles = other_titles.strip() - movie.release_date = datetime(int(datestrings[0]),int(datestrings[1]),int(datestrings[2])) + movie.release_date = release_date movie.duration = duration movie.description = description movie.note = "10/10" @@ -81,22 +84,55 @@ class MovieCrewPage(BasePage): person.real_name = NotAvailable person.birth_date = NotAvailable person.nationality = NotAvailable + person.biography = NotAvailable person.gender = NotAvailable yield person class PersonPage(BasePage): def get_person(self,id): - person = Person(id,'nameplop') - person.real_name = 'rn' - person.birth_date = datetime.now() - person.birth_place = "place" - person.gender = "M" - person.nationality = "nn" - person.biography = 'bio' + name = NotAvailable + biography = NotAvailable + birth_place = NotAvailable + birth_date = NotAvailable + real_name = NotAvailable + gender = NotAvailable + nationality = NotAvailable + td_overview = self.parser.select(self.document.getroot(),'td#overview-top',1) + descs = self.parser.select(td_overview,'span[itemprop=description]') + if len(descs) > 0: + biography = descs[0].text + names = self.parser.select(td_overview,'h1[itemprop=name]') + if len(names) > 0: + name = names[0].text + times = self.parser.select(td_overview,'time[itemprop=birthDate]') + if len(times) > 0: + time = times[0].attrib.get('datetime','').split('-') + birth_date = datetime(int(time[0]),int(time[1]),int(time[2])) + + person = Person(id,name) + person.real_name = real_name + person.birth_date = birth_date + person.birth_place = birth_place + person.gender = gender + person.nationality = nationality + person.biography = biography person.awards = ["aw1","aw2"] person.roles = {} return person def iter_movies(self,person_id): - pass + for movie_div in self.parser.select(self.document.getroot(),'div[class~=filmo-row]'): + a = self.parser.select(movie_div,'b a',1) + id = a.attrib.get('href','').strip('/').split('/')[-1] + yield self.browser.get_movie(id) + #title = a.text + #movie = Movie(id,title) + #movie.other_titles = NotAvailable + #movie.release_date = NotAvailable + #movie.duration = NotAvailable + #movie.description = NotAvailable + #movie.note = NotAvailable + #movie.awards = NotAvailable + #movie.roles = NotAvailable + #yield movie diff --git a/weboob/applications/cineoob/cineoob.py b/weboob/applications/cineoob/cineoob.py index 0b8632c4..14be0b8d 100644 --- a/weboob/applications/cineoob/cineoob.py +++ b/weboob/applications/cineoob/cineoob.py @@ -52,7 +52,7 @@ class MovieInfoFormatter(IFormatter): for a in obj.awards: result += ' * %s\n' % a result += '\n%sDescription%s\n' % (self.BOLD, self.NC) - result += obj.description + result += '%s'%obj.description return result @@ -93,7 +93,10 @@ class PersonInfoFormatter(IFormatter): result += 'ID: %s\n' % obj.fullid result += 'Real name: %s\n' % obj.real_name result += 'Birth date: %s\n' % obj.birth_date - age = num_years(obj.birth_date) + if obj.birth_date != NotAvailable: + age = num_years(obj.birth_date) + else: + age = NotAvailable result += 'Age: %s\n' % age result += 'Birth place: %s\n' % obj.birth_place result += 'Gender: %s\n' % obj.gender @@ -109,7 +112,7 @@ class PersonInfoFormatter(IFormatter): for a in obj.awards: result += ' * %s\n' % a result += '\n%sBiography%s\n' % (self.BOLD, self.NC) - result += obj.biography + result += '%s'%obj.biography return result diff --git a/weboob/applications/suboob/suboob.py b/weboob/applications/suboob/suboob.py index 6edd960f..35842995 100644 --- a/weboob/applications/suboob/suboob.py +++ b/weboob/applications/suboob/suboob.py @@ -47,7 +47,7 @@ class SubtitleInfoFormatter(IFormatter): result += 'LANG: %s\n' % obj.language result += 'NB CD: %s\n' % obj.nb_cd result += '\n%sDescription%s\n' % (self.BOLD, self.NC) - result += obj.description + result += '%s'%obj.description return result