[imdb] get movie with imdbapi

This commit is contained in:
Julien Veyssier 2013-03-04 13:30:21 +01:00
commit ce64153161
4 changed files with 87 additions and 33 deletions

View file

@ -19,10 +19,13 @@
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.cinema import Movie
from weboob.tools.json import json from weboob.tools.json import json
from .pages import MoviePage, PersonPage, MovieCrewPage from .pages import MoviePage, PersonPage, MovieCrewPage
from datetime import datetime
__all__ = ['ImdbBrowser'] __all__ = ['ImdbBrowser']
@ -39,35 +42,81 @@ class ImdbBrowser(BaseBrowser):
} }
def iter_movies(self, pattern):
    """
    Search IMDb titles matching ``pattern`` and yield the movies found.

    The JSON find endpoint is fetched with self.readurl() and the result
    categories are walked in a fixed order: exact, popular, approximate.
    Categories missing from the response are skipped; any other category
    present in the response is ignored.

    :param pattern: search text; it is UTF-8 encoded and inserted as-is
                    into the query string
    :return: generator yielding self.get_movie(id) for every hit
    """
    # NOTE(review): ``nr=1`` presumably prevents the redirect to the title
    # page when there is a single hit -- confirm against the IMDb endpoint.
    res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=%s' % pattern.encode('utf-8'))
    jres = json.loads(res)
    for cat in ('title_exact', 'title_popular', 'title_approx'):
        # dict.get() instead of dict.has_key(), which Python 3 removed
        for m in jres.get(cat, ()):
            yield self.get_movie(m['id'])
def iter_persons(self, pattern):
    """
    Search IMDb names matching ``pattern`` and yield the persons found.

    The JSON find endpoint is fetched with self.readurl() and the result
    categories are walked in a fixed order: exact, popular, approximate.
    Categories missing from the response are skipped; any other category
    present in the response is ignored.

    :param pattern: search text; it is UTF-8 encoded and inserted as-is
                    into the query string
    :return: generator yielding self.get_person(id) for every hit
    """
    # NOTE(review): ``nr=1`` presumably prevents the redirect to the person
    # page when there is a single hit -- confirm against the IMDb endpoint.
    res = self.readurl('http://www.imdb.com/xml/find?json=1&nr=1&nm=on&q=%s' % pattern.encode('utf-8'))
    jres = json.loads(res)
    for cat in ('name_exact', 'name_popular', 'name_approx'):
        # dict.get() instead of dict.has_key(), which Python 3 removed
        for p in jres.get(cat, ()):
            yield self.get_person(p['id'])
def get_movie(self, id):
    """
    Build a Movie from the imdbapi.org JSON record of the title ``id``.

    Scalar fields absent from the record stay NotAvailable; other_titles
    and roles default to an empty list and an empty dict.

    :param id: title identifier, inserted as-is into the request URL
    :return: the populated Movie object
    :raises KeyError: if the record carries no 'title' entry
    """
    res = self.readurl('http://imdbapi.org/?id=%s&type=json&plot=simple&episode=1&lang=en-US&aka=full&release=simple&business=0&tech=0' % id )
    jres = json.loads(res)

    duration = NotAvailable
    release_date = NotAvailable
    description = NotAvailable
    country = NotAvailable
    note = NotAvailable
    roles = {}

    title = jres['title']

    # presumably a list such as ["136 min"] -- confirm against the API;
    # an empty list is treated like a missing entry
    runtime = jres.get('runtime')
    if runtime:
        duration = int(runtime[0].split()[0])

    # alternative titles lacking a country or a title are dropped
    other_titles = ['%s : %s' % (aka['country'], aka['title'])
                    for aka in jres.get('also_known_as', ())
                    if 'country' in aka and 'title' in aka]

    if 'release_date' in jres:
        # presumably a YYYYMMDD number (e.g. 20130304) -- confirm
        dstr = str(jres['release_date'])
        # The month spans two characters: the former one-character slice
        # [4:5] only read its first digit, so March ("03") and December
        # ("12") both ended up as January.
        # A zero or missing part falls back to 1 so that datetime()
        # accepts partially known dates.
        year, month, day = [int(part or 0) or 1
                            for part in (dstr[:4], dstr[4:6], dstr[6:8])]
        release_date = datetime(year, month, day)

    if 'country' in jres:
        country = ', '.join('%s' % c for c in jres['country'])

    if 'plot_simple' in jres:
        description = jres['plot_simple']

    if 'rating' in jres and 'rating_count' in jres:
        note = "%s/10 (%s votes)" % (jres['rating'], jres['rating_count'])

    # the record uses plural keys ('actors'); roles is indexed by the
    # singular form ('actor')
    for role in ('actor', 'director', 'writer'):
        key = '%ss' % role
        if key in jres:
            roles[role] = list(jres[key])

    movie = Movie(id, title.strip())
    movie.other_titles = other_titles
    movie.release_date = release_date
    movie.duration = duration
    movie.description = description
    movie.country = country
    movie.note = note
    movie.roles = roles
    return movie
def get_person(self, id): def get_person(self, id):
self.location('http://www.imdb.com/name/%s' % id) self.location('http://www.imdb.com/name/%s' % id)

View file

@ -108,6 +108,11 @@ class PersonPage(BasePage):
times = self.parser.select(td_overview,'time[itemprop=birthDate]') times = self.parser.select(td_overview,'time[itemprop=birthDate]')
if len(times) > 0: if len(times) > 0:
time = times[0].attrib.get('datetime','').split('-') time = times[0].attrib.get('datetime','').split('-')
if len(time) == 2:
time.append('1')
elif len(time) == 1:
time.append('1')
time.append('1')
birth_date = datetime(int(time[0]),int(time[1]),int(time[2])) birth_date = datetime(int(time[0]),int(time[1]),int(time[2]))
person = Person(id,name) person = Person(id,name)

View file

@ -32,25 +32,25 @@ __all__ = ['Cineoob']
class MovieInfoFormatter(IFormatter): class MovieInfoFormatter(IFormatter):
MANDATORY_FIELDS = ('id', 'original_title', 'release_date', 'other_titles', 'duration', 'description', 'note', 'awards','roles') MANDATORY_FIELDS = ('id', 'original_title', 'release_date', 'other_titles', 'duration', 'description', 'note', 'roles', 'country')
def format_obj(self, obj, alias):
    """
    Render a movie as a multi-line text block.

    The output starts with the bold title, followed by the ID, release
    date, country, duration and note lines. A "Related persons" section
    (names grouped by role) and an "Other titles" section are emitted only
    when obj.roles / obj.other_titles are non-empty. The description
    always comes last.

    :param obj: object exposing original_title, fullid, release_date,
                country, duration, note, roles, other_titles, description
    :param alias: not used by this formatter
    :return: the formatted text
    """
    bold, nc = self.BOLD, self.NC
    # collect the pieces and join once instead of growing a string with +=
    lines = [
        u'%s%s%s\n' % (bold, obj.original_title, nc),
        'ID: %s\n' % obj.fullid,
        'Released: %s\n' % obj.release_date,
        'Country: %s\n' % obj.country,
        'Duration: %s\n' % obj.duration,
        'Note: %s\n' % obj.note,
    ]
    if obj.roles:
        lines.append('\n%sRelated persons%s\n' % (bold, nc))
        for role, lpersons in obj.roles.items():
            lines.append(' -- %s\n' % role)
            lines.extend(' * %s\n' % name for name in lpersons)
    if obj.other_titles:
        lines.append('\n%sOther titles%s\n' % (bold, nc))
        lines.extend(' * %s\n' % t for t in obj.other_titles)
    lines.append('\n%sDescription%s\n' % (bold, nc))
    lines.append('%s' % obj.description)
    return u''.join(lines)

View file

@ -29,12 +29,12 @@ class Movie(CapBaseObject):
Movie object. Movie object.
""" """
original_title = StringField('Original title of the movie') original_title = StringField('Original title of the movie')
other_titles = StringField('Titles in other languages') other_titles = Field('Titles in other countries',list)
release_date = DateField('Release date of the movie') release_date = DateField('Release date of the movie')
duration = IntField('Duration of the movie in minutes') duration = IntField('Duration of the movie in minutes')
description = StringField('Short description of the movie') description = StringField('Short description of the movie')
country = StringField('Origin country of the movie')
note = StringField('Notation of the movie') note = StringField('Notation of the movie')
awards = Field('Awards won by the movie',list)
roles = Field('Lists of Persons related to the movie indexed by roles',dict) roles = Field('Lists of Persons related to the movie indexed by roles',dict)
def __init__(self, id, original_title): def __init__(self, id, original_title):