From 0036ee21a7ef43727f1ac39eb1d0a74ac83023d1 Mon Sep 17 00:00:00 2001
From: Julien Veyssier
Date: Mon, 4 Mar 2013 16:57:47 +0100
Subject: [PATCH] [cineoob] new command persons_in_common

---
Review notes (placed below the three-dash marker, so they are not part of
the commit message):

* This copy of the patch was re-flowed from a whitespace-collapsed paste.
  Line breaks follow the hunk headers, but indentation and column alignment
  (notably the formatter dictionary in cineoob.py) are reconstructed and may
  differ from upstream; TODO confirm, or apply with
  `git apply --ignore-whitespace`.
* do_persons_in_common sets self.options.count to None and restores it only
  after both iter_movie_persons_ids loops; there is no try/finally, so an
  exception raised in between skips the restore.
* do_persons_in_common calls parse_command_args(line, 2, 1) although the
  usage string lists two movie IDs; presumably the last argument is the
  number of required arguments -- verify whether 2 was intended.
* MovieCrewPage.iter_persons_ids dereferences td.find('a').attrib without
  checking the result of find() (same expression as the existing
  iter_persons context line).
* The PersonPage hunk assigns real_name and birth_place; their
  initialisation and use lie outside the lines shown in this patch.

 modules/imdb/backend.py                |  3 ++
 modules/imdb/browser.py                |  5 ++++
 modules/imdb/pages.py                  | 24 ++++++++++++++++
 weboob/applications/cineoob/cineoob.py | 39 +++++++++++++++++++++++++-
 weboob/capabilities/cinema.py          | 10 +++++++
 5 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/modules/imdb/backend.py b/modules/imdb/backend.py
index b73056ef..d4b40b6d 100644
--- a/modules/imdb/backend.py
+++ b/modules/imdb/backend.py
@@ -59,3 +59,6 @@ class ImdbBackend(BaseBackend, ICapCinema):
 
     def iter_person_movies_ids(self, id):
         return self.browser.iter_person_movies_ids(id)
+
+    def iter_movie_persons_ids(self, id):
+        return self.browser.iter_movie_persons_ids(id)
diff --git a/modules/imdb/browser.py b/modules/imdb/browser.py
index e20c54e7..efdce967 100644
--- a/modules/imdb/browser.py
+++ b/modules/imdb/browser.py
@@ -135,3 +135,8 @@ class ImdbBrowser(BaseBrowser):
         self.location('http://www.imdb.com/name/%s' % person_id)
         assert self.is_on_page(PersonPage)
         return self.page.iter_movies_ids(person_id)
+
+    def iter_movie_persons_ids(self, movie_id):
+        self.location('http://www.imdb.com/title/%s' % movie_id)
+        assert self.is_on_page(MoviePage)
+        return self.page.iter_persons_ids(movie_id)
diff --git a/modules/imdb/pages.py b/modules/imdb/pages.py
index 44da0d45..e6d5ebbe 100644
--- a/modules/imdb/pages.py
+++ b/modules/imdb/pages.py
@@ -37,6 +37,12 @@ class MoviePage(BasePage):
         for p in self.browser.page.iter_persons():
             yield p
 
+    def iter_persons_ids(self,id):
+        self.browser.location('http://www.imdb.com/title/%s/fullcredits'%id)
+        assert self.browser.is_on_page(MovieCrewPage)
+        for p in self.browser.page.iter_persons_ids():
+            yield p
+
 
 class MovieCrewPage(BasePage):
     ''' Page listing all the persons related to a movie
@@ -50,6 +56,15 @@ class MovieCrewPage(BasePage):
                 id = td.find('a').attrib.get('href','').strip('/').split('/')[-1]
                 yield self.browser.get_person(id)
 
+    def iter_persons_ids(self):
+        tables = self.parser.select(self.document.getroot(),'table.cast')
+        if len(tables) > 0:
+            table = tables[0]
+            tds = self.parser.select(table,'td.nm')
+            for td in tds:
+                id = td.find('a').attrib.get('href','').strip('/').split('/')[-1]
+                yield id
+
 
 class PersonPage(BasePage):
     ''' Page giving informations about a person
@@ -67,6 +82,15 @@ class PersonPage(BasePage):
         descs = self.parser.select(td_overview,'span[itemprop=description]')
         if len(descs) > 0:
             biography = descs[0].text
+        rname_block = self.parser.select(td_overview,'div.txt-block h4.inline')
+        if len(rname_block) > 0 and "born" in rname_block[0].text.lower():
+            links = self.parser.select(rname_block[0].getparent(),'a')
+            for a in links:
+                href = a.attrib.get('href','').strip()
+                if href == 'bio':
+                    real_name = a.text.strip()
+                elif 'birth_place' in href:
+                    birth_place = a.text.lower().strip()
         names = self.parser.select(td_overview,'h1[itemprop=name]')
         if len(names) > 0:
             name = names[0].text.strip()
diff --git a/weboob/applications/cineoob/cineoob.py b/weboob/applications/cineoob/cineoob.py
index d0f18a62..4190290e 100644
--- a/weboob/applications/cineoob/cineoob.py
+++ b/weboob/applications/cineoob/cineoob.py
@@ -159,7 +159,8 @@ class Cineoob(ReplApplication):
                            'info_person': 'person_info',
                            'casting': 'person_list',
                            'filmography': 'movie_list',
-                           'movies_in_common':'movie_list'
+                           'movies_in_common':'movie_list',
+                           'persons_in_common':'person_list'
                            }
 
     def complete_info(self, text, line, *ignored):
@@ -202,6 +203,42 @@ class Cineoob(ReplApplication):
             self.cached_format(movie)
         self.flush()
 
+    def do_persons_in_common(self, line):
+        """
+        persons_in_common movie_ID movie_ID
+
+        Get the list of common persons between two movies.
+        """
+        id1, id2 = self.parse_command_args(line, 2, 1)
+        self.flush()
+
+        movie1 = self.get_object(id1, 'get_movie')
+        if not movie1:
+            print >>sys.stderr, 'Movie not found: %s' % id1
+            return 3
+        movie2 = self.get_object(id2, 'get_movie')
+        if not movie2:
+            print >>sys.stderr, 'Movie not found: %s' % id2
+            return 3
+
+        initial_count = self.options.count
+        self.options.count = None
+
+        lid1 = []
+        for backend, id in self.do('iter_movie_persons_ids', movie1.id):
+            lid1.append(id)
+        self.flush()
+        lid2 = []
+        for backend, id in self.do('iter_movie_persons_ids', movie2.id):
+            lid2.append(id)
+        self.flush()
+        self.options.count = initial_count
+        inter = list(set(lid1) & set(lid2))
+        for common in inter:
+            person = self.get_object(common, 'get_person')
+            self.cached_format(person)
+        self.flush()
+
     def do_info_movie(self, id):
         """
         info_movie movie_ID
diff --git a/weboob/capabilities/cinema.py b/weboob/capabilities/cinema.py
index 65d57fc4..c68f6417 100644
--- a/weboob/capabilities/cinema.py
+++ b/weboob/capabilities/cinema.py
@@ -143,3 +143,13 @@ class ICapCinema(IBaseCap):
         :rtype: iter[str]
         """
         raise NotImplementedError()
+
+    def iter_movie_persons_ids(self, _id):
+        """
+        Get the list of person ids related to a movie.
+
+        :param _id: ID of movie
+        :type _id: str
+        :rtype: iter[str]
+        """
+        raise NotImplementedError()