Bug fixes and improved test coverage.

All of the fixes are essentially CSS selector corrections, made necessary by a few changes in the HTML structure of the pages.
This commit is contained in:
blckshrk 2013-11-03 11:35:19 +01:00 committed by Florent
commit ad3de2eb3c
2 changed files with 36 additions and 13 deletions

View file

@ -23,6 +23,7 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from datetime import datetime from datetime import datetime
import re
__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage'] __all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
@ -33,11 +34,13 @@ class ReleasePage(BasePage):
''' '''
def get_movie_releases(self, country_filter): def get_movie_releases(self, country_filter):
result = unicode() result = unicode()
links = self.parser.select(self.document.getroot(), 'b a') links = self.parser.select(self.document.getroot(), 'table#release_dates a')
for a in links: for a in links:
href = a.attrib.get('href', '') href = a.attrib.get('href', '')
# XXX: re.search() returns None when href has no 'region=XX&' part, so .group(1) would raise AttributeError
if href.strip('/').split('/')[0] == 'calendar' and\ if href.strip('/').split('/')[0] == 'calendar' and\
(country_filter is None or href.split('region=')[-1].lower() == country_filter): (country_filter is None or re.search('region=([a-zA-Z]+)&', href).group(1).lower() == country_filter):
country = a.text country = a.text
td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1] td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
date_links = self.parser.select(td_date, 'a') date_links = self.parser.select(td_date, 'a')
@ -74,14 +77,15 @@ class MovieCrewPage(BasePage):
''' '''
def iter_persons(self, role_filter=None): def iter_persons(self, role_filter=None):
if (role_filter is None or (role_filter is not None and role_filter == 'actor')): if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
tables = self.parser.select(self.document.getroot(), 'table.cast') tables = self.parser.select(self.document.getroot(), 'table.cast_list')
if len(tables) > 0: if len(tables) > 0:
table = tables[0] table = tables[0]
tds = self.parser.select(table, 'td.nm') tds = self.parser.select(table, 'td.itemprop')
for td in tds: for td in tds:
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1] id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
name = unicode(td.find('a').text) name = unicode(td.find('a').text)
char_name = unicode(self.parser.select(td.getparent(), 'td.char', 1).text_content()) char_name = unicode(self.parser.select(td.getparent(), 'td.character', 1).text_content())
person = Person(id, name) person = Person(id, name)
person.short_description = char_name person.short_description = char_name
person.real_name = NotLoaded person.real_name = NotLoaded
@ -95,7 +99,7 @@ class MovieCrewPage(BasePage):
person.thumbnail_url = NotLoaded person.thumbnail_url = NotLoaded
yield person yield person
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing=1] h5 a'): for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing="1"] h5 a'):
role = gloss_link.attrib.get('name', '').rstrip('s') role = gloss_link.attrib.get('name', '').rstrip('s')
if (role_filter is None or (role_filter is not None and role == role_filter)): if (role_filter is None or (role_filter is not None and role == role_filter)):
tbody = gloss_link.getparent().getparent().getparent().getparent() tbody = gloss_link.getparent().getparent().getparent().getparent()
@ -114,12 +118,12 @@ class MovieCrewPage(BasePage):
# yield self.browser.get_person(id) # yield self.browser.get_person(id)
def iter_persons_ids(self): def iter_persons_ids(self):
tables = self.parser.select(self.document.getroot(), 'table.cast') tables = self.parser.select(self.document.getroot(), 'table.cast_list')
if len(tables) > 0: if len(tables) > 0:
table = tables[0] table = tables[0]
tds = self.parser.select(table, 'td.nm') tds = self.parser.select(table, 'td.itemprop')
for td in tds: for td in tds:
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1] id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
yield id yield id
@ -152,7 +156,7 @@ class PersonPage(BasePage):
real_name = unicode(a.text.strip()) real_name = unicode(a.text.strip())
elif 'birth_place' in href: elif 'birth_place' in href:
birth_place = unicode(a.text.lower().strip()) birth_place = unicode(a.text.lower().strip())
names = self.parser.select(td_overview, 'h1[itemprop=name]') names = self.parser.select(td_overview, 'h1 span[itemprop=name]')
if len(names) > 0: if len(names) > 0:
name = unicode(names[0].text.strip()) name = unicode(names[0].text.strip())
times = self.parser.select(td_overview, 'time[itemprop=birthDate]') times = self.parser.select(td_overview, 'time[itemprop=birthDate]')

View file

@ -19,39 +19,45 @@
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
class ImdbTest(BackendTest): class ImdbTest(BackendTest):
BACKEND = 'imdb' BACKEND = 'imdb'
def test_search_movie(self): def test_search_movie(self):
movies = list(self.backend.iter_movies('spiderman')) movies = list(self.backend.iter_movies('spiderman'))
assert len(movies) > 0
for movie in movies: for movie in movies:
assert movie.id assert movie.id
def test_get_movie(self): def test_get_movie(self):
movie = self.backend.get_movie('tt0079980') movie = self.backend.get_movie('tt0079980')
assert movie
assert movie.id assert movie.id
assert movie.original_title assert movie.original_title
def test_search_person(self): def test_search_person(self):
persons = list(self.backend.iter_persons('dewaere')) persons = list(self.backend.iter_persons('dewaere'))
assert len(persons) > 0
for person in persons: for person in persons:
assert person.id assert person.id
def test_get_person(self): def test_get_person(self):
person = self.backend.get_person('nm0223033') person = self.backend.get_person('nm0223033')
assert person
assert person.id assert person.id
assert person.name assert person.name
assert person.birth_date assert person.birth_date
def test_movie_persons(self): def test_movie_persons(self):
persons = list(self.backend.iter_movie_persons('tt0079980')) persons = list(self.backend.iter_movie_persons('tt0079980'))
assert len(persons) > 0
for person in persons: for person in persons:
assert person.id assert person.id
assert person.name assert person.name
assert person.short_description
def test_person_movies(self): def test_person_movies(self):
movies = list(self.backend.iter_person_movies('nm0223033')) movies = list(self.backend.iter_person_movies('nm0223033'))
assert len(movies) > 0
for movie in movies: for movie in movies:
assert movie.id assert movie.id
assert movie.original_title assert movie.original_title
@ -62,6 +68,19 @@ class ImdbTest(BackendTest):
assert bio is not None assert bio is not None
def test_get_movie_releases(self): def test_get_movie_releases(self):
rel = self.backend.get_movie_releases('tt0079980') rel = self.backend.get_movie_releases('tt0079980', 'fr')
assert rel != '' assert rel != ''
assert rel is not None assert rel is not None
assert rel == 'France : 25 April 1979'
def test_iter_person_movies_ids(self):
movies_ids = list(self.backend.iter_person_movies_ids('nm0223033'))
assert len(movies_ids) > 0
for movie_id in movies_ids:
assert movie_id
def test_iter_movie_persons_ids(self):
persons_ids = list(self.backend.iter_movie_persons_ids('tt0079980'))
assert len(persons_ids) > 0
for person_id in persons_ids:
assert person_id