Bug fixes and improved test coverage.

All of the fixes are essentially CSS selector corrections, made necessary by a few changes in the HTML structure of the pages.
This commit is contained in:
blckshrk 2013-11-03 11:35:19 +01:00 committed by Florent
commit ad3de2eb3c
2 changed files with 36 additions and 13 deletions

View file

@ -23,6 +23,7 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
from weboob.tools.browser import BasePage
from datetime import datetime
import re
__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
@ -33,11 +34,13 @@ class ReleasePage(BasePage):
'''
def get_movie_releases(self, country_filter):
result = unicode()
links = self.parser.select(self.document.getroot(), 'b a')
links = self.parser.select(self.document.getroot(), 'table#release_dates a')
for a in links:
href = a.attrib.get('href', '')
# XXX: search() could raise an exception
if href.strip('/').split('/')[0] == 'calendar' and\
(country_filter is None or href.split('region=')[-1].lower() == country_filter):
(country_filter is None or re.search('region=([a-zA-Z]+)&', href).group(1).lower() == country_filter):
country = a.text
td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
date_links = self.parser.select(td_date, 'a')
@ -74,14 +77,15 @@ class MovieCrewPage(BasePage):
'''
def iter_persons(self, role_filter=None):
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
tables = self.parser.select(self.document.getroot(), 'table.cast')
tables = self.parser.select(self.document.getroot(), 'table.cast_list')
if len(tables) > 0:
table = tables[0]
tds = self.parser.select(table, 'td.nm')
tds = self.parser.select(table, 'td.itemprop')
for td in tds:
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
name = unicode(td.find('a').text)
char_name = unicode(self.parser.select(td.getparent(), 'td.char', 1).text_content())
char_name = unicode(self.parser.select(td.getparent(), 'td.character', 1).text_content())
person = Person(id, name)
person.short_description = char_name
person.real_name = NotLoaded
@ -95,7 +99,7 @@ class MovieCrewPage(BasePage):
person.thumbnail_url = NotLoaded
yield person
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing=1] h5 a'):
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing="1"] h5 a'):
role = gloss_link.attrib.get('name', '').rstrip('s')
if (role_filter is None or (role_filter is not None and role == role_filter)):
tbody = gloss_link.getparent().getparent().getparent().getparent()
@ -114,12 +118,12 @@ class MovieCrewPage(BasePage):
# yield self.browser.get_person(id)
def iter_persons_ids(self):
    """Yield the id of every person listed in the page's cast table.

    Only the first ``table.cast_list`` element of the document is read.
    For each ``td.itemprop`` cell in it, the id is the second
    '/'-separated segment of the cell link's ``href`` once surrounding
    slashes are stripped (presumably hrefs look like ``/name/<id>/...``;
    confirm against the live page).

    Cells without a link, or whose href has no second segment, are
    skipped instead of raising AttributeError / IndexError.
    """
    tables = self.parser.select(self.document.getroot(), 'table.cast_list')
    if len(tables) == 0:
        # No cast table on this page: nothing to yield.
        return
    for cell in self.parser.select(tables[0], 'td.itemprop'):
        link = cell.find('a')
        if link is None:
            continue
        segments = link.attrib.get('href', '').strip('/').split('/')
        if len(segments) < 2:
            # A missing or empty href yields a single segment only.
            continue
        yield segments[1]
@ -152,7 +156,7 @@ class PersonPage(BasePage):
real_name = unicode(a.text.strip())
elif 'birth_place' in href:
birth_place = unicode(a.text.lower().strip())
names = self.parser.select(td_overview, 'h1[itemprop=name]')
names = self.parser.select(td_overview, 'h1 span[itemprop=name]')
if len(names) > 0:
name = unicode(names[0].text.strip())
times = self.parser.select(td_overview, 'time[itemprop=birthDate]')

View file

@ -19,39 +19,45 @@
from weboob.tools.test import BackendTest
class ImdbTest(BackendTest):
BACKEND = 'imdb'
def test_search_movie(self):
    """Searching for 'spiderman' returns at least one movie, each with an id."""
    hits = list(self.backend.iter_movies('spiderman'))
    assert len(hits) > 0
    for hit in hits:
        assert hit.id
def test_get_movie(self):
    """Movie 'tt0079980' can be fetched and has an id and an original title."""
    fetched = self.backend.get_movie('tt0079980')
    assert fetched
    # Check the attributes one by one so a failure names the missing field.
    for field in ('id', 'original_title'):
        assert getattr(fetched, field)
def test_search_person(self):
    """Searching for 'dewaere' returns at least one person, each with an id."""
    hits = list(self.backend.iter_persons('dewaere'))
    assert len(hits) > 0
    for hit in hits:
        assert hit.id
def test_get_person(self):
    """Person 'nm0223033' can be fetched and has an id, a name and a birth date."""
    fetched = self.backend.get_person('nm0223033')
    assert fetched
    # Check the attributes one by one so a failure names the missing field.
    for field in ('id', 'name', 'birth_date'):
        assert getattr(fetched, field)
def test_movie_persons(self):
    """Movie 'tt0079980' lists at least one person; each has an id, a name and
    a short description."""
    crew = list(self.backend.iter_movie_persons('tt0079980'))
    assert len(crew) > 0
    for member in crew:
        for field in ('id', 'name', 'short_description'):
            assert getattr(member, field)
def test_person_movies(self):
    """Person 'nm0223033' has at least one movie; each has an id and an
    original title."""
    filmography = list(self.backend.iter_person_movies('nm0223033'))
    assert len(filmography) > 0
    for entry in filmography:
        for field in ('id', 'original_title'):
            assert getattr(entry, field)
@ -62,6 +68,19 @@ class ImdbTest(BackendTest):
assert bio is not None
def test_get_movie_releases(self):
    """Check the release line returned for movie 'tt0079980' filtered on 'fr'.

    The backend is queried once, with the 'fr' country filter. The earlier
    unfiltered call was dropped because its result was overwritten before
    being checked, which only cost an extra backend request.
    """
    rel = self.backend.get_movie_releases('tt0079980', 'fr')
    assert rel != ''
    assert rel is not None
    assert rel == 'France : 25 April 1979'
def test_iter_person_movies_ids(self):
    """Person 'nm0223033' yields at least one movie id, none of them empty."""
    found_ids = list(self.backend.iter_person_movies_ids('nm0223033'))
    assert len(found_ids) > 0
    for found_id in found_ids:
        assert found_id
def test_iter_movie_persons_ids(self):
    """Movie 'tt0079980' yields at least one person id, none of them empty."""
    found_ids = list(self.backend.iter_movie_persons_ids('tt0079980'))
    assert len(found_ids) > 0
    for found_id in found_ids:
        assert found_id