Bug fixes and improved test coverage.
All of the fixes are essentially CSS selector corrections, made necessary by a few changes in the HTML structure of the pages.
This commit is contained in:
parent
f13f7bad6c
commit
ad3de2eb3c
2 changed files with 36 additions and 13 deletions
|
|
@ -23,6 +23,7 @@ from weboob.capabilities.base import NotAvailable, NotLoaded
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
|
__all__ = ['PersonPage', 'MovieCrewPage', 'BiographyPage', 'FilmographyPage', 'ReleasePage']
|
||||||
|
|
@ -33,11 +34,13 @@ class ReleasePage(BasePage):
|
||||||
'''
|
'''
|
||||||
def get_movie_releases(self, country_filter):
|
def get_movie_releases(self, country_filter):
|
||||||
result = unicode()
|
result = unicode()
|
||||||
links = self.parser.select(self.document.getroot(), 'b a')
|
links = self.parser.select(self.document.getroot(), 'table#release_dates a')
|
||||||
for a in links:
|
for a in links:
|
||||||
href = a.attrib.get('href', '')
|
href = a.attrib.get('href', '')
|
||||||
|
|
||||||
|
# XXX: search() could raise an exception
|
||||||
if href.strip('/').split('/')[0] == 'calendar' and\
|
if href.strip('/').split('/')[0] == 'calendar' and\
|
||||||
(country_filter is None or href.split('region=')[-1].lower() == country_filter):
|
(country_filter is None or re.search('region=([a-zA-Z]+)&', href).group(1).lower() == country_filter):
|
||||||
country = a.text
|
country = a.text
|
||||||
td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
|
td_date = self.parser.select(a.getparent().getparent().getparent(), 'td')[1]
|
||||||
date_links = self.parser.select(td_date, 'a')
|
date_links = self.parser.select(td_date, 'a')
|
||||||
|
|
@ -74,14 +77,15 @@ class MovieCrewPage(BasePage):
|
||||||
'''
|
'''
|
||||||
def iter_persons(self, role_filter=None):
|
def iter_persons(self, role_filter=None):
|
||||||
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
|
if (role_filter is None or (role_filter is not None and role_filter == 'actor')):
|
||||||
tables = self.parser.select(self.document.getroot(), 'table.cast')
|
tables = self.parser.select(self.document.getroot(), 'table.cast_list')
|
||||||
if len(tables) > 0:
|
if len(tables) > 0:
|
||||||
table = tables[0]
|
table = tables[0]
|
||||||
tds = self.parser.select(table, 'td.nm')
|
tds = self.parser.select(table, 'td.itemprop')
|
||||||
|
|
||||||
for td in tds:
|
for td in tds:
|
||||||
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
|
id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
|
||||||
name = unicode(td.find('a').text)
|
name = unicode(td.find('a').text)
|
||||||
char_name = unicode(self.parser.select(td.getparent(), 'td.char', 1).text_content())
|
char_name = unicode(self.parser.select(td.getparent(), 'td.character', 1).text_content())
|
||||||
person = Person(id, name)
|
person = Person(id, name)
|
||||||
person.short_description = char_name
|
person.short_description = char_name
|
||||||
person.real_name = NotLoaded
|
person.real_name = NotLoaded
|
||||||
|
|
@ -95,7 +99,7 @@ class MovieCrewPage(BasePage):
|
||||||
person.thumbnail_url = NotLoaded
|
person.thumbnail_url = NotLoaded
|
||||||
yield person
|
yield person
|
||||||
|
|
||||||
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing=1] h5 a'):
|
for gloss_link in self.parser.select(self.document.getroot(), 'table[cellspacing="1"] h5 a'):
|
||||||
role = gloss_link.attrib.get('name', '').rstrip('s')
|
role = gloss_link.attrib.get('name', '').rstrip('s')
|
||||||
if (role_filter is None or (role_filter is not None and role == role_filter)):
|
if (role_filter is None or (role_filter is not None and role == role_filter)):
|
||||||
tbody = gloss_link.getparent().getparent().getparent().getparent()
|
tbody = gloss_link.getparent().getparent().getparent().getparent()
|
||||||
|
|
@ -114,12 +118,12 @@ class MovieCrewPage(BasePage):
|
||||||
# yield self.browser.get_person(id)
|
# yield self.browser.get_person(id)
|
||||||
|
|
||||||
def iter_persons_ids(self):
|
def iter_persons_ids(self):
|
||||||
tables = self.parser.select(self.document.getroot(), 'table.cast')
|
tables = self.parser.select(self.document.getroot(), 'table.cast_list')
|
||||||
if len(tables) > 0:
|
if len(tables) > 0:
|
||||||
table = tables[0]
|
table = tables[0]
|
||||||
tds = self.parser.select(table, 'td.nm')
|
tds = self.parser.select(table, 'td.itemprop')
|
||||||
for td in tds:
|
for td in tds:
|
||||||
id = td.find('a').attrib.get('href', '').strip('/').split('/')[-1]
|
id = td.find('a').attrib.get('href', '').strip('/').split('/')[1]
|
||||||
yield id
|
yield id
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -152,7 +156,7 @@ class PersonPage(BasePage):
|
||||||
real_name = unicode(a.text.strip())
|
real_name = unicode(a.text.strip())
|
||||||
elif 'birth_place' in href:
|
elif 'birth_place' in href:
|
||||||
birth_place = unicode(a.text.lower().strip())
|
birth_place = unicode(a.text.lower().strip())
|
||||||
names = self.parser.select(td_overview, 'h1[itemprop=name]')
|
names = self.parser.select(td_overview, 'h1 span[itemprop=name]')
|
||||||
if len(names) > 0:
|
if len(names) > 0:
|
||||||
name = unicode(names[0].text.strip())
|
name = unicode(names[0].text.strip())
|
||||||
times = self.parser.select(td_overview, 'time[itemprop=birthDate]')
|
times = self.parser.select(td_overview, 'time[itemprop=birthDate]')
|
||||||
|
|
|
||||||
|
|
@ -19,39 +19,45 @@
|
||||||
|
|
||||||
from weboob.tools.test import BackendTest
|
from weboob.tools.test import BackendTest
|
||||||
|
|
||||||
|
|
||||||
class ImdbTest(BackendTest):
|
class ImdbTest(BackendTest):
|
||||||
BACKEND = 'imdb'
|
BACKEND = 'imdb'
|
||||||
|
|
||||||
def test_search_movie(self):
|
def test_search_movie(self):
|
||||||
movies = list(self.backend.iter_movies('spiderman'))
|
movies = list(self.backend.iter_movies('spiderman'))
|
||||||
|
assert len(movies) > 0
|
||||||
for movie in movies:
|
for movie in movies:
|
||||||
assert movie.id
|
assert movie.id
|
||||||
|
|
||||||
def test_get_movie(self):
|
def test_get_movie(self):
|
||||||
movie = self.backend.get_movie('tt0079980')
|
movie = self.backend.get_movie('tt0079980')
|
||||||
|
assert movie
|
||||||
assert movie.id
|
assert movie.id
|
||||||
assert movie.original_title
|
assert movie.original_title
|
||||||
|
|
||||||
def test_search_person(self):
|
def test_search_person(self):
|
||||||
persons = list(self.backend.iter_persons('dewaere'))
|
persons = list(self.backend.iter_persons('dewaere'))
|
||||||
|
assert len(persons) > 0
|
||||||
for person in persons:
|
for person in persons:
|
||||||
assert person.id
|
assert person.id
|
||||||
|
|
||||||
def test_get_person(self):
|
def test_get_person(self):
|
||||||
person = self.backend.get_person('nm0223033')
|
person = self.backend.get_person('nm0223033')
|
||||||
|
assert person
|
||||||
assert person.id
|
assert person.id
|
||||||
assert person.name
|
assert person.name
|
||||||
assert person.birth_date
|
assert person.birth_date
|
||||||
|
|
||||||
def test_movie_persons(self):
|
def test_movie_persons(self):
|
||||||
persons = list(self.backend.iter_movie_persons('tt0079980'))
|
persons = list(self.backend.iter_movie_persons('tt0079980'))
|
||||||
|
assert len(persons) > 0
|
||||||
for person in persons:
|
for person in persons:
|
||||||
assert person.id
|
assert person.id
|
||||||
assert person.name
|
assert person.name
|
||||||
|
assert person.short_description
|
||||||
|
|
||||||
def test_person_movies(self):
|
def test_person_movies(self):
|
||||||
movies = list(self.backend.iter_person_movies('nm0223033'))
|
movies = list(self.backend.iter_person_movies('nm0223033'))
|
||||||
|
assert len(movies) > 0
|
||||||
for movie in movies:
|
for movie in movies:
|
||||||
assert movie.id
|
assert movie.id
|
||||||
assert movie.original_title
|
assert movie.original_title
|
||||||
|
|
@ -62,6 +68,19 @@ class ImdbTest(BackendTest):
|
||||||
assert bio is not None
|
assert bio is not None
|
||||||
|
|
||||||
def test_get_movie_releases(self):
|
def test_get_movie_releases(self):
|
||||||
rel = self.backend.get_movie_releases('tt0079980')
|
rel = self.backend.get_movie_releases('tt0079980', 'fr')
|
||||||
assert rel != ''
|
assert rel != ''
|
||||||
assert rel is not None
|
assert rel is not None
|
||||||
|
assert rel == 'France : 25 April 1979'
|
||||||
|
|
||||||
|
def test_iter_person_movies_ids(self):
|
||||||
|
movies_ids = list(self.backend.iter_person_movies_ids('nm0223033'))
|
||||||
|
assert len(movies_ids) > 0
|
||||||
|
for movie_id in movies_ids:
|
||||||
|
assert movie_id
|
||||||
|
|
||||||
|
def test_iter_movie_persons_ids(self):
|
||||||
|
persons_ids = list(self.backend.iter_movie_persons_ids('tt0079980'))
|
||||||
|
assert len(persons_ids) > 0
|
||||||
|
for person_id in persons_ids:
|
||||||
|
assert person_id
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue