From fbdf44e27af95534b657b56d7138b2d765fee246 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Fri, 8 Jun 2012 13:56:13 +0200 Subject: [PATCH] search works again --- modules/nolifetv/browser.py | 13 ++++++++++--- modules/nolifetv/pages/index.py | 22 +++++++++------------- modules/nolifetv/pages/video.py | 2 ++ modules/nolifetv/video.py | 4 ++-- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/modules/nolifetv/browser.py b/modules/nolifetv/browser.py index 827b8f91..bdb1d31b 100644 --- a/modules/nolifetv/browser.py +++ b/modules/nolifetv/browser.py @@ -33,10 +33,11 @@ __all__ = ['NolifeTVBrowser'] class NolifeTVBrowser(BaseBrowser): DOMAIN = 'online.nolife-tv.com' - ENCODING = None + ENCODING = 'utf-8' PAGES = {r'http://online.nolife-tv.com/index.php\??': IndexPage, r'http://online.nolife-tv.com/': IndexPage, - r'http://online.nolife-tv.com/index.php\?id=(?P.+)': VideoPage} + r'http://online.nolife-tv.com/do.php': IndexPage, + r'http://online.nolife-tv.com/emission-(?P.+)/?.*': VideoPage} def is_logged(self): if self.password is None: @@ -70,7 +71,13 @@ class NolifeTVBrowser(BaseBrowser): return self.page.get_video(video) def search_videos(self, pattern): - self.location('/index.php?', 'search=%s' % urllib.quote_plus(pattern.encode('utf-8'))) + data = {'a': 'search', + 'search': pattern.encode('utf-8'), + 'vu': 'all', + } + self.openurl('/do.php', urllib.urlencode(data)) + self.location('/do.php', 'a=em') + assert self.is_on_page(IndexPage) return self.page.iter_videos() diff --git a/modules/nolifetv/pages/index.py b/modules/nolifetv/pages/index.py index 7fc58400..3c41c013 100644 --- a/modules/nolifetv/pages/index.py +++ b/modules/nolifetv/pages/index.py @@ -33,26 +33,22 @@ __all__ = ['IndexPage'] class IndexPage(BasePage): def iter_videos(self): - div_list = self.parser.select(self.document.getroot(), 'div.ligne_video') - for div in div_list: - m = re.match('index.php\?id=(\d+)', div.find('a').attrib['href']) + for div in self.parser.select(self.document.getroot(), 'div.data_emissions ul li'): + m = re.match('id-(\d+)', div.attrib.get('class', '')) if not m: continue + + img = self.parser.select(div, 'a img', 1) + video = NolifeTVVideo(m.group(1)) - video.title = self.parser.select(div, 'span.span_title', 1).text - video.description = self.parser.select(div, 'span.span_description', 1).text - video.thumbnail = Thumbnail(self.parser.select(div, 'div.screen_video', 1).find('img').attrib['src']) + video.title = unicode(img.attrib['alt']) + video.description = unicode(self.parser.select(div, 'div.tooltip div.border-bottom p')[-1].text) + video.thumbnail = Thumbnail(unicode(img.attrib['src'])) try: video.date = parse_dt(self.parser.select(div, 'div.infos_video span.span_title', 1).text.strip()) except Exception: video.date = NotAvailable - rating_url = self.parser.select(div, 'span.description img')[0].attrib['src'] - m = re.match('.*view_level(\d+)\.gif', rating_url) - if m: - video.rating = int(m.group(1)) - video.rating_max = 21 - else: - video.rating = video.rating_max = NotAvailable + video.set_empty_fields(NotAvailable, ('url',)) yield video diff --git a/modules/nolifetv/pages/video.py b/modules/nolifetv/pages/video.py index 2d71c5c8..33d70a4c 100644 --- a/modules/nolifetv/pages/video.py +++ b/modules/nolifetv/pages/video.py @@ -83,4 +83,6 @@ class VideoPage(BasePage): raise ForbiddenVideo(values.get('message', 'Not available').decode('iso-8859-15')) video.url = values['url'] + video.set_empty_fields(NotAvailable) + return video diff --git a/modules/nolifetv/video.py b/modules/nolifetv/video.py index d8c276d1..088570fd 100644 --- a/modules/nolifetv/video.py +++ b/modules/nolifetv/video.py @@ -27,8 +27,8 @@ __all__ = ['NolifeTVVideo'] class NolifeTVVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) - self.ext = 'mp4' + self.ext = u'mp4' @classmethod def id2url(cls, _id): - return 'http://online.nolife-tv.com/index.php?id=%s' % _id + return 'http://online.nolife-tv.com/emission-%s/' % _id