upgrade to browser2

This commit is contained in:
Romain Bignon 2014-03-09 15:44:28 +01:00
commit af9197fba7
4 changed files with 89 additions and 83 deletions

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
# Copyright(C) 2010-2014 Roger Philibert
#
# This file is part of weboob.
#
@@ -19,11 +19,12 @@
import datetime
import lxml.html
import re
from weboob.tools.browser2 import HTMLPage
from weboob.tools.browser2.page import method, ItemElement
from weboob.tools.browser2.filters import CleanText, Env
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.misc import to_unicode
from ..video import YoujizzVideo
@@ -32,36 +33,36 @@ from ..video import YoujizzVideo
__all__ = ['VideoPage']
# NOTE(review): this region is a rendered diff hunk whose +/- markers and
# indentation were lost, so removed and added lines are interleaved.
# Lines that use BasePage, self.parser / self.document, self.browser.readurl
# and BrokenPageError look like the previous implementation; lines that use
# HTMLPage, ItemElement, Env / CleanText and ValueError look like the
# browser2 replacement. Confirm against the real commit before relying on
# any single line here.

# --- apparently the old version: a page method that fills a YoujizzVideo ---
class VideoPage(BasePage):
def get_video(self, video=None):
# The video id is read from the page's group_dict and a YoujizzVideo is
# created only when the caller did not pass one in.
_id = to_unicode(self.group_dict['id'])
if video is None:
video = YoujizzVideo(_id)
title_el = self.parser.select(self.document.getroot(), 'title', 1)
video.title = to_unicode(title_el.text.strip())
# --- apparently the new version: a declarative ItemElement on an HTMLPage ---
class VideoPage(HTMLPage):
@method
class get_video(ItemElement):
# presumably the object type this element builds -- verify against the
# browser2 ItemElement contract
klass = YoujizzVideo
# youjizz HTML is crap, we must parse it with regexps
data = lxml.html.tostring(self.document.getroot())
m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>', data)
if m:
txt = m.group(1).strip()
if txt == 'Unknown':
video.duration = NotAvailable
# presumably 'id' comes from the element environment and '//title' is an
# XPath evaluated on the page -- TODO confirm with browser2 filters docs
obj_id = Env('id')
obj_title = CleanText('//title')
# Duration is extracted from the raw response text with a regex on the
# "Runtime" label. The literal text 'Unknown' yields NotAvailable; any other
# text is split on ':' into exactly two integer fields (minutes, seconds).
# NOTE(review): a value with a different number of ':'-separated fields
# (e.g. "1:02:03") would fail the two-name unpacking with ValueError.
def obj_duration(self):
# youjizz HTML is crap, we must parse it with regexps
m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>', self.page.response.text)
if m:
txt = m.group(1).strip()
if txt == 'Unknown':
return NotAvailable
else:
minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
return datetime.timedelta(minutes=minutes, seconds=seconds)
else:
minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
else:
# The two raises below carry the same message; the exception type differs
# (BrokenPageError vs ValueError), presumably old vs new side of the diff.
raise BrokenPageError('Unable to retrieve video duration')
raise ValueError('Unable to retrieve video duration')
# The numeric id is the integer after the last '-' of the video id; it is
# used to build the embed page URL that is fetched to find the media file.
real_id = int(_id.split('-')[-1])
data = self.browser.readurl('http://www.youjizz.com/videos/embed/%s' % real_id)
def obj_url(self):
real_id = int(self.env['id'].split('-')[-1])
response = self.page.browser.open('http://www.youjizz.com/videos/embed/%s' % real_id)
data = response.text
# The regex collects double-quoted http:// URLs on a *.youjizz.com host that
# end in .flv, optionally followed by a query string. Exactly one match is
# required: zero or several matches raise an error.
# NOTE(review): only the http:// scheme is matched; https URLs would not be
# found.
video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
if len(video_file_urls) == 0:
raise BrokenPageError('Video URL not found')
elif len(video_file_urls) > 1:
raise BrokenPageError('Many video file URL found')
else:
video.url = to_unicode(video_file_urls[0])
return video
video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
if len(video_file_urls) == 0:
raise ValueError('Video URL not found')
elif len(video_file_urls) > 1:
raise ValueError('Many video file URL found')
else:
return to_unicode(video_file_urls[0])