upgrade to browser2

This commit is contained in:
Romain Bignon 2014-03-09 15:44:28 +01:00
commit af9197fba7
4 changed files with 89 additions and 83 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2012 Roger Philibert
# Copyright(C) 2010-2014 Roger Philibert
#
# This file is part of weboob.
#
@ -21,9 +21,10 @@
import datetime
import re
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.browser2 import HTMLPage
from weboob.tools.browser2.page import ListElement, method, ItemElement
from weboob.tools.browser2.filters import Filter, Link, CleanText
from weboob.capabilities.image import BaseImage
from weboob.tools.misc import to_unicode
from ..video import YoujizzVideo
@ -31,35 +32,42 @@ from ..video import YoujizzVideo
__all__ = ['IndexPage']
class IndexPage(BasePage):
def iter_videos(self):
span_list = self.parser.select(self.document.getroot(), 'span#miniatura')
for span in span_list:
a = self.parser.select(span, 'a', 1)
url = a.attrib['href']
_id = re.sub(r'/videos/(.+)\.html', r'\1', url)
class IndexPage(HTMLPage):
@method
class iter_videos(ListElement):
item_xpath = '//span[@id="miniatura"]'
video = YoujizzVideo(_id)
next_page = Link(u'//a[text()="Next »"]')
video.thumbnail = BaseImage(span.find('.//img').attrib['data-original'])
video.thumbnail.url = video.thumbnail.id
class item(ItemElement):
klass = YoujizzVideo
title_el = self.parser.select(span, 'span#title1', 1)
video.title = to_unicode(title_el.text.strip())
class Id(Filter):
def filter(self, link):
return re.sub(r'/videos/(.+)\.html', r'\1', link)
time_span = self.parser.select(span, 'span.thumbtime span', 1)
time_txt = time_span.text.strip().replace(';', ':')
hours, minutes, seconds = 0, 0, 0
if ':' in time_txt:
t = time_txt.split(':')
t.reverse()
seconds = int(t[0])
minutes = int(t[1])
if len(t) == 3:
hours = int(t[2])
elif time_txt != 'N/A':
raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
class Duration(Filter):
def filter(self, txt):
    """Convert a duration label into a ``datetime.timedelta``.

    Accepted labels are ``MM:SS`` and ``HH:MM:SS``; ``;`` is treated as
    ``:``. The literal ``N/A`` yields a zero duration.

    :param txt: duration text to parse
    :returns: the parsed duration
    :rtype: datetime.timedelta
    :raises ValueError: if the label is neither ``N/A`` nor two or three
                        integer fields separated by ``:``
    """
    time_txt = txt.replace(';', ':')
    if time_txt == 'N/A':
        return datetime.timedelta(hours=0, minutes=0, seconds=0)

    fields = time_txt.split(':')
    # Reject anything other than MM:SS / HH:MM:SS instead of silently
    # dropping the extra leading fields.
    if len(fields) not in (2, 3):
        raise ValueError('Unable to parse the video duration: %s' % time_txt)

    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        # Report non-numeric fields with the same message as other failures.
        raise ValueError('Unable to parse the video duration: %s' % time_txt)

    # Fields are ordered most-significant first; hours are optional.
    seconds = numbers[-1]
    minutes = numbers[-2]
    hours = numbers[0] if len(numbers) == 3 else 0
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
yield video
obj_id = Id(Link('.//a'))
obj_title = CleanText('.//span[@id="title1"]')
obj_duration = Duration(CleanText('.//span[@class="thumbtime"]//span'))
def obj_thumbnail(self):
    """Build the thumbnail image for the current item.

    The image identifier is the ``data-original`` attribute of the first
    element matched by the ``.//img`` XPath; the image URL is set to that
    same identifier.
    """
    img_element = self.xpath('.//img')[0]
    image = BaseImage(img_element.attrib['data-original'])
    image.url = image.id
    return image