move select() in parser

This commit is contained in:
Romain Bignon 2011-04-08 19:39:57 +02:00
commit 9afb301ebe
30 changed files with 197 additions and 197 deletions

View file

@@ -22,7 +22,7 @@ import datetime
import re
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select, SelectElementException
from weboob.tools.browser import BrokenPageError
from ..video import YoujizzVideo
@@ -32,25 +32,25 @@ __all__ = ['IndexPage']
class IndexPage(BasePage):
def iter_videos(self):
span_list = select(self.document.getroot(), 'span#miniatura')
span_list = self.parser.select(self.document.getroot(), 'span#miniatura')
for span in span_list:
a = select(span, 'a', 1)
a = self.parser.select(span, 'a', 1)
url = a.attrib['href']
_id = re.sub(r'/videos/(.+)\.html', r'\1', url)
thumbnail_url = span.find('.//img').attrib['src']
title_el = select(span, 'span#title1', 1)
title_el = self.parser.select(span, 'span#title1', 1)
title = title_el.text.strip()
time_span = select(span, 'span.thumbtime span', 1)
time_span = self.parser.select(span, 'span.thumbtime span', 1)
time_txt = time_span.text.strip().replace(';', ':')
if time_txt == 'N/A':
minutes, seconds = 0, 0
elif ':' in time_txt:
minutes, seconds = (int(v) for v in time_txt.split(':'))
else:
raise SelectElementException('Unable to parse the video duration: %s' % time_txt)
raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
yield YoujizzVideo(_id,

View file

@@ -23,9 +23,8 @@ import lxml.html
import re
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.misc import to_unicode
from weboob.tools.parsers.lxmlparser import select, SelectElementException
from ..video import YoujizzVideo
@@ -39,7 +38,7 @@ class VideoPage(BasePage):
_id = to_unicode(self.group_dict['id'])
if video is None:
video = YoujizzVideo(_id)
title_el = select(self.document.getroot(), 'title', 1)
title_el = self.parser.select(self.document.getroot(), 'title', 1)
video.title = to_unicode(title_el.text.strip())
# youjizz HTML is crap, we must parse it with regexps
@@ -53,13 +52,13 @@ class VideoPage(BasePage):
minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
else:
raise SelectElementException('Unable to retrieve video duration')
raise BrokenPageError('Unable to retrieve video duration')
video_file_urls = re.findall(r'"(http://media[^ ,]+\.flv)"', data)
if len(video_file_urls) == 0:
raise SelectElementException('Video URL not found')
raise BrokenPageError('Video URL not found')
elif len(video_file_urls) > 1:
raise SelectElementException('Many video file URL found')
raise BrokenPageError('Many video file URL found')
else:
video.url = video_file_urls[0]