move select() into parser

2011-04-08 19:39:57 +02:00 · 2011-04-08 19:39:57 +02:00 · 9afb301ebe
commit 9afb301ebe
parent cf2dca7520
30 changed files with 197 additions and 197 deletions
--- a/weboob/backends/youjizz/pages/index.py
+++ b/weboob/backends/youjizz/pages/index.py
@@ -22,7 +22,7 @@ import datetime
 import re

 from weboob.tools.browser import BasePage
-from weboob.tools.parsers.lxmlparser import select, SelectElementException
+from weboob.tools.browser import BrokenPageError

 from ..video import YoujizzVideo

@@ -32,25 +32,25 @@ __all__ = ['IndexPage']

 class IndexPage(BasePage):
    def iter_videos(self):
-        span_list = select(self.document.getroot(), 'span#miniatura')
+        span_list = self.parser.select(self.document.getroot(), 'span#miniatura')
        for span in span_list:
-            a = select(span, 'a', 1)
+            a = self.parser.select(span, 'a', 1)
            url = a.attrib['href']
            _id = re.sub(r'/videos/(.+)\.html', r'\1', url)

            thumbnail_url = span.find('.//img').attrib['src']

-            title_el = select(span, 'span#title1', 1)
+            title_el = self.parser.select(span, 'span#title1', 1)
            title = title_el.text.strip()

-            time_span = select(span, 'span.thumbtime span', 1)
+            time_span = self.parser.select(span, 'span.thumbtime span', 1)
            time_txt = time_span.text.strip().replace(';', ':')
            if time_txt == 'N/A':
                minutes, seconds = 0, 0
            elif ':' in time_txt:
                minutes, seconds = (int(v) for v in time_txt.split(':'))
            else:
-                raise SelectElementException('Unable to parse the video duration: %s' % time_txt)
+                raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)


            yield YoujizzVideo(_id,
--- a/weboob/backends/youjizz/pages/video.py
+++ b/weboob/backends/youjizz/pages/video.py
@@ -23,9 +23,8 @@ import lxml.html
 import re

 from weboob.capabilities.base import NotAvailable
-from weboob.tools.browser import BasePage
+from weboob.tools.browser import BasePage, BrokenPageError
 from weboob.tools.misc import to_unicode
-from weboob.tools.parsers.lxmlparser import select, SelectElementException

 from ..video import YoujizzVideo

@@ -39,7 +38,7 @@ class VideoPage(BasePage):
        _id = to_unicode(self.group_dict['id'])
        if video is None:
            video = YoujizzVideo(_id)
-        title_el = select(self.document.getroot(), 'title', 1)
+        title_el = self.parser.select(self.document.getroot(), 'title', 1)
        video.title = to_unicode(title_el.text.strip())

        # youjizz HTML is crap, we must parse it with regexps
@@ -53,13 +52,13 @@ class VideoPage(BasePage):
                minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
                video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
        else:
-            raise SelectElementException('Unable to retrieve video duration')
+            raise BrokenPageError('Unable to retrieve video duration')

        video_file_urls = re.findall(r'"(http://media[^ ,]+\.flv)"', data)
        if len(video_file_urls) == 0:
-            raise SelectElementException('Video URL not found')
+            raise BrokenPageError('Video URL not found')
        elif len(video_file_urls) > 1:
-            raise SelectElementException('Many video file URL found')
+            raise BrokenPageError('Many video file URL found')
        else:
            video.url = video_file_urls[0]