upgrade to browser2

2014-03-09 15:44:28 +01:00 · 2014-03-09 15:44:28 +01:00 · af9197fba7
commit af9197fba7
parent 1b2d3cfe48
4 changed files with 89 additions and 83 deletions
--- a/modules/youjizz/pages/video.py
+++ b/modules/youjizz/pages/video.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2010-2011 Roger Philibert
+# Copyright(C) 2010-2014 Roger Philibert
 #
 # This file is part of weboob.
 #
@@ -19,11 +19,12 @@


 import datetime
-import lxml.html
 import re

+from weboob.tools.browser2 import HTMLPage
+from weboob.tools.browser2.page import method, ItemElement
+from weboob.tools.browser2.filters import CleanText, Env
 from weboob.capabilities.base import NotAvailable
-from weboob.tools.browser import BasePage, BrokenPageError
 from weboob.tools.misc import to_unicode

 from ..video import YoujizzVideo
@@ -32,36 +33,36 @@ from ..video import YoujizzVideo
 __all__ = ['VideoPage']


-class VideoPage(BasePage):
-    def get_video(self, video=None):
-        _id = to_unicode(self.group_dict['id'])
-        if video is None:
-            video = YoujizzVideo(_id)
-        title_el = self.parser.select(self.document.getroot(), 'title', 1)
-        video.title = to_unicode(title_el.text.strip())
+class VideoPage(HTMLPage):
+    @method
+    class get_video(ItemElement):
+        klass = YoujizzVideo

-        # youjizz HTML is crap, we must parse it with regexps
-        data = lxml.html.tostring(self.document.getroot())
-        m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>', data)
-        if m:
-            txt = m.group(1).strip()
-            if txt == 'Unknown':
-                video.duration = NotAvailable
+        obj_id = Env('id')
+        obj_title = CleanText('//title')
+
+        def obj_duration(self):
+            # youjizz HTML is crap, we must parse it with regexps
+            m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>', self.page.response.text)
+            if m:
+                txt = m.group(1).strip()
+                if txt == 'Unknown':
+                    return NotAvailable
+                else:
+                    minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
+                    return datetime.timedelta(minutes=minutes, seconds=seconds)
            else:
-                minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
-                video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
-        else:
-            raise BrokenPageError('Unable to retrieve video duration')
+                raise ValueError('Unable to retrieve video duration')

-        real_id = int(_id.split('-')[-1])
-        data = self.browser.readurl('http://www.youjizz.com/videos/embed/%s' % real_id)
+        def obj_url(self):
+            real_id = int(self.env['id'].split('-')[-1])
+            response = self.page.browser.open('http://www.youjizz.com/videos/embed/%s' % real_id)
+            data = response.text

-        video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
-        if len(video_file_urls) == 0:
-            raise BrokenPageError('Video URL not found')
-        elif len(video_file_urls) > 1:
-            raise BrokenPageError('Many video file URL found')
-        else:
-            video.url = to_unicode(video_file_urls[0])
-
-        return video
+            video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
+            if len(video_file_urls) == 0:
+                raise ValueError('Video URL not found')
+            elif len(video_file_urls) > 1:
+                raise ValueError('Many video file URL found')
+            else:
+                return to_unicode(video_file_urls[0])