diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py
index 71efb156..01745a88 100644
--- a/weboob/backends/youjizz/browser.py
+++ b/weboob/backends/youjizz/browser.py
@@ -1,29 +1,29 @@
# -*- coding: utf-8 -*-
-"""
-Copyright(C) 2010 Roger Philibert
+# Copyright(C) 2010 Roger Philibert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-"""
+import lxml
+import re
import urllib
from weboob.tools.browser import BaseBrowser
from .pages.index import IndexPage
-from .pages.video import VideoPage
+from .video import YoujizzVideo
__all__ = ['YoujizzBrowser']
@@ -33,13 +33,30 @@ class YoujizzBrowser(BaseBrowser):
DOMAIN = 'youjizz.com'
PROTOCOL = 'http'
PAGES = {r'http://.*youjizz\.com/?': IndexPage,
- r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
r'http://.*youjizz\.com/search/.+\.html': IndexPage,
}
-
+
def get_video(self, url):
- self.location(url)
- return self.page.video
+        """
+        Fetch a video page and build a YoujizzVideo from its raw HTML.
+
+        The page body is read with openurl() and scraped with regular
+        expressions; the id is taken from the URL itself.
+
+        :param url: address of a video page
+        :return: a YoujizzVideo flagged nsfw; its title and url are None
+                 and its duration is 0 when the matching pattern is not
+                 found in the page
+        """
+        # Imported locally: this module has no top-level import of
+        # `warning`, so the multi-URL branch below raised NameError.
+        from logging import warning
+
+        data = self.openurl(url).read()
+
+        def _get_url():
+            # Return the first quoted media .flv URL found in the page,
+            # or None when the page contains none.
+            video_file_urls = re.findall(r'"(http://media[^ ,]+\.flv)"', data)
+            if not video_file_urls:
+                return None
+            if len(video_file_urls) > 1:
+                warning('Many video file URL found for given URL: %s' % video_file_urls)
+            return video_file_urls[0]
+
+        m = re.search(r'http://.*youjizz\.com/videos/(.+)\.html', url)
+        _id = unicode(m.group(1)) if m else None
+
+        # NOTE(review): the HTML tags inside the next two patterns were
+        # missing from the reviewed text (the first literal was split over
+        # two lines, the second captured a single character). They are
+        # reconstructed here -- confirm against the committed file.
+        m = re.search(r'<title>(.+)</title>', data)
+        title = unicode(m.group(1)) if m else None
+
+        m = re.search(r'<strong>.*Runtime.*</strong>(.+)<br.*>', data)
+        if m:
+            # Runtime is expected as "MM:SS"; convert it to seconds.
+            minutes, seconds = (int(v) for v in unicode(m.group(1).strip()).split(':'))
+            duration = minutes * 60 + seconds
+        else:
+            duration = 0
+
+        return YoujizzVideo(_id=u'youjizz:%s' % _id, title=title, url=_get_url(), duration=duration, nsfw=True)
def iter_page_urls(self, mozaic_url):
+        """Unconditionally raise NotImplementedError; mozaic_url is unused."""
raise NotImplementedError()
diff --git a/weboob/backends/youjizz/pages/video.py b/weboob/backends/youjizz/pages/video.py
deleted file mode 100644
index ef5b5d53..00000000
--- a/weboob/backends/youjizz/pages/video.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-Copyright(C) 2010 Roger Philibert
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, version 3 of the License.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-"""
-
-from logging import error, warning
-import re
-
-from weboob.tools.browser import BasePage
-
-from ..video import YoujizzVideo
-
-class VideoPage(BasePage):
- URL_REGEX = re.compile(r'http://.*youjizz\.com/videos/.+-(\d+)\.html')
- VIDEO_FILE_REGEX = re.compile(r'"(http://media[^ ,]+\.flv)"')
-
- def on_loaded(self):
- details = self.get_details()
- self.video = YoujizzVideo(_id=self.get_id(), title=details.get('title', u''), url=self.get_url(),
- duration=details.get('duration', 0), nsfw=True)
-
- def get_id(self):
- m = self.URL_REGEX.match(self.url)
- if m:
- return int(m.group(1))
- warning("Unable to parse ID")
- return 0
-
- def get_url(self):
- video_file_urls = re.findall(self.VIDEO_FILE_REGEX, self.browser.parser.tostring(self.document))
- if len(video_file_urls) == 0:
- return None
- else:
- if len(video_file_urls) > 1:
- error('Many video file URL found for given URL: %s' % video_file_urls)
- return video_file_urls[0]
-
- def get_details(self):
- results = {}
- div = self.document.getroot().cssselect('#video_text')[0]
- results['title'] = unicode(div.find('h2').text).strip()
- minutes, seconds = [int(v) for v in [e for e in div.cssselect('strong') if e.text.startswith('Runtime')][0].tail.split(':')]
- results['duration'] = minutes * 60 + seconds
- return results