# -*- coding: utf-8 -*-

"""
Copyright(C) 2010  Roger Philibert

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

"""

import re
from logging import error, warning

from weboob.capabilities.video import Video
from weboob.tools.browser import BasePage


class VideoPage(BasePage):
    """Parser for a single youjizz video page.

    After ``on_loaded()`` has run, the extracted data is available as
    ``self.video``.
    """

    # Captures the numeric video ID that ends the page URL slug.
    URL_REGEX = re.compile(r'http://.*youjizz\.com/videos/.+-(\d+)\.html')
    # Captures a quoted media-server ``.flv`` URL embedded in the page markup.
    VIDEO_FILE_REGEX = re.compile(r'"(http://media[^ ,]+\.flv)"')

    def on_loaded(self):
        """Build ``self.video`` from the page URL and document.

        NOTE(review): presumably called by the browser framework once the
        document has been parsed -- confirm against ``BasePage``.
        """
        details = self.get_details()
        self.video = Video(_id=self.get_id(),
                           title=details.get('title', u''),
                           url=self.get_url(),
                           duration=details.get('duration', 0),
                           nsfw=True)

    def get_id(self):
        """Return the numeric video ID taken from ``self.url``.

        Logs a warning and returns 0 when the URL does not match
        ``URL_REGEX``.
        """
        m = self.URL_REGEX.match(self.url)
        if m:
            return int(m.group(1))
        warning("Unable to parse ID")
        return 0

    def get_url(self):
        """Return the first ``.flv`` file URL found in the page, or None.

        When several candidate URLs are present, an error is logged and the
        first one is still returned.
        """
        page_source = self.browser.parser.tostring(self.document)
        video_file_urls = self.VIDEO_FILE_REGEX.findall(page_source)
        if not video_file_urls:
            return None
        if len(video_file_urls) > 1:
            error('Many video file URL found for given URL: %s', video_file_urls)
        return video_file_urls[0]

    def get_details(self):
        """Extract the title and duration from the ``#video_text`` block.

        Returns a dict that may contain ``'title'`` (unicode) and
        ``'duration'`` (int, seconds). A key is left out when the matching
        element is missing or unparsable, so callers should read the result
        with ``.get()`` and a default, as ``on_loaded()`` does.
        """
        results = {}

        blocks = self.document.getroot().cssselect('#video_text')
        if not blocks:
            warning("Unable to find video details")
            return results
        div = blocks[0]

        heading = div.find('h2')
        if heading is not None and heading.text:
            results['title'] = unicode(heading.text).strip()

        # The runtime is the text that follows the first <strong> whose own
        # text starts with "Runtime". A <strong> without leading text has
        # ``text is None`` and is skipped.
        for elem in div.cssselect('strong'):
            if elem.text and elem.text.startswith('Runtime'):
                duration = self._parse_runtime(elem.tail)
                if duration is None:
                    warning("Unable to parse duration")
                else:
                    results['duration'] = duration
                break

        return results

    @staticmethod
    def _parse_runtime(text):
        """Convert a colon-separated runtime to seconds, or None if invalid.

        Accepts ``ss``, ``mm:ss`` and ``hh:mm:ss``; each field is folded in
        base 60, so ``mm:ss`` yields ``minutes * 60 + seconds``.
        """
        if not text:
            return None
        try:
            fields = [int(v) for v in text.split(':')]
        except ValueError:
            return None
        total = 0
        for field in fields:
            total = total * 60 + field
        return total