First version of the arretsurimages module. Search does not work and the name of the downloaded file is incorrect; the cause is not known yet.

Signed-off-by: franek <franek@chicour.net>
2013-03-31 19:31:57 +02:00 · 2013-03-31 19:31:57 +02:00 · d8732f9c4b
commit d8732f9c4b
parent a37fcf448b
6 changed files with 334 additions and 0 deletions
--- a/modules/arretsurimages/pages.py
+++ b/modules/arretsurimages/pages.py
@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013      franek
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+import mechanize
+
+from weboob.capabilities.base import UserError
+from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.capabilities import NotAvailable
+
+
+__all__ = ['IndexPage', 'VideoPage', 'ForbiddenVideo', 'LoginPage', 'LoginRedirectPage']
+
+from .video import ArretSurImagesVideo
+
+
+class IndexPage(BasePage):
+    """Listing page from which video entries are scraped."""
+
+    # Relative link to a content page; the captured group is used as video id.
+    # The dot is escaped so that only a literal "contenu.php" is accepted.
+    CONTENT_HREF_REGEXP = re.compile(r'/contenu\.php\?id=(.*)')
+
+    def iter_videos(self):
+        """Yield one ArretSurImagesVideo per ``div[class=bloc-contenu-8]``.
+
+        Each video gets its title from the ``a.typo-titre`` element and a
+        thumbnail built from the ``img`` element of the block. The id is
+        parsed from the href of the first ``a`` child of the block and is
+        left as an empty string when that href is not a content link.
+        """
+        root = self.document.getroot()
+        for div in root.cssselect("div[class=bloc-contenu-8]"):
+            title = self.parser.select(div, 'a.typo-titre', 1).text_content().replace('  ', ' ')
+
+            m = self.CONTENT_HREF_REGEXP.match(div.find('a').attrib['href'])
+            _id = m.group(1) if m else ''
+
+            video = ArretSurImagesVideo(_id)
+            video.title = unicode(title)
+            video.rating = None
+            video.rating_max = None
+
+            # Thumbnail sources are joined to the site root as-is.
+            thumb = self.parser.select(div, 'img', 1)
+            video.thumbnail = Thumbnail(u'http://www.arretsurimages.net' + thumb.attrib['src'])
+
+            yield video
+
+class ForbiddenVideo(UserError):
+    """Raised by VideoPage.on_loaded when the page is not in a logged-in state."""
+
+
+class VideoPage(BasePage):
+    """Page of a single video, giving access to its title and download URL."""
+
+    def is_logged(self):
+        """Return True when a ``#user-info`` element can be selected on the page."""
+        try:
+            self.parser.select(self.document.getroot(), '#user-info', 1)
+        except BrokenPageError:
+            return False
+        else:
+            return True
+
+    def on_loaded(self):
+        """Refuse the page when it was not served to a logged-in user.
+
+        Raises ForbiddenVideo when is_logged() is false.
+        """
+        if not self.is_logged():
+            raise ForbiddenVideo('You must be logged in to access this video')
+
+    def get_video(self, video=None):
+        """Fill *video* with the data of this page and return it.
+
+        A new ArretSurImagesVideo is created from get_id() when *video* is
+        not given. The title is always set; the URL is only set when
+        get_url() found one, so that a missing link is not stored as the
+        text "None". set_empty_fields(NotAvailable) is applied last.
+        """
+        if not video:
+            # get_id has to be called: passing the bound method itself would
+            # hand the method object to the video as its id.
+            video = ArretSurImagesVideo(self.get_id())
+        video.title = unicode(self.get_title())
+        url = self.get_url()
+        if url is not None:
+            video.url = unicode(url)
+        video.set_empty_fields(NotAvailable)
+        return video
+
+    def get_title(self):
+        """Return the text of the first ``h1`` inside ``div#titrage-contenu``."""
+        return self.document.getroot().cssselect('div[id=titrage-contenu] h1')[0].text
+
+    def get_id(self):
+        """Return the video id parsed from the page URL, or 0 when not found."""
+        # NOTE(review): relies on BasePage exposing the page address as
+        # ``self.url``; the pattern mirrors the content links parsed by
+        # IndexPage -- confirm against the URL patterns of the browser.
+        m = re.search(r'contenu\.php\?id=([^&#]+)', self.url or '')
+        if m:
+            return m.group(1)
+        self.logger.warning('Unable to parse ID')
+        return 0
+
+    def get_url(self):
+        """Return the href of the second link of the download page, or None.
+
+        The download page is the one targeted by the ``a.bouton-telecharger``
+        element of this page; it is fetched through the browser.
+        """
+        button = self.parser.select(self.document.getroot(), 'a.bouton-telecharger', 1)
+        doc = self.browser.get_document(self.browser.openurl(button.attrib['href']))
+        links = doc.xpath('//a')
+        # we take the second link of the page
+        if len(links) < 2:
+            return None
+        return links[1].attrib['href']
+        
+class LoginPage(BasePage):
+    """Page holding the login form."""
+
+    def login(self, username, password):
+        """Fill the first form of the page with the credentials and submit it."""
+        browser = self.browser
+
+        # Python mechanize is broken, fixing it.
+        resp = browser.response()
+        patched = resp.get_data().replace("<br/>", "<br />")
+        resp.set_data(patched)
+        browser.set_response(resp)
+
+        browser.select_form(nr=0)
+        # Lift the read-only flag of every control before assigning values.
+        browser.form.set_all_readonly(False)
+        for field, value in (('redir', '/forum/index.php'),
+                             ('username', username),
+                             ('password', password)):
+            browser[field] = value
+        browser.submit()
+
+
+class LoginRedirectPage(BasePage):
+    """Page type that adds no behavior of its own to BasePage."""