diff --git a/modules/arretsurimages/__init__.py b/modules/arretsurimages/__init__.py new file mode 100644 index 00000000..dc0aead7 --- /dev/null +++ b/modules/arretsurimages/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 franek +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from .backend import ArretSurImagesBackend + + +__all__ = ['ArretSurImagesBackend'] diff --git a/modules/arretsurimages/backend.py b/modules/arretsurimages/backend.py new file mode 100644 index 00000000..e508aff2 --- /dev/null +++ b/modules/arretsurimages/backend.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 franek +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
from weboob.capabilities.video import ICapVideo, BaseVideo
from weboob.capabilities.collection import ICapCollection, CollectionNotFound
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.value import ValueBackendPassword

from .browser import ArretSurImagesBrowser
from .video import ArretSurImagesVideo

__all__ = ['ArretSurImagesBackend']


class ArretSurImagesBackend(BaseBackend, ICapVideo, ICapCollection):
    """Backend giving access to the videos of the arretsurimages website.

    All network work is delegated to ``ArretSurImagesBrowser``; every call
    into the browser is made inside a ``with self.browser`` block.

    NOTE(review): ICapCollection is listed as a base class, but this class
    defines no collection-specific method -- confirm whether that is
    intended.
    """

    NAME = 'arretsurimages'
    DESCRIPTION = u'arretsurimages website'
    MAINTAINER = u'franek'
    EMAIL = 'franek@chicour.net'
    VERSION = '0.f'

    # The account e-mail is stored under the 'login' key and is not masked.
    CONFIG = BackendConfig(
        ValueBackendPassword('login', label='email', masked=False),
        ValueBackendPassword('password', label='Password'),
    )
    BROWSER = ArretSurImagesBrowser

    def create_default_browser(self):
        """Create the browser from the configured login and password."""
        login = self.config['login'].get()
        password = self.config['password'].get()
        return self.create_browser(login, password)

    def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        """Return the videos found by the browser for ``pattern``.

        ``sortby``, ``nsfw`` and ``max_results`` are accepted but not
        forwarded to the browser.
        """
        with self.browser:
            found = self.browser.search_videos(pattern)
            return found

    def get_video(self, _id):
        """Return the video the browser builds for ``_id``."""
        with self.browser:
            fetched = self.browser.get_video(_id)
            return fetched

    def fill_video(self, video, fields):
        """Load the requested ``fields`` of ``video`` and return it.

        Unless only the thumbnail is requested, the whole video page is
        fetched again. The thumbnail data is downloaded when 'thumbnail' is
        requested and the video has a thumbnail.
        """
        thumbnail_only = (fields == ['thumbnail'])
        if not thumbnail_only:
            # Anything beyond the thumbnail: reload every field from the page.
            page_url = ArretSurImagesVideo.id2url(video.id)
            with self.browser:
                video = self.browser.get_video(page_url, video)

        wants_thumbnail = 'thumbnail' in fields
        if wants_thumbnail and video.thumbnail:
            with self.browser:
                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    # Maps an object class to the method used to fill it.
    OBJECTS = {ArretSurImagesVideo: fill_video}
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from weboob.tools.browser.decorators import id2url

from .pages import VideoPage, IndexPage, LoginPage, LoginRedirectPage
from .video import ArretSurImagesVideo


__all__ = ['ArretSurImagesBrowser']


class ArretSurImagesBrowser(BaseBrowser):
    """Browser for www.arretsurimages.net.

    Maps the site's URLs to page classes and offers the three operations
    used by the backend: listing videos, fetching one video and logging in.
    """

    PROTOCOL = 'http'
    DOMAIN = 'www.arretsurimages.net'
    ENCODING = None

    # URL pattern -> page class. The first pattern is a raw string because it
    # contains a regex escape (``\?``); its value is unchanged.
    PAGES = {
        r'%s://%s/contenu.php\?id=.+' % (PROTOCOL, DOMAIN): VideoPage,
        '%s://%s/emissions.php' % (PROTOCOL, DOMAIN): IndexPage,
        '%s://%s/forum/login.php' % (PROTOCOL, DOMAIN): LoginPage,
        '%s://%s/forum/index.php' % (PROTOCOL, DOMAIN): LoginRedirectPage,
    }

    def home(self):
        """Go to the site's root page."""
        self.location('%s://%s' % (self.PROTOCOL, self.DOMAIN))

    def search_videos(self, pattern):
        """Return the videos listed on the ``/emissions.php`` page.

        ``pattern`` is accepted but not used: the listing page is always
        returned in full.
        """
        self.location(self.buildurl('/emissions.php'))
        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()

    # NOTE(review): the decorator presumably turns a bare id into a URL via
    # ArretSurImagesVideo.id2url -- confirm against weboob's id2url decorator.
    @id2url(ArretSurImagesVideo.id2url)
    def get_video(self, url, video=None):
        """Log in, open ``url`` and return the video built by the page.

        ``video``, when given, is passed to the page's ``get_video``.
        """
        self.login()
        self.location(url)
        return self.page.get_video(video)

    def is_logged(self):
        """Return True when the current page is not the login page."""
        return not self.is_on_page(LoginPage)

    def login(self):
        """Submit the login form with the browser's credentials.

        Raises BrowserIncorrectPassword when the browser is still on the
        login page after the form has been submitted.
        """
        if not self.is_on_page(LoginPage):
            login_url = '%s://%s/forum/login.php' % (self.PROTOCOL, self.DOMAIN)
            self.location(login_url, no_login=True)

        self.page.login(self.username, self.password)

        if not self.is_logged():
            raise BrowserIncorrectPassword()
# --- modules/arretsurimages/pages.py ---

import re
import mechanize

from weboob.capabilities.base import UserError
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities import NotAvailable

from .video import ArretSurImagesVideo


__all__ = ['IndexPage', 'VideoPage', 'ForbiddenVideo', 'LoginPage', 'LoginRedirectPage']


class IndexPage(BasePage):
    """Listing page from which the available videos are enumerated."""

    def iter_videos(self):
        """Yield one ArretSurImagesVideo per ``div.bloc-contenu-8`` block.

        Each video gets its id (empty string when the link does not look
        like a content link), its title and a thumbnail; ratings are set to
        None.
        """
        for div in self.document.getroot().cssselect("div[class=bloc-contenu-8]"):
            # NOTE(review): the first replace() argument was flattened by
            # whitespace mangling in the reviewed copy; it is reconstructed
            # here as a double space -- confirm against upstream.
            title = self.parser.select(div, 'a.typo-titre', 1).text_content().replace('  ', ' ')

            _id = ''
            m = re.match(r'/contenu.php\?id=(.*)', div.find('a').attrib['href'])
            if m:
                _id = m.group(1)

            video = ArretSurImagesVideo(_id)
            video.title = unicode(title)
            video.rating = None
            video.rating_max = None

            thumb = self.parser.select(div, 'img', 1)
            video.thumbnail = Thumbnail(u'http://www.arretsurimages.net' + thumb.attrib['src'])

            yield video


class ForbiddenVideo(UserError):
    """Raised when a video page is loaded without being logged in."""


class VideoPage(BasePage):
    """Content page of a single video."""

    # Extracts the video id from the URL of a content page.
    _URL_ID_REGEXP = re.compile(r'contenu\.php\?id=(.+)')

    def is_logged(self):
        """Return True when the page contains a ``#user-info`` element."""
        try:
            self.parser.select(self.document.getroot(), '#user-info', 1)
        except BrokenPageError:
            return False
        else:
            return True

    def on_loaded(self):
        """Refuse the page when the user is not logged in.

        Raises ForbiddenVideo when ``is_logged()`` is false.
        """
        if not self.is_logged():
            # The previous message talked about inappropriate content, which
            # is not what this check detects.
            raise ForbiddenVideo('You must be logged in to access this video')

    def get_video(self, video=None):
        """Fill ``video`` (or a new one) from this page and return it.

        The title is always set. The URL is set only when a download link
        was found, so that a missing link is not stored as the text 'None'.
        Remaining empty fields are passed to ``set_empty_fields`` with
        NotAvailable.
        """
        if video is None:
            video = ArretSurImagesVideo(self.get_id())
        video.title = unicode(self.get_title())
        url = self.get_url()
        if url is not None:
            video.url = unicode(url)
        video.set_empty_fields(NotAvailable)
        return video

    def get_title(self):
        """Return the text of the first ``h1`` inside ``#titrage-contenu``."""
        return self.document.getroot().cssselect('div[id=titrage-contenu] h1')[0].text

    def get_id(self):
        """Return the video id taken from the page URL.

        Logs a warning and returns 0 when the URL cannot be parsed.
        """
        # NOTE(review): assumes BasePage stores the page URL as ``self.url``
        # -- TODO confirm. The previous code relied on ``self.URL_REGEXP``,
        # ``self['url']`` and ``self.create_id``, none of which appear in
        # this module.
        m = self._URL_ID_REGEXP.search(self.url)
        if m:
            return m.group(1)
        self.logger.warning('Unable to parse ID')
        return 0

    def get_url(self):
        """Return the video file URL, or None when it cannot be found.

        Follows the ``a.bouton-telecharger`` link and returns the ``href``
        of the second ``<a>`` element of the document obtained.
        """
        button = self.parser.select(self.document.getroot(), 'a.bouton-telecharger', 1)
        doc = self.browser.get_document(self.browser.openurl(button.attrib['href']))
        links = doc.xpath('//a')
        # The wanted link is the second one of the page.
        if len(links) < 2:
            return None
        return links[1].attrib['href']


class LoginPage(BasePage):
    """Forum login page."""

    def login(self, username, password):
        """Fill and submit the first form of the page with the credentials."""
        response = self.browser.response()
        # Python mechanize is broken, fixing it.
        # NOTE(review): both replace() arguments were lost during text
        # extraction of the reviewed copy (they appear as bare line breaks);
        # "<br/>" -> "<br />" is a reconstruction -- confirm against upstream.
        response.set_data(response.get_data().replace("<br/>", "<br />"))
        self.browser.set_response(response)
        self.browser.select_form(nr=0)
        # Controls are made writable before 'redir' is assigned.
        self.browser.form.set_all_readonly(False)
        self.browser['redir'] = '/forum/index.php'
        self.browser['username'] = username
        self.browser['password'] = password
        self.browser.submit()


class LoginRedirectPage(BasePage):
    """Page matched by ``/forum/index.php``, the 'redir' target of the login form."""


# --- modules/arretsurimages/test.py ---

from weboob.tools.test import BackendTest


class ArretSurImagesTest(BackendTest):
    """Test case for the 'arretsurimages' backend (placeholder)."""

    BACKEND = 'arretsurimages'

    def test_arretsurimages(self):
        """Not written yet: always raises NotImplementedError."""
        raise NotImplementedError()
from weboob.capabilities.video import BaseVideo


__all__ = ['ArretSurImagesVideo']


class ArretSurImagesVideo(BaseVideo):
    """Video hosted on www.arretsurimages.net."""

    @classmethod
    def id2url(cls, _id):
        """Return the content-page URL built from the identifier ``_id``."""
        page_url_template = 'http://www.arretsurimages.net/contenu.php?id=%s'
        return page_url_template % _id