diff --git a/modules/pastealacon/backend.py b/modules/pastealacon/backend.py index 9a6d65a2..9963d3ba 100644 --- a/modules/pastealacon/backend.py +++ b/modules/pastealacon/backend.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -18,19 +18,13 @@ # along with weboob. If not, see . - - import re from weboob.tools.capabilities.paste import BasePasteBackend from weboob.tools.backend import BaseBackend from weboob.capabilities.base import NotLoaded -from .browser import PastealaconBrowser -from .paste import PastealaconPaste - - -__all__ = ['PastealaconBackend'] +from .browser import PastealaconBrowser, PastealaconPaste class PastealaconBackend(BaseBackend, BasePasteBackend): @@ -53,7 +47,7 @@ class PastealaconBackend(BaseBackend, BasePasteBackend): def can_post(self, contents, title=None, public=None, max_age=None): try: - contents.encode(self.browser.ENCODING) + contents.encode('ISO-8859-1') except UnicodeEncodeError: return 0 if public is False: @@ -67,20 +61,17 @@ class PastealaconBackend(BaseBackend, BasePasteBackend): return 1 def get_paste(self, _id): - with self.browser: - return self.browser.get_paste(_id) + return self.browser.get_paste(_id) def fill_paste(self, paste, fields): # if we only want the contents if fields == ['contents']: if paste.contents is NotLoaded: - with self.browser: - contents = self.browser.get_contents(paste.id) - paste.contents = contents + contents = self.browser.get_contents(paste.id) + paste.contents = contents # get all fields elif fields is None or len(fields): - with self.browser: - self.browser.fill_paste(paste) + self.browser.fill_paste(paste) return paste def post_paste(self, paste, max_age=None): @@ -88,7 +79,6 @@ class PastealaconBackend(BaseBackend, BasePasteBackend): expiration = self.get_closest_expiration(max_age) else: expiration = None - with self.browser: - self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) + self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) OBJECTS = {PastealaconPaste: fill_paste} diff --git a/modules/pastealacon/browser.py b/modules/pastealacon/browser.py index b6acfde9..f9c4e586 100644 --- a/modules/pastealacon/browser.py +++ b/modules/pastealacon/browser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -17,43 +17,84 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from mechanize import RobustFactory import re -from weboob.tools.browser import BaseBrowser, BrowserUnavailable, BrowserHTTPNotFound +import requests -from weboob.capabilities.paste import PasteNotFound -from weboob.tools.browser.decorators import id2url, check_url - -from .pages import PastePage, CaptchaPage, PostPage -from .paste import PastealaconPaste - -__all__ = ['PastealaconBrowser'] +from weboob.capabilities.paste import BasePaste, PasteNotFound +from weboob.tools.browser2 import HTMLPage, PagesBrowser, URL -class PastealaconBrowser(BaseBrowser): - DOMAIN = 'pastealacon.com' - ENCODING = 'ISO-8859-1' - PASTE_URL = 'http://%s/(?P\d+)' % DOMAIN - PAGES = {PASTE_URL: PastePage, - 'http://%s/%s' % (DOMAIN, re.escape('pastebin.php?captcha=1')): CaptchaPage, - 'http://%s/' % DOMAIN: PostPage} +class Spam(Exception): + def __init__(self): + super(Spam, self).__init__("Detected as spam and unable to handle the captcha") - def __init__(self, *args, **kwargs): - kwargs['factory'] = RobustFactory() - BaseBrowser.__init__(self, *args, **kwargs) - @id2url(PastealaconPaste.id2url) - @check_url(PASTE_URL) +class PastealaconPaste(BasePaste): + # all pastes are public + public = True + + # TODO perhaps move this logic elsewhere, remove this and id2url from capability + # (page_url is required by pastoob) + @property + def page_url(self): + return '%s%s' % (PastealaconBrowser.BASEURL, self.id) + + +class PastePage(HTMLPage): + # TODO use magic Browser2 methods (if possible) + def fill_paste(self, paste): + # there is no 404, try to detect if there really is a content + if len(self.doc.xpath('id("content")/div[@class="syntax"]//ol')) != 1: + raise PasteNotFound() + + header = self.doc.xpath('id("content")/h3')[0] + matches = re.match(r'Posted by (?P.+) on (?P.+) \(', header.text) + paste.title = matches.groupdict().get('author') + paste.contents = unicode(self.doc.xpath('//textarea[@id="code"]')[0].text) + return paste + + def get_id(self): + return self.params['id'] + + +class CaptchaPage(HTMLPage): + pass + + +class PostPage(HTMLPage): + # TODO handle encoding in Browser2 + def post(self, paste, expiration=None): + encoding = 'ISO-8859-1' + + form = self.get_form(name='editor') + form['code2'] = paste.contents.encode(encoding) + form['poster'] = paste.title.encode(encoding) + if expiration: + form['expiry'] = expiration + form.submit() + + +class PastealaconBrowser(PagesBrowser): + BASEURL = 'http://pastealacon.com/' + + paste = URL(r'(?P\d+)', PastePage) + captcha = URL(r'%s' % re.escape('pastebin.php?captcha=1'), CaptchaPage) + raw = URL(r'%s(?P\d+)' % re.escape('pastebin.php?dl=')) + post = URL(r'$', PostPage) + + @paste.id2url def get_paste(self, url): - _id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id'] - return PastealaconPaste(_id) + url = self.absurl(url, base=True) + m = self.paste.match(url) + if m: + return PastealaconPaste(m.groupdict()['id']) def fill_paste(self, paste): """ Get as much as information possible from the paste page """ - self.location(paste.page_url) + self.paste.stay_or_go(id=paste.id) return self.page.fill_paste(paste) def get_contents(self, _id): @@ -63,13 +104,16 @@ class PastealaconBrowser(BaseBrowser): Returns unicode. """ try: - return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING) - except BrowserHTTPNotFound: - raise PasteNotFound() + return self.raw.open(id=_id).text + # TODO maybe have Browser2 raise a specialized exception + except requests.exceptions.HTTPError as e: + if e.response.status_code == requests.codes.not_found: + raise PasteNotFound() + else: + raise e def post_paste(self, paste, expiration=None): - self.home() - self.page.post(paste, expiration=expiration) - if self.is_on_page(CaptchaPage): - raise BrowserUnavailable("Detected as spam and unable to handle the captcha") + self.post.stay_or_go().post(paste, expiration=expiration) + if self.captcha.is_here(): + raise Spam() paste.id = self.page.get_id() diff --git a/modules/pastealacon/pages.py b/modules/pastealacon/pages.py deleted file mode 100644 index 7a395f84..00000000 --- a/modules/pastealacon/pages.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -import re - -from weboob.tools.browser import BasePage, BrokenPageError - -from weboob.capabilities.paste import PasteNotFound - -__all__ = ['PastePage', 'PostPage', 'CaptchaPage'] - - -class PastePage(BasePage): - def fill_paste(self, paste): - root = self.document.getroot() - try: - # there is no 404, try to detect if there really is a content - self.parser.select(root, 'id("content")/div[@class="syntax"]//ol', 1, 'xpath') - except BrokenPageError: - raise PasteNotFound() - header = self.parser.select(root, 'id("content")/h3', 1, 'xpath') - matches = re.match(r'Posted by (?P.+) on (?P.+) \(', header.text) - paste.title = matches.groupdict().get('author') - paste.contents = self.parser.select(root, '//textarea[@id="code"]', 1, 'xpath').text - return paste - - def get_id(self): - """ - Find out the ID from the URL - """ - return self.group_dict['id'] - - -class PostPage(BasePage): - def post(self, paste, expiration=None): - self.browser.select_form(name='editor') - self.browser['code2'] = paste.contents.encode(self.browser.ENCODING) - self.browser['poster'] = paste.title.encode(self.browser.ENCODING) - if expiration: - self.browser['expiry'] = [expiration] - self.browser.submit() - - -class CaptchaPage(BasePage): - pass diff --git a/modules/pastealacon/paste.py b/modules/pastealacon/paste.py deleted file mode 100644 index 1c86c556..00000000 --- a/modules/pastealacon/paste.py +++ /dev/null @@ -1,33 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from weboob.capabilities.paste import BasePaste - - -__all__ = ['PastealaconPaste'] - - -class PastealaconPaste(BasePaste): - # all pastes are public - public = True - - @classmethod - def id2url(cls, _id): - return 'http://pastealacon.com/%s' % _id diff --git a/modules/pastealacon/test.py b/modules/pastealacon/test.py index 7e87faed..d9c7adc8 100644 --- a/modules/pastealacon/test.py +++ b/modules/pastealacon/test.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -20,10 +20,11 @@ from weboob.tools.test import BackendTest from weboob.capabilities.base import NotLoaded -from weboob.tools.browser import BrowserUnavailable from weboob.capabilities.paste import PasteNotFound +from .browser import Spam + class PastealaconTest(BackendTest): BACKEND = 'pastealacon' @@ -62,10 +63,11 @@ class PastealaconTest(BackendTest): def test_spam(self): p = self.backend.new_paste(None, title=u'viagra', contents=u'http://example.com/') - self.assertRaises(BrowserUnavailable, self.backend.post_paste, p) + self.assertRaises(Spam, self.backend.post_paste, p) def test_notfound(self): - for _id in ('424242424242424242424242424242424242', 'http://pastealacon.com/424242424242424242424242424242424242'): + for _id in ('424242424242424242424242424242424242', + 'http://pastealacon.com/424242424242424242424242424242424242'): # html method p = self.backend.get_paste(_id) self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['title'])