diff --git a/weboob/backends/pastealacon/browser.py b/weboob/backends/pastealacon/browser.py index 39e04a1e..0afd72ee 100644 --- a/weboob/backends/pastealacon/browser.py +++ b/weboob/backends/pastealacon/browser.py @@ -20,7 +20,9 @@ from mechanize import RobustFactory import re -from weboob.tools.browser import BaseBrowser, BrowserUnavailable +from weboob.tools.browser import BaseBrowser, BrowserUnavailable, BrowserHTTPNotFound + +from weboob.capabilities.paste import PasteNotFound from .pages import PastePage, CaptchaPage, PostPage @@ -50,7 +52,10 @@ class PastealaconBrowser(BaseBrowser): This is the fastest and safest method if you only want the content. Returns unicode. """ - return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id)).decode(self.ENCODING) + try: + return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING) + except BrowserHTTPNotFound: + raise PasteNotFound() def post_paste(self, paste): self.home() diff --git a/weboob/backends/pastealacon/pages.py b/weboob/backends/pastealacon/pages.py index ec4a7cfa..c1dfabba 100644 --- a/weboob/backends/pastealacon/pages.py +++ b/weboob/backends/pastealacon/pages.py @@ -18,16 +18,23 @@ # along with weboob. If not, see . -from weboob.tools.browser import BasePage - import re +from weboob.tools.browser import BasePage, BrokenPageError + +from weboob.capabilities.paste import PasteNotFound + __all__ = ['PastePage', 'PostPage', 'CaptchaPage'] class PastePage(BasePage): def fill_paste(self, paste): root = self.document.getroot() - header = self.parser.select(root, 'id("content")//h3', 1, 'xpath') + try: + # there is no 404, try to detect if there really is a content + self.parser.select(root, 'id("content")/div[@class="syntax"]//ol', 1, 'xpath') + except BrokenPageError: + raise PasteNotFound() + header = self.parser.select(root, 'id("content")/h3', 1, 'xpath') matches = re.match(r'Posted by (?P.+) on (?P.+) \(', header.text) paste.title = matches.groupdict().get('author') paste.contents = self.parser.select(root, '//textarea[@id="code"]', 1, 'xpath').text diff --git a/weboob/backends/pastealacon/test.py b/weboob/backends/pastealacon/test.py index bc80fb2f..77088f61 100644 --- a/weboob/backends/pastealacon/test.py +++ b/weboob/backends/pastealacon/test.py @@ -17,9 +17,13 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . + from weboob.tools.test import BackendTest from weboob.capabilities.base import NotLoaded from weboob.tools.browser import BrowserUnavailable + +from weboob.capabilities.paste import PasteNotFound + from .paste import PastealaconPaste class PastealaconTest(BackendTest): @@ -53,3 +57,12 @@ class PastealaconTest(BackendTest): def test_spam(self): p = PastealaconPaste(None, title='viagra', contents='http://example.com/') self.assertRaises(BrowserUnavailable, self.backend.post_paste, p) + + def test_notfound(self): + # html method + p = self.backend.get_paste('424242424242424242424242424242424242') + self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['title']) + + # raw method + p = self.backend.get_paste('424242424242424242424242424242424242') + self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['contents']) diff --git a/weboob/backends/pastebin/browser.py b/weboob/backends/pastebin/browser.py index da7885ad..7eab6612 100644 --- a/weboob/backends/pastebin/browser.py +++ b/weboob/backends/pastebin/browser.py @@ -18,7 +18,9 @@ # along with weboob. If not, see . -from weboob.tools.browser import BaseBrowser +from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound + +from weboob.capabilities.paste import PasteNotFound from .pages import PastePage, PostPage @@ -43,8 +45,11 @@ class PastebinBrowser(BaseBrowser): """ Get as much as information possible from the paste page """ - self.location(paste.page_url) - return self.page.fill_paste(paste) + try: + self.location(paste.page_url) + return self.page.fill_paste(paste) + except BrowserHTTPNotFound: + raise PasteNotFound() def get_contents(self, _id): """ @@ -52,7 +57,10 @@ class PastebinBrowser(BaseBrowser): This is the fastest and safest method if you only want the content. Returns unicode. """ - return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)).decode(self.ENCODING) + try: + return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING) + except BrowserHTTPNotFound: + raise PasteNotFound() def post_paste(self, paste): self.home() diff --git a/weboob/backends/pastebin/test.py b/weboob/backends/pastebin/test.py index 5b486ead..5bbe4dfe 100644 --- a/weboob/backends/pastebin/test.py +++ b/weboob/backends/pastebin/test.py @@ -20,6 +20,7 @@ from weboob.tools.test import BackendTest from .paste import PastebinPaste from weboob.capabilities.base import NotLoaded +from weboob.capabilities.paste import PasteNotFound class PastebinTest(BackendTest): BACKEND = 'pastebin' @@ -57,3 +58,12 @@ class PastebinTest(BackendTest): p2 = self.backend.get_paste(p1.id) self.backend.fillobj(p2, ['contents']) assert p2.contents == p1.contents + + def test_notfound(self): + # html method + p = self.backend.get_paste('weboooooooooooooooooooooooooob') + self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['title']) + + # raw method + p = self.backend.get_paste('weboooooooooooooooooooooooooob') + self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['contents']) diff --git a/weboob/capabilities/paste.py b/weboob/capabilities/paste.py index 8a1cfccb..067bc876 100644 --- a/weboob/capabilities/paste.py +++ b/weboob/capabilities/paste.py @@ -21,9 +21,12 @@ from .base import IBaseCap, CapBaseObject, NotLoaded -__all__ = ['BasePaste', 'ICapPaste'] +__all__ = ['PasteNotFound', 'BasePaste', 'ICapPaste'] +class PasteNotFound(Exception): + pass + class BasePaste(CapBaseObject): """ Represents a pasted text.