paste* backends: "Paste not found" exception

This commit is contained in:
Laurent Bachelier 2011-04-20 01:03:04 +02:00
commit 154c061398
6 changed files with 56 additions and 10 deletions

View file

@ -20,7 +20,9 @@
from mechanize import RobustFactory from mechanize import RobustFactory
import re import re
from weboob.tools.browser import BaseBrowser, BrowserUnavailable from weboob.tools.browser import BaseBrowser, BrowserUnavailable, BrowserHTTPNotFound
from weboob.capabilities.paste import PasteNotFound
from .pages import PastePage, CaptchaPage, PostPage from .pages import PastePage, CaptchaPage, PostPage
@ -50,7 +52,10 @@ class PastealaconBrowser(BaseBrowser):
This is the fastest and safest method if you only want the content. This is the fastest and safest method if you only want the content.
Returns unicode. Returns unicode.
""" """
return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id)).decode(self.ENCODING) try:
return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
except BrowserHTTPNotFound:
raise PasteNotFound()
def post_paste(self, paste): def post_paste(self, paste):
self.home() self.home()

View file

@ -18,16 +18,23 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage
import re import re
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.capabilities.paste import PasteNotFound
__all__ = ['PastePage', 'PostPage', 'CaptchaPage'] __all__ = ['PastePage', 'PostPage', 'CaptchaPage']
class PastePage(BasePage): class PastePage(BasePage):
def fill_paste(self, paste): def fill_paste(self, paste):
root = self.document.getroot() root = self.document.getroot()
header = self.parser.select(root, 'id("content")//h3', 1, 'xpath') try:
# there is no 404, try to detect if there really is a content
self.parser.select(root, 'id("content")/div[@class="syntax"]//ol', 1, 'xpath')
except BrokenPageError:
raise PasteNotFound()
header = self.parser.select(root, 'id("content")/h3', 1, 'xpath')
matches = re.match(r'Posted by (?P<author>.+) on (?P<date>.+) \(', header.text) matches = re.match(r'Posted by (?P<author>.+) on (?P<date>.+) \(', header.text)
paste.title = matches.groupdict().get('author') paste.title = matches.groupdict().get('author')
paste.contents = self.parser.select(root, '//textarea[@id="code"]', 1, 'xpath').text paste.contents = self.parser.select(root, '//textarea[@id="code"]', 1, 'xpath').text

View file

@ -17,9 +17,13 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
from weboob.capabilities.base import NotLoaded from weboob.capabilities.base import NotLoaded
from weboob.tools.browser import BrowserUnavailable from weboob.tools.browser import BrowserUnavailable
from weboob.capabilities.paste import PasteNotFound
from .paste import PastealaconPaste from .paste import PastealaconPaste
class PastealaconTest(BackendTest): class PastealaconTest(BackendTest):
@ -53,3 +57,12 @@ class PastealaconTest(BackendTest):
def test_spam(self): def test_spam(self):
p = PastealaconPaste(None, title='viagra', contents='http://example.com/') p = PastealaconPaste(None, title='viagra', contents='http://example.com/')
self.assertRaises(BrowserUnavailable, self.backend.post_paste, p) self.assertRaises(BrowserUnavailable, self.backend.post_paste, p)
def test_notfound(self):
# html method
p = self.backend.get_paste('424242424242424242424242424242424242')
self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['title'])
# raw method
p = self.backend.get_paste('424242424242424242424242424242424242')
self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['contents'])

View file

@ -18,7 +18,9 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound
from weboob.capabilities.paste import PasteNotFound
from .pages import PastePage, PostPage from .pages import PastePage, PostPage
@ -43,8 +45,11 @@ class PastebinBrowser(BaseBrowser):
""" """
Get as much as information possible from the paste page Get as much as information possible from the paste page
""" """
try:
self.location(paste.page_url) self.location(paste.page_url)
return self.page.fill_paste(paste) return self.page.fill_paste(paste)
except BrowserHTTPNotFound:
raise PasteNotFound()
def get_contents(self, _id): def get_contents(self, _id):
""" """
@ -52,7 +57,10 @@ class PastebinBrowser(BaseBrowser):
This is the fastest and safest method if you only want the content. This is the fastest and safest method if you only want the content.
Returns unicode. Returns unicode.
""" """
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)).decode(self.ENCODING) try:
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
except BrowserHTTPNotFound:
raise PasteNotFound()
def post_paste(self, paste): def post_paste(self, paste):
self.home() self.home()

View file

@ -20,6 +20,7 @@
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
from .paste import PastebinPaste from .paste import PastebinPaste
from weboob.capabilities.base import NotLoaded from weboob.capabilities.base import NotLoaded
from weboob.capabilities.paste import PasteNotFound
class PastebinTest(BackendTest): class PastebinTest(BackendTest):
BACKEND = 'pastebin' BACKEND = 'pastebin'
@ -57,3 +58,12 @@ class PastebinTest(BackendTest):
p2 = self.backend.get_paste(p1.id) p2 = self.backend.get_paste(p1.id)
self.backend.fillobj(p2, ['contents']) self.backend.fillobj(p2, ['contents'])
assert p2.contents == p1.contents assert p2.contents == p1.contents
def test_notfound(self):
# html method
p = self.backend.get_paste('weboooooooooooooooooooooooooob')
self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['title'])
# raw method
p = self.backend.get_paste('weboooooooooooooooooooooooooob')
self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['contents'])

View file

@ -21,9 +21,12 @@
from .base import IBaseCap, CapBaseObject, NotLoaded from .base import IBaseCap, CapBaseObject, NotLoaded
__all__ = ['BasePaste', 'ICapPaste'] __all__ = ['PasteNotFound', 'BasePaste', 'ICapPaste']
class PasteNotFound(Exception):
pass
class BasePaste(CapBaseObject): class BasePaste(CapBaseObject):
""" """
Represents a pasted text. Represents a pasted text.