diff --git a/modules/pastealacon/backend.py b/modules/pastealacon/backend.py
index 9a6d65a2..9963d3ba 100644
--- a/modules/pastealacon/backend.py
+++ b/modules/pastealacon/backend.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2011 Laurent Bachelier
+# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@@ -18,19 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-
import re
from weboob.tools.capabilities.paste import BasePasteBackend
from weboob.tools.backend import BaseBackend
from weboob.capabilities.base import NotLoaded
-from .browser import PastealaconBrowser
-from .paste import PastealaconPaste
-
-
-__all__ = ['PastealaconBackend']
+from .browser import PastealaconBrowser, PastealaconPaste
class PastealaconBackend(BaseBackend, BasePasteBackend):
@@ -53,7 +47,7 @@ class PastealaconBackend(BaseBackend, BasePasteBackend):
def can_post(self, contents, title=None, public=None, max_age=None):
try:
- contents.encode(self.browser.ENCODING)
+ contents.encode('ISO-8859-1')
except UnicodeEncodeError:
return 0
if public is False:
@@ -67,20 +61,17 @@ class PastealaconBackend(BaseBackend, BasePasteBackend):
return 1
def get_paste(self, _id):
- with self.browser:
- return self.browser.get_paste(_id)
+ # Delegate to the browser, whose get_paste() is wrapped by paste.id2url and
+ # yields None when the resulting URL does not match the paste pattern.
+ return self.browser.get_paste(_id)
def fill_paste(self, paste, fields):
# if we only want the contents
if fields == ['contents']:
if paste.contents is NotLoaded:
- with self.browser:
- contents = self.browser.get_contents(paste.id)
- paste.contents = contents
+ # fetch only the raw text instead of loading and parsing the paste page
+ contents = self.browser.get_contents(paste.id)
+ paste.contents = contents
# get all fields
elif fields is None or len(fields):
- with self.browser:
- self.browser.fill_paste(paste)
+ # None or any other non-empty selection: let the browser fill the paste
+ # from its page; an empty list leaves the paste untouched
+ self.browser.fill_paste(paste)
return paste
def post_paste(self, paste, max_age=None):
@@ -88,7 +79,6 @@ class PastealaconBackend(BaseBackend, BasePasteBackend):
expiration = self.get_closest_expiration(max_age)
else:
expiration = None
- with self.browser:
- self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))
+ self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))
OBJECTS = {PastealaconPaste: fill_paste}
diff --git a/modules/pastealacon/browser.py b/modules/pastealacon/browser.py
index b6acfde9..f9c4e586 100644
--- a/modules/pastealacon/browser.py
+++ b/modules/pastealacon/browser.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2011 Laurent Bachelier
+# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@@ -17,43 +17,84 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-from mechanize import RobustFactory
import re
-from weboob.tools.browser import BaseBrowser, BrowserUnavailable, BrowserHTTPNotFound
+import requests
-from weboob.capabilities.paste import PasteNotFound
-from weboob.tools.browser.decorators import id2url, check_url
-
-from .pages import PastePage, CaptchaPage, PostPage
-from .paste import PastealaconPaste
-
-__all__ = ['PastealaconBrowser']
+from weboob.capabilities.paste import BasePaste, PasteNotFound
+from weboob.tools.browser2 import HTMLPage, PagesBrowser, URL
-class PastealaconBrowser(BaseBrowser):
- DOMAIN = 'pastealacon.com'
- ENCODING = 'ISO-8859-1'
- PASTE_URL = 'http://%s/(?P<id>\d+)' % DOMAIN
- PAGES = {PASTE_URL: PastePage,
- 'http://%s/%s' % (DOMAIN, re.escape('pastebin.php?captcha=1')): CaptchaPage,
- 'http://%s/' % DOMAIN: PostPage}
+class Spam(Exception):
+ """Raised by the browser when posting a paste ends up on the captcha page."""
+ def __init__(self):
+ super(Spam, self).__init__("Detected as spam and unable to handle the captcha")
- def __init__(self, *args, **kwargs):
- kwargs['factory'] = RobustFactory()
- BaseBrowser.__init__(self, *args, **kwargs)
- @id2url(PastealaconPaste.id2url)
- @check_url(PASTE_URL)
+class PastealaconPaste(BasePaste):
+ """Paste object used by the pastealacon browser and backend."""
+ # all pastes are public
+ public = True
+
+ # TODO perhaps move this logic elsewhere, remove this and id2url from capability
+ # (page_url is required by pastoob)
+ @property
+ def page_url(self):
+ """Return the paste's page URL: the browser's BASEURL followed by the paste ID."""
+ # PastealaconBrowser is defined further down in this module; the name is
+ # only looked up when the property is read.
+ return '%s%s' % (PastealaconBrowser.BASEURL, self.id)
+
+
+class PastePage(HTMLPage):
+ """Page showing a single paste; attached to the browser's ``paste`` URL."""
+ # TODO use magic Browser2 methods (if possible)
+ def fill_paste(self, paste):
+ """
+ Set ``paste.title`` and ``paste.contents`` from this page and return ``paste``.
+
+ Raises PasteNotFound when the page does not contain exactly one
+ syntax listing.
+ """
+ # there is no 404, try to detect if there really is a content
+ if len(self.doc.xpath('id("content")/div[@class="syntax"]//ol')) != 1:
+ raise PasteNotFound()
+
+ header = self.doc.xpath('id("content")/h3')[0]
+ # the groups must be named: 'author' is read back from groupdict() below
+ matches = re.match(r'Posted by (?P<author>.+) on (?P<date>.+) \(', header.text)
+ # the poster name doubles as the paste title (PostPage sends the title as 'poster')
+ paste.title = matches.groupdict().get('author')
+ paste.contents = unicode(self.doc.xpath('//textarea[@id="code"]')[0].text)
+ return paste
+
+ def get_id(self):
+ """Return the ``id`` parameter captured from the page URL."""
+ return self.params['id']
+
+
+class CaptchaPage(HTMLPage):
+ # Nothing is parsed here; the browser only checks whether it landed on this
+ # page after posting, and raises Spam if so.
+ pass
+
+
+class PostPage(HTMLPage):
+ """Page holding the 'editor' form used to submit a new paste."""
+ # TODO handle encoding in Browser2
+ def post(self, paste, expiration=None):
+ """
+ Fill in the 'editor' form from ``paste`` and submit it.
+
+ The contents and title are encoded as ISO-8859-1 before being put in
+ the form; 'expiry' is only set when ``expiration`` is truthy.
+ """
+ encoding = 'ISO-8859-1'
+
+ form = self.get_form(name='editor')
+ form['code2'] = paste.contents.encode(encoding)
+ # the paste title is sent as the poster name
+ form['poster'] = paste.title.encode(encoding)
+ if expiration:
+ form['expiry'] = expiration
+ form.submit()
+
+
+class PastealaconBrowser(PagesBrowser):
+ """Browser for pastealacon.com: paste pages, raw downloads and posting."""
+ BASEURL = 'http://pastealacon.com/'
+
+ # The 'id' group name is required: callers pass id=... to stay_or_go()/open()
+ # and read it back through groupdict()['id'] and PastePage.params['id'].
+ paste = URL(r'(?P<id>\d+)', PastePage)
+ captcha = URL(r'%s' % re.escape('pastebin.php?captcha=1'), CaptchaPage)
+ # raw download of a paste; no page class is attached
+ raw = URL(r'%s(?P<id>\d+)' % re.escape('pastebin.php?dl='))
+ post = URL(r'$', PostPage)
+
+ @paste.id2url
def get_paste(self, url):
- _id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id']
- return PastealaconPaste(_id)
+ # Returns a PastealaconPaste carrying the ID found in the URL. When the URL
+ # does not match the paste pattern the method falls through and returns None.
+ url = self.absurl(url, base=True)
+ m = self.paste.match(url)
+ if m:
+ return PastealaconPaste(m.groupdict()['id'])
def fill_paste(self, paste):
"""
Get as much as information possible from the paste page
"""
- self.location(paste.page_url)
+ # NOTE(review): presumably a no-op when the browser is already on this
+ # paste's page, otherwise navigates to it — confirm against browser2's URL class
+ self.paste.stay_or_go(id=paste.id)
return self.page.fill_paste(paste)
def get_contents(self, _id):
@@ -63,13 +104,16 @@ class PastealaconBrowser(BaseBrowser):
Returns unicode.
"""
try:
- return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
- except BrowserHTTPNotFound:
- raise PasteNotFound()
+ return self.raw.open(id=_id).text
+ # TODO maybe have Browser2 raise a specialized exception
+ except requests.exceptions.HTTPError as e:
+ # only a 404 on the raw URL is reported as a missing paste
+ if e.response.status_code == requests.codes.not_found:
+ raise PasteNotFound()
+ else:
+ # bare raise re-raises the active exception with its original traceback
+ raise
def post_paste(self, paste, expiration=None):
- self.home()
- self.page.post(paste, expiration=expiration)
- if self.is_on_page(CaptchaPage):
- raise BrowserUnavailable("Detected as spam and unable to handle the captcha")
+ # submit the form on the post page, then look at where the browser ended up
+ self.post.stay_or_go().post(paste, expiration=expiration)
+ if self.captcha.is_here():
+ raise Spam()
+ # NOTE(review): assumes the browser is now on a PastePage (the only page
+ # class in this module defining get_id) — confirm
paste.id = self.page.get_id()
diff --git a/modules/pastealacon/pages.py b/modules/pastealacon/pages.py
deleted file mode 100644
index 7a395f84..00000000
--- a/modules/pastealacon/pages.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2011 Laurent Bachelier
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-
-import re
-
-from weboob.tools.browser import BasePage, BrokenPageError
-
-from weboob.capabilities.paste import PasteNotFound
-
-__all__ = ['PastePage', 'PostPage', 'CaptchaPage']
-
-
-class PastePage(BasePage):
- def fill_paste(self, paste):
- root = self.document.getroot()
- try:
- # there is no 404, try to detect if there really is a content
- self.parser.select(root, 'id("content")/div[@class="syntax"]//ol', 1, 'xpath')
- except BrokenPageError:
- raise PasteNotFound()
- header = self.parser.select(root, 'id("content")/h3', 1, 'xpath')
- matches = re.match(r'Posted by (?P<author>.+) on (?P<date>.+) \(', header.text)
- paste.title = matches.groupdict().get('author')
- paste.contents = self.parser.select(root, '//textarea[@id="code"]', 1, 'xpath').text
- return paste
-
- def get_id(self):
- """
- Find out the ID from the URL
- """
- return self.group_dict['id']
-
-
-class PostPage(BasePage):
- def post(self, paste, expiration=None):
- self.browser.select_form(name='editor')
- self.browser['code2'] = paste.contents.encode(self.browser.ENCODING)
- self.browser['poster'] = paste.title.encode(self.browser.ENCODING)
- if expiration:
- self.browser['expiry'] = [expiration]
- self.browser.submit()
-
-
-class CaptchaPage(BasePage):
- pass
diff --git a/modules/pastealacon/paste.py b/modules/pastealacon/paste.py
deleted file mode 100644
index 1c86c556..00000000
--- a/modules/pastealacon/paste.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright(C) 2011 Laurent Bachelier
-#
-# This file is part of weboob.
-#
-# weboob is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# weboob is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-
-
-from weboob.capabilities.paste import BasePaste
-
-
-__all__ = ['PastealaconPaste']
-
-
-class PastealaconPaste(BasePaste):
- # all pastes are public
- public = True
-
- @classmethod
- def id2url(cls, _id):
- return 'http://pastealacon.com/%s' % _id
diff --git a/modules/pastealacon/test.py b/modules/pastealacon/test.py
index 7e87faed..d9c7adc8 100644
--- a/modules/pastealacon/test.py
+++ b/modules/pastealacon/test.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2011 Laurent Bachelier
+# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@@ -20,10 +20,11 @@
from weboob.tools.test import BackendTest
from weboob.capabilities.base import NotLoaded
-from weboob.tools.browser import BrowserUnavailable
from weboob.capabilities.paste import PasteNotFound
+from .browser import Spam
+
class PastealaconTest(BackendTest):
BACKEND = 'pastealacon'
@@ -62,10 +63,11 @@ class PastealaconTest(BackendTest):
def test_spam(self):
p = self.backend.new_paste(None, title=u'viagra', contents=u'http://example.com/')
- self.assertRaises(BrowserUnavailable, self.backend.post_paste, p)
+ # the browser now raises the module's own Spam exception when posting
+ # lands on the captcha page
+ self.assertRaises(Spam, self.backend.post_paste, p)
def test_notfound(self):
- for _id in ('424242424242424242424242424242424242', 'http://pastealacon.com/424242424242424242424242424242424242'):
+ for _id in ('424242424242424242424242424242424242',
+ 'http://pastealacon.com/424242424242424242424242424242424242'):
# html method
p = self.backend.get_paste(_id)
self.assertRaises(PasteNotFound, self.backend.fillobj, p, ['title'])