pastebin backend: various fixes and enhancements

* Get the contents from the HTML page, eliminating one request
* Fix encoding support (everything is unicode) for all three Browser methods
* Enhance test
This commit is contained in:
Laurent Bachelier 2011-04-12 01:43:27 +02:00
commit 51d4b87ebb
4 changed files with 44 additions and 14 deletions

View file

@ -20,6 +20,7 @@
from weboob.capabilities.paste import ICapPaste
from weboob.tools.backend import BaseBackend
from weboob.capabilities.base import NotLoaded
from .browser import PastebinBrowser
from .paste import PastebinPaste
@ -38,15 +39,16 @@ class PastebinBackend(BaseBackend, ICapPaste):
BROWSER = PastebinBrowser
def get_paste(self, _id):
paste = PastebinPaste(_id)
self.browser.fill_paste(paste)
return paste
return PastebinPaste(_id)
def fill_paste(self, paste, fields):
self.browser.fill_paste(paste)
if 'contents' in fields:
contents = self.browser.get_contents(paste.id)
paste.contents = contents
# if we only want the contents
if fields == ['contents']:
if paste.contents is NotLoaded:
contents = self.browser.get_contents(paste.id)
paste.contents = contents
elif fields:
self.browser.fill_paste(paste)
return paste
def post_paste(self, paste):

View file

@ -24,8 +24,6 @@ from .pages import PastePage, PostPage
__all__ = ['PastebinBrowser']
from weboob.tools.browser import BaseBrowser
class PastebinBrowser(BaseBrowser):
DOMAIN = 'pastebin.com'
ENCODING = 'UTF-8'
@ -33,11 +31,19 @@ class PastebinBrowser(BaseBrowser):
'http://%s/' % DOMAIN: PostPage}
def fill_paste(self, paste):
"""
Get as much information as possible from the paste page.
"""
self.location(paste.page_url)
return self.page.fill_paste(paste)
def get_contents(self, _id):
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id))
"""
Get the contents from the raw URL.
This is the fastest and safest method if you only want the content.
Returns unicode.
"""
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)).decode(self.ENCODING)
def post_paste(self, paste):
self.home()

View file

@ -29,6 +29,9 @@ class PastePage(BasePage):
'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath')
paste.title = self.parser.select(header,
'//div[@class="paste_box_line1"]//h1', 1, 'xpath').text
paste.contents = self.parser.select(self.document.getroot(),
'//textarea[@id="paste_code"]', 1, 'xpath').text
return paste
def get_id(self):
"""
@ -41,6 +44,6 @@ class PastePage(BasePage):
class PostPage(BasePage):
def post(self, paste):
self.browser.select_form(name='myform')
self.browser['paste_code'] = paste.contents
self.browser['paste_name'] = paste.title
self.browser['paste_code'] = paste.contents.encode(self.browser.ENCODING)
self.browser['paste_name'] = paste.title.encode(self.browser.ENCODING)
self.browser.submit()

View file

@ -17,23 +17,42 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from .paste import PastebinPaste
from weboob.capabilities.base import NotLoaded
class PastebinTest(BackendTest):
BACKEND = 'pastebin'
def test_get_paste(self):
# html method
p = self.backend.get_paste('7HmXwzyt')
self.backend.fillobj(p, ('title', 'contents'))
self.backend.fillobj(p, ['title'])
assert p.title == 'plop'
assert p.page_url == 'http://pastebin.com/7HmXwzyt'
assert p.contents == 'prout'
# raw method
p = self.backend.get_paste('7HmXwzyt')
self.backend.fillobj(p, ['contents'])
assert p.title is NotLoaded
assert p.page_url == 'http://pastebin.com/7HmXwzyt'
assert p.contents == 'prout'
def test_post(self):
p = PastebinPaste(None, title='ouiboube', contents='Weboob Test')
self.backend.post_paste(p)
assert p.id
assert p.title == 'ouiboube'
assert p.id in p.page_url
def test_specialchars(self):
# post a paste and get the contents through the HTML response
p1 = PastebinPaste(None, title='ouiboube', contents=u'Weboob <test>¿¡')
self.backend.post_paste(p1)
assert p1.id
# this should use the raw method to get the contents
p2 = self.backend.get_paste(p1.id)
self.backend.fillobj(p2, ['contents'])
assert p2.contents == p1.contents