From 51d4b87ebbe46f1e16e87a0b62b24e0971e85fcf Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Tue, 12 Apr 2011 01:43:27 +0200 Subject: [PATCH] pastebin backend: various fixes and enhancements * Get the contents from the HTML page, eliminating one request * Fix encoding support (everything is unicode) for all three Browser methods * Enhance test --- weboob/backends/pastebin/backend.py | 16 +++++++++------- weboob/backends/pastebin/browser.py | 12 +++++++++--- weboob/backends/pastebin/pages.py | 7 +++++-- weboob/backends/pastebin/test.py | 23 +++++++++++++++++++++-- 4 files changed, 44 insertions(+), 14 deletions(-) diff --git a/weboob/backends/pastebin/backend.py b/weboob/backends/pastebin/backend.py index 72ff6a8b..c1d9f403 100644 --- a/weboob/backends/pastebin/backend.py +++ b/weboob/backends/pastebin/backend.py @@ -20,6 +20,7 @@ from weboob.capabilities.paste import ICapPaste from weboob.tools.backend import BaseBackend +from weboob.capabilities.base import NotLoaded from .browser import PastebinBrowser from .paste import PastebinPaste @@ -38,15 +39,16 @@ class PastebinBackend(BaseBackend, ICapPaste): BROWSER = PastebinBrowser def get_paste(self, _id): - paste = PastebinPaste(_id) - self.browser.fill_paste(paste) - return paste + return PastebinPaste(_id) def fill_paste(self, paste, fields): - self.browser.fill_paste(paste) - if 'contents' in fields: - contents = self.browser.get_contents(paste.id) - paste.contents = contents + # if we only want the contents + if fields == ['contents']: + if paste.contents is NotLoaded: + contents = self.browser.get_contents(paste.id) + paste.contents = contents + elif fields: + self.browser.fill_paste(paste) return paste def post_paste(self, paste): diff --git a/weboob/backends/pastebin/browser.py b/weboob/backends/pastebin/browser.py index 7174cc12..9bb30534 100644 --- a/weboob/backends/pastebin/browser.py +++ b/weboob/backends/pastebin/browser.py @@ -24,8 +24,6 @@ from .pages import PastePage, PostPage __all__ = 
['PastebinBrowser'] -from weboob.tools.browser import BaseBrowser - class PastebinBrowser(BaseBrowser): DOMAIN = 'pastebin.com' ENCODING = 'UTF-8' @@ -33,11 +31,19 @@ class PastebinBrowser(BaseBrowser): 'http://%s/' % DOMAIN: PostPage} def fill_paste(self, paste): + """ + Get as much information as possible from the paste page + """ self.location(paste.page_url) return self.page.fill_paste(paste) def get_contents(self, _id): - return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)) + """ + Get the contents from the raw URL + This is the fastest and safest method if you only want the content. + Returns unicode. + """ + return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)).decode(self.ENCODING) def post_paste(self, paste): self.home() diff --git a/weboob/backends/pastebin/pages.py b/weboob/backends/pastebin/pages.py index dad12f38..9a262825 100644 --- a/weboob/backends/pastebin/pages.py +++ b/weboob/backends/pastebin/pages.py @@ -29,6 +29,9 @@ class PastePage(BasePage): 'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath') paste.title = self.parser.select(header, '//div[@class="paste_box_line1"]//h1', 1, 'xpath').text + paste.contents = self.parser.select(self.document.getroot(), + '//textarea[@id="paste_code"]', 1, 'xpath').text + return paste def get_id(self): """ @@ -41,6 +44,6 @@ class PastePage(BasePage): class PostPage(BasePage): def post(self, paste): self.browser.select_form(name='myform') - self.browser['paste_code'] = paste.contents - self.browser['paste_name'] = paste.title + self.browser['paste_code'] = paste.contents.encode(self.browser.ENCODING) + self.browser['paste_name'] = paste.title.encode(self.browser.ENCODING) self.browser.submit() diff --git a/weboob/backends/pastebin/test.py b/weboob/backends/pastebin/test.py index 964731ee..d7b1ff88 100644 --- a/weboob/backends/pastebin/test.py +++ b/weboob/backends/pastebin/test.py @@ -17,23 +17,42 @@ # You should have received a copy of the GNU Affero General Public License 
# along with weboob. If not, see <http://www.gnu.org/licenses/>. - from weboob.tools.test import BackendTest from .paste import PastebinPaste +from weboob.capabilities.base import NotLoaded class PastebinTest(BackendTest): BACKEND = 'pastebin' def test_get_paste(self): + # html method p = self.backend.get_paste('7HmXwzyt') - self.backend.fillobj(p, ('title', 'contents')) + self.backend.fillobj(p, ['title']) assert p.title == 'plop' assert p.page_url == 'http://pastebin.com/7HmXwzyt' assert p.contents == 'prout' + # raw method + p = self.backend.get_paste('7HmXwzyt') + self.backend.fillobj(p, ['contents']) + assert p.title is NotLoaded + assert p.page_url == 'http://pastebin.com/7HmXwzyt' + assert p.contents == 'prout' + def test_post(self): p = PastebinPaste(None, title='ouiboube', contents='Weboob Test') self.backend.post_paste(p) assert p.id assert p.title == 'ouiboube' assert p.id in p.page_url + + def test_specialchars(self): + # post a paste and get the contents through the HTML response + p1 = PastebinPaste(None, title='ouiboube', contents=u'Weboob ¿¡') + self.backend.post_paste(p1) + assert p1.id + + # this should use the raw method to get the contents + p2 = self.backend.get_paste(p1.id) + self.backend.fillobj(p2, ['contents']) + assert p2.contents == p1.contents