pastebin backend: various fixes and enhancements

* Get the contents from the HTML page, eliminating one request
* Fix encoding support (everything is unicode) for all three Browser methods
* Enhance test
This commit is contained in:
Laurent Bachelier 2011-04-12 01:43:27 +02:00
commit 51d4b87ebb
4 changed files with 44 additions and 14 deletions

View file

@ -20,6 +20,7 @@
from weboob.capabilities.paste import ICapPaste from weboob.capabilities.paste import ICapPaste
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from weboob.capabilities.base import NotLoaded
from .browser import PastebinBrowser from .browser import PastebinBrowser
from .paste import PastebinPaste from .paste import PastebinPaste
@ -38,15 +39,16 @@ class PastebinBackend(BaseBackend, ICapPaste):
BROWSER = PastebinBrowser BROWSER = PastebinBrowser
def get_paste(self, _id): def get_paste(self, _id):
paste = PastebinPaste(_id) return PastebinPaste(_id)
self.browser.fill_paste(paste)
return paste
def fill_paste(self, paste, fields): def fill_paste(self, paste, fields):
self.browser.fill_paste(paste) # if we only want the contents
if 'contents' in fields: if fields == ['contents']:
contents = self.browser.get_contents(paste.id) if paste.contents is NotLoaded:
paste.contents = contents contents = self.browser.get_contents(paste.id)
paste.contents = contents
elif fields:
self.browser.fill_paste(paste)
return paste return paste
def post_paste(self, paste): def post_paste(self, paste):

View file

@ -24,8 +24,6 @@ from .pages import PastePage, PostPage
__all__ = ['PastebinBrowser'] __all__ = ['PastebinBrowser']
from weboob.tools.browser import BaseBrowser
class PastebinBrowser(BaseBrowser): class PastebinBrowser(BaseBrowser):
DOMAIN = 'pastebin.com' DOMAIN = 'pastebin.com'
ENCODING = 'UTF-8' ENCODING = 'UTF-8'
@ -33,11 +31,19 @@ class PastebinBrowser(BaseBrowser):
'http://%s/' % DOMAIN: PostPage} 'http://%s/' % DOMAIN: PostPage}
def fill_paste(self, paste): def fill_paste(self, paste):
"""
Get as much information as possible from the paste page
"""
self.location(paste.page_url) self.location(paste.page_url)
return self.page.fill_paste(paste) return self.page.fill_paste(paste)
def get_contents(self, _id): def get_contents(self, _id):
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)) """
Get the contents from the raw URL
This is the fastest and safest method if you only want the content.
Returns unicode.
"""
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id)).decode(self.ENCODING)
def post_paste(self, paste): def post_paste(self, paste):
self.home() self.home()

View file

@ -29,6 +29,9 @@ class PastePage(BasePage):
'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath') 'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath')
paste.title = self.parser.select(header, paste.title = self.parser.select(header,
'//div[@class="paste_box_line1"]//h1', 1, 'xpath').text '//div[@class="paste_box_line1"]//h1', 1, 'xpath').text
paste.contents = self.parser.select(self.document.getroot(),
'//textarea[@id="paste_code"]', 1, 'xpath').text
return paste
def get_id(self): def get_id(self):
""" """
@ -41,6 +44,6 @@ class PastePage(BasePage):
class PostPage(BasePage): class PostPage(BasePage):
def post(self, paste): def post(self, paste):
self.browser.select_form(name='myform') self.browser.select_form(name='myform')
self.browser['paste_code'] = paste.contents self.browser['paste_code'] = paste.contents.encode(self.browser.ENCODING)
self.browser['paste_name'] = paste.title self.browser['paste_name'] = paste.title.encode(self.browser.ENCODING)
self.browser.submit() self.browser.submit()

View file

@ -17,23 +17,42 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
from .paste import PastebinPaste from .paste import PastebinPaste
from weboob.capabilities.base import NotLoaded
class PastebinTest(BackendTest): class PastebinTest(BackendTest):
BACKEND = 'pastebin' BACKEND = 'pastebin'
def test_get_paste(self): def test_get_paste(self):
# html method
p = self.backend.get_paste('7HmXwzyt') p = self.backend.get_paste('7HmXwzyt')
self.backend.fillobj(p, ('title', 'contents')) self.backend.fillobj(p, ['title'])
assert p.title == 'plop' assert p.title == 'plop'
assert p.page_url == 'http://pastebin.com/7HmXwzyt' assert p.page_url == 'http://pastebin.com/7HmXwzyt'
assert p.contents == 'prout' assert p.contents == 'prout'
# raw method
p = self.backend.get_paste('7HmXwzyt')
self.backend.fillobj(p, ['contents'])
assert p.title is NotLoaded
assert p.page_url == 'http://pastebin.com/7HmXwzyt'
assert p.contents == 'prout'
def test_post(self): def test_post(self):
p = PastebinPaste(None, title='ouiboube', contents='Weboob Test') p = PastebinPaste(None, title='ouiboube', contents='Weboob Test')
self.backend.post_paste(p) self.backend.post_paste(p)
assert p.id assert p.id
assert p.title == 'ouiboube' assert p.title == 'ouiboube'
assert p.id in p.page_url assert p.id in p.page_url
def test_specialchars(self):
# post a paste and get the contents through the HTML response
p1 = PastebinPaste(None, title='ouiboube', contents=u'Weboob <test>¿¡')
self.backend.post_paste(p1)
assert p1.id
# this should use the raw method to get the contents
p2 = self.backend.get_paste(p1.id)
self.backend.fillobj(p2, ['contents'])
assert p2.contents == p1.contents