pastebin: Convert to browser2, fix some little things

Stop testing public posts; this has been failing for a long time.
Retrieve the date, though it isn't in the capability yet.
Move most of the code into browser.py now that it is cleaner and simpler.

Some more improvements should be coming, but it is already in a much
better state than it was.
This commit is contained in:
Laurent Bachelier 2014-06-17 00:45:59 +02:00
commit 3010ae7a70
5 changed files with 131 additions and 194 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011-2012 Laurent Bachelier
# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@ -18,18 +18,12 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.capabilities.paste import BasePasteBackend
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.capabilities.base import NotLoaded
from weboob.tools.backend import BackendConfig, BaseBackend
from weboob.tools.capabilities.paste import BasePasteBackend
from weboob.tools.value import Value, ValueBackendPassword
from .browser import PastebinBrowser
from .paste import PastebinPaste
__all__ = ['PastebinBackend']
from .browser import PastebinBrowser, PastebinPaste
class PastebinBackend(BaseBackend, BasePasteBackend):
@ -60,8 +54,8 @@ class PastebinBackend(BaseBackend, BasePasteBackend):
password = self.config['password'].get()
else:
password = None
return self.create_browser(self.config['api_key'].get() if self.config['api_key'].get() else None,
username, password, get_home=False)
return self.create_browser(self.config['api_key'].get() or None,
username, password)
def new_paste(self, *args, **kwargs):
return PastebinPaste(*args, **kwargs)
@ -75,20 +69,16 @@ class PastebinBackend(BaseBackend, BasePasteBackend):
return 1
def get_paste(self, _id):
with self.browser:
return self.browser.get_paste(_id)
return self.browser.get_paste(_id)
def fill_paste(self, paste, fields):
# if we only want the contents
if fields == ['contents']:
if paste.contents is NotLoaded:
with self.browser:
contents = self.browser.get_contents(paste.id)
paste.contents = contents
paste.contents = self.browser.get_contents(paste.id)
# get all fields
elif fields is None or len(fields):
with self.browser:
self.browser.fill_paste(paste)
self.browser.fill_paste(paste)
return paste
def post_paste(self, paste, max_age=None, use_api=True):
@ -96,10 +86,9 @@ class PastebinBackend(BaseBackend, BasePasteBackend):
expiration = self.get_closest_expiration(max_age)
else:
expiration = None
with self.browser:
if use_api and self.config.get('api_key').get():
self.browser.api_post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))
else:
self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))
if use_api and self.config.get('api_key').get():
self.browser.api_post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))
else:
self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))
OBJECTS = {PastebinPaste: fill_paste}

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011-2012 Laurent Bachelier
# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@ -18,58 +18,125 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserIncorrectPassword
from weboob.tools.browser.decorators import id2url, check_url
from weboob.tools.ordereddict import OrderedDict
from weboob.capabilities.paste import PasteNotFound
from .pages import PastePage, PostPage, UserPage, LoginPage
from .paste import PastebinPaste
import urllib
import re
__all__ = ['PastebinBrowser']
from weboob.capabilities.paste import BasePaste, PasteNotFound
from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL
from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText
from weboob.tools.browser2.page import ItemElement, method, RawPage
from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable
class BadAPIRequest(Exception):
class PastebinPaste(BasePaste):
    """Paste object for pastebin, able to turn a paste ID into its page URL."""

    # TODO perhaps move this logic elsewhere, remove this and id2url from capability
    # (page_url is required by pastoob)
    @classmethod
    def id2url(cls, _id):
        """Return the page URL of the paste ID, built on the browser's BASEURL."""
        base_url = PastebinBrowser.BASEURL
        return '%s%s' % (base_url, _id)
class BasePastebinPage(HTMLPage):
    """Common base for the site's HTML pages; exposes the login state."""

    @property
    def logged(self):
        """
        Tell whether the page shows a logged-in session.

        The first top-menu link whose text is 'logout' or 'login' decides:
        True for 'logout', False for 'login'.
        Raises BrowserUnavailable when no such link is found.
        """
        link_states = {'logout': True, 'login': False}
        menu_links = self.doc.xpath('//div[@id="header_bottom"]/ul[@class="top_menu"]//ul/li/a')
        for anchor in menu_links:
            if anchor.text in link_states:
                return link_states[anchor.text]
        raise BrowserUnavailable('Unable to determine login state')
class LoginPage(BasePastebinPage):
    """Page holding the login form."""

    def login(self, username, password):
        """Put the credentials into the login form and submit it."""
        # NOTE(review): 'myform' is passed positionally here while PostPage
        # passes name='myform' -- confirm both select the intended form.
        login_form = self.get_form('myform')
        login_form['user_name'] = username
        login_form['user_password'] = password
        login_form.submit()
class CleanVisibility(Filter):
    """Filter turning the visibility label of a paste into a boolean."""

    def filter(self, txt):
        """
        Return True for a label starting with 'Public' and False for one
        starting with 'Unlisted' or 'Private'.

        Any other label is handed to default_or_raise() with a FilterError.
        """
        if txt.startswith('Public'):
            return True
        if txt.startswith(('Unlisted', 'Private')):
            return False
        return self.default_or_raise(FilterError('Unable to get the paste visibility'))
class PastePage(BasePastebinPage):
    """Page of a single paste."""

    @method
    class fill_paste(ItemElement):
        """Item description reading a PastebinPaste out of the paste page."""

        klass = PastebinPaste

        def parse(self, el):
            # Store the info box in the environment so that the
            # Base(Env('header'), ...) filters below can refer to it.
            info_box = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]')
            self.env['header'] = info_box

        obj_id = Env('id')
        obj_title = Base(
            Env('header'),
            CleanText('.//div[@class="paste_box_line1"]//h1'))
        obj_contents = RawText('//textarea[@id="paste_code"]')
        obj_public = Base(
            Env('header'),
            CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title')))
        # The date is not in the capability yet, hence the private attribute name.
        obj__date = Base(
            Env('header'),
            DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title')))
class PostPage(BasePastebinPage):
    """Page holding the form used to create a paste."""

    def post(self, paste, expiration=None):
        """
        Fill the 'myform' form from the paste and submit it.

        paste.public being True or False selects '0' or '1' for
        paste_private; any other value leaves that field untouched.
        paste_expire_date is only set when expiration is truthy.
        """
        post_form = self.get_form(name='myform')
        post_form['paste_code'] = paste.contents
        post_form['paste_name'] = paste.title

        visibility = paste.public
        if isinstance(visibility, bool):
            post_form['paste_private'] = '0' if visibility else '1'

        if expiration:
            post_form['paste_expire_date'] = expiration
        post_form.submit()
class UserPage(BasePastebinPage):
    """User profile page; it adds nothing to what the base page provides."""
class PastebinBrowser(BaseBrowser):
DOMAIN = 'pastebin.com'
ENCODING = 'UTF-8'
PASTE_URL = 'http://%s/(?P<id>\w+)' % DOMAIN
API_URL = 'http://%s/api/api_post.php' % DOMAIN
PAGES = OrderedDict((
('http://%s/login' % DOMAIN, LoginPage),
('http://%s/u/(?P<username>.+)' % DOMAIN, UserPage),
('http://%s/' % DOMAIN, PostPage),
(PASTE_URL, PastePage),
))
class BadAPIRequest(BrowserUnavailable):
    """Raised when an API response starts with 'Bad API request, '."""
class PastebinBrowser(LoginBrowser):
BASEURL = 'http://pastebin.com/'
api = URL('api/api_post\.php', RawPage)
apilogin = URL('api/api_login\.php', RawPage)
login = URL('login', LoginPage)
userprofile = URL('u/(?P<username>.+)', UserPage)
postpage = URL('$', PostPage)
paste = URL('(?P<id>\w+)', PastePage)
raw = URL('raw\.php\?i=(?P<id>\w+)', RawPage)
def __init__(self, api_key, *args, **kwargs):
super(PastebinBrowser, self).__init__(*args, **kwargs)
self.api_key = api_key
self.user_key = None
BaseBrowser.__init__(self, *args, **kwargs)
# being connected is optional at the module level, so require
# login only if a username is configured
if self.username:
self.post = need_login(self.post_paste)
def fill_paste(self, paste):
"""
Get as much as information possible from the paste page
"""
try:
self.location(paste.page_url, no_login=True)
return self.page.fill_paste(paste)
return self.paste.stay_or_go(id=paste.id).fill_paste(paste)
except BrowserHTTPNotFound:
raise PasteNotFound()
@id2url(PastebinPaste.id2url)
@check_url(PASTE_URL)
@paste.id2url
def get_paste(self, url):
_id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id']
return PastebinPaste(_id)
m = self.paste.match(url)
if m:
return PastebinPaste(m.groupdict()['id'])
def get_contents(self, _id):
"""
@ -78,22 +145,18 @@ class PastebinBrowser(BaseBrowser):
Returns unicode.
"""
try:
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
return self.raw.open(id=_id).response.text
except BrowserHTTPNotFound:
raise PasteNotFound()
def post_paste(self, paste, expiration=None):
self.home()
if not self.is_on_page(PostPage):
self.home()
self.page.post(paste, expiration=expiration)
paste.id = self.page.get_id()
self.postpage.stay_or_go().post(paste, expiration=expiration)
self.page.fill_paste(paste)
def api_post_paste(self, paste, expiration=None):
data = {'api_dev_key': self.api_key,
'api_option': 'paste',
'api_paste_code': paste.contents.encode(self.ENCODING),
}
'api_paste_code': paste.contents}
if self.password:
data['api_user_key'] = self.api_login()
if paste.public is True:
@ -101,12 +164,12 @@ class PastebinBrowser(BaseBrowser):
elif paste.public is False:
data['api_paste_private'] = '1'
if paste.title:
data['api_paste_name'] = paste.title.encode(self.ENCODING)
data['api_paste_name'] = paste.title
if expiration:
data['api_paste_expire_date'] = expiration
res = self.readurl(self.API_URL, urllib.urlencode(data)).decode(self.ENCODING)
res = self.open(self.api.build(), data=data, data_encoding='utf-8').text
self._validate_api_response(res)
paste.id = re.match('^%s$' % self.PASTE_URL, res).groupdict()['id']
paste.id = self.paste.match(res).groupdict()['id']
def api_login(self):
# "The api_user_key does not expire."
@ -116,10 +179,8 @@ class PastebinBrowser(BaseBrowser):
data = {'api_dev_key': self.api_key,
'api_user_name': self.username,
'api_user_password': self.password
}
res = self.readurl('http://%s/api/api_login.php' % self.DOMAIN,
urllib.urlencode(data)).decode(self.ENCODING)
'api_user_password': self.password}
res = self.open(self.apilogin.build(), data=data, data_encoding='utf-8').text
try:
self._validate_api_response(res)
except BadAPIRequest as e:
@ -130,16 +191,14 @@ class PastebinBrowser(BaseBrowser):
self.user_key = res
return res
# TODO make it into a Page?
def _validate_api_response(self, res):
matches = re.match('Bad API request, (?P<error>.+)', res)
if matches:
raise BadAPIRequest(matches.groupdict().get('error'))
def is_logged(self):
return self.page and self.page.is_logged()
def login(self):
self.location('http://%s/login' % self.DOMAIN, no_login=True)
def do_login(self):
self.login.stay_or_go().login()
self.page.login(self.username, self.password)
if not self.is_logged():
if not self.page.logged:
raise BrowserIncorrectPassword()

View file

@ -1,85 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011-2012 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage, BrokenPageError
__all__ = ['PastePage', 'PostPage', 'LoginPage']
class BasePastebinPage(BasePage):
    """Common base for the site's pages; knows how to read the login state."""

    def is_logged(self):
        """
        Return True when a 'logout' link comes first, False when a 'login'
        link comes first, and None when neither link is present.
        """
        top_menu = self.parser.select(self.document.getroot(),
                                      'id("header_bottom")/ul[@class="top_menu"]', 1, 'xpath')
        link_states = {'logout': True, 'login': False}
        for anchor in top_menu.xpath('//ul/li/a'):
            if anchor.text in link_states:
                return link_states[anchor.text]
        return None
class LoginPage(BasePastebinPage):
    """Page holding the login form."""

    def login(self, username, password):
        """Select the second form (nr=1), fill in the encoded credentials, submit."""
        browser = self.browser
        browser.select_form(nr=1)
        credentials = (('user_name', username), ('user_password', password))
        for field, value in credentials:
            browser[field] = value.encode(browser.ENCODING)
        browser.submit()
class PastePage(BasePastebinPage):
    """Page of a single paste."""

    def fill_paste(self, paste):
        """
        Set title, contents and public on the paste from the page.

        Returns the same paste object. Raises BrokenPageError when the
        visibility label is not one of the known ones.
        """
        select = self.parser.select
        info_box = select(self.document.getroot(),
                          'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath')

        title_node = select(info_box, '//div[@class="paste_box_line1"]//h1', 1, 'xpath')
        paste.title = unicode(title_node.text)

        code_node = select(self.document.getroot(), '//textarea[@id="paste_code"]', 1, 'xpath')
        paste.contents = unicode(code_node.text)

        icon_node = select(info_box, '//div[@class="paste_box_line1"]//img', 1, 'xpath')
        label = icon_node.attrib['title']
        if label.startswith('Public'):
            paste.public = True
        elif label.startswith(('Unlisted', 'Private')):
            paste.public = False
        else:
            raise BrokenPageError('Unable to get the paste visibility')
        return paste

    def get_id(self):
        """
        Find out the ID from the URL
        """
        groups = self.group_dict
        return groups['id']
class PostPage(BasePastebinPage):
    """Page holding the form used to create a paste."""

    def post(self, paste, expiration=None):
        """
        Fill the 'myform' form from the paste and submit it.

        paste.public being True or False selects ['0'] or ['1'] for
        paste_private; any other value leaves that field untouched.
        paste_expire_date is only set when expiration is truthy.
        """
        browser = self.browser
        browser.select_form(name='myform')
        browser['paste_code'] = paste.contents.encode(browser.ENCODING)
        browser['paste_name'] = paste.title.encode(browser.ENCODING)

        visibility = paste.public
        if isinstance(visibility, bool):
            browser['paste_private'] = ['0'] if visibility else ['1']

        if expiration:
            browser['paste_expire_date'] = [expiration]
        browser.submit()
class UserPage(BasePastebinPage):
    """User profile page; it adds nothing to what the base page provides."""

View file

@ -1,30 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.paste import BasePaste
__all__ = ['PastebinPaste']
class PastebinPaste(BasePaste):
    """Paste object for pastebin, able to turn a paste ID into its page URL."""

    @classmethod
    def id2url(cls, _id):
        """Return the pastebin.com page URL of the given paste ID."""
        page_url = 'http://pastebin.com/%s' % _id
        return page_url

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@ -17,9 +17,9 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from weboob.capabilities.base import NotLoaded
from weboob.capabilities.paste import PasteNotFound
from weboob.tools.test import BackendTest
class PastebinTest(BackendTest):
@ -34,6 +34,7 @@ class PastebinTest(BackendTest):
assert p.page_url == 'http://pastebin.com/7HmXwzyt'
assert p.contents == u'prout'
assert p.public is True
assert p._date.year == 2011
# raw method
p = self.backend.get_paste(_id)
@ -44,13 +45,16 @@ class PastebinTest(BackendTest):
assert p.public is NotLoaded
def test_post(self):
p = self.backend.new_paste(None, title=u'ouiboube', contents=u'Weboob Test', public=True)
# we cannot test public pastes, as the website sometimes forces them as private
# there seems to be a very low post per day limit, even when logged in
p = self.backend.new_paste(None, title=u'ouiboube', contents=u'Weboob Test', public=False)
self.backend.post_paste(p, max_age=600)
assert p.id
assert not p.id.startswith('http://')
self.backend.fill_paste(p, ['title'])
assert p.title == u'ouiboube'
assert p.id in p.page_url
assert p.public is True
assert p.public is False
def test_specialchars(self):
# post a paste and get the contents through the HTML response