pastebin: Convert to browser2, fix some little things

Stop testing public posts; this has been failing for a long time.
Retrieve the date, though it isn't in the capability yet.
Move most of the code into browser.py now that it is cleaner and simpler.

Some more improvements should be coming, but it is already in a much
better state than it was.
This commit is contained in:
Laurent Bachelier 2014-06-17 00:45:59 +02:00
commit 3010ae7a70
5 changed files with 131 additions and 194 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011-2012 Laurent Bachelier
# Copyright(C) 2011-2014 Laurent Bachelier
#
# This file is part of weboob.
#
@ -18,58 +18,125 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserIncorrectPassword
from weboob.tools.browser.decorators import id2url, check_url
from weboob.tools.ordereddict import OrderedDict
from weboob.capabilities.paste import PasteNotFound
from .pages import PastePage, PostPage, UserPage, LoginPage
from .paste import PastebinPaste
import urllib
import re
__all__ = ['PastebinBrowser']
from weboob.capabilities.paste import BasePaste, PasteNotFound
from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL
from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText
from weboob.tools.browser2.page import ItemElement, method, RawPage
from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable
class BadAPIRequest(Exception):
class PastebinPaste(BasePaste):
    # Paste type used by this module; only adds the id -> URL mapping.
    # TODO perhaps move this logic elsewhere, remove this and id2url from capability
    # (page_url is required by pastoob)
    @classmethod
    def id2url(cls, _id):
        """Return the paste URL: PastebinBrowser.BASEURL followed by ``_id``."""
        base_url = PastebinBrowser.BASEURL
        return '%s%s' % (base_url, _id)
class BasePastebinPage(HTMLPage):
    """Common base of the pastebin pages; exposes the login state."""

    @property
    def logged(self):
        """
        Tell whether the page was rendered for a logged-in user.

        The links of the top menu are scanned in document order: the first
        link whose text is 'logout' means logged in, the first whose text is
        'login' means anonymous. BrowserUnavailable is raised when neither
        link is found.
        """
        states = {'logout': True, 'login': False}
        menu_links = self.doc.xpath('//div[@id="header_bottom"]/ul[@class="top_menu"]//ul/li/a')
        for anchor in menu_links:
            if anchor.text in states:
                return states[anchor.text]
        raise BrowserUnavailable('Unable to determine login state')
class LoginPage(BasePastebinPage):
    """Page holding the login form."""

    def login(self, username, password):
        """Fill the 'myform' form with the given credentials and submit it."""
        credentials_form = self.get_form('myform')
        credentials_form['user_name'] = username
        credentials_form['user_password'] = password
        credentials_form.submit()
class CleanVisibility(Filter):
    """Filter turning a paste visibility label into a boolean."""

    def filter(self, txt):
        """
        Return True for a label starting with 'Public' and False for one
        starting with 'Unlisted' or 'Private'. Any other label is handed to
        default_or_raise() wrapped in a FilterError.
        """
        # The three prefixes are mutually exclusive, so the test order is free.
        if txt.startswith(('Unlisted', 'Private')):
            return False
        if txt.startswith('Public'):
            return True
        return self.default_or_raise(FilterError('Unable to get the paste visibility'))
# Page of a single paste. fill_paste is an ItemElement producing a PastebinPaste.
class PastePage(BasePastebinPage):
@method
class fill_paste(ItemElement):
klass = PastebinPaste
# Look up the paste info box once and store it in env under 'header';
# the Base(Env('header'), ...) filters below take it as their first argument
# (presumably to evaluate their relative './/' XPaths against it -- confirm).
def parse(self, el):
self.env['header'] = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]')
# The id is read from the 'id' env entry rather than from the document.
obj_id = Env('id')
obj_title = Base(Env('header'), CleanText('.//div[@class="paste_box_line1"]//h1'))
# Contents are taken as raw text from the 'paste_code' textarea.
obj_contents = RawText('//textarea[@id="paste_code"]')
# Visibility is derived from the 'title' attribute of the image in the first header line.
obj_public = Base(
Env('header'),
CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title')))
# Stored under the private '_date' name; per the commit message, the date is
# not part of the capability yet.
obj__date = Base(Env('header'), DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title')))
class PostPage(BasePastebinPage):
    """Page holding the form used to submit a new paste."""

    def post(self, paste, expiration=None):
        """
        Fill the 'myform' form from ``paste`` and submit it.

        The visibility field is only set when ``paste.public`` is exactly
        True or False, and the expiration field only when ``expiration`` is
        truthy.
        """
        new_paste_form = self.get_form(name='myform')
        new_paste_form['paste_code'] = paste.contents
        new_paste_form['paste_name'] = paste.title
        # public=True maps to '0', public=False to '1'; anything else leaves
        # the field untouched.
        for visibility, private_flag in ((True, '0'), (False, '1')):
            if paste.public is visibility:
                new_paste_form['paste_private'] = private_flag
                break
        if expiration:
            new_paste_form['paste_expire_date'] = expiration
        new_paste_form.submit()
class UserPage(BasePastebinPage):
    """Page matched by the user profile URL; adds nothing to BasePastebinPage."""
# NOTE(review): this BaseBrowser/PAGES declaration relies on the names imported
# from weboob.tools.browser, while a LoginBrowser-based PastebinBrowser is
# declared again further down; presumably this is the pre-browser2 version --
# confirm which declaration is the live one.
class PastebinBrowser(BaseBrowser):
DOMAIN = 'pastebin.com'
ENCODING = 'UTF-8'
# Regex for a paste page URL; the named group 'id' captures the paste id.
PASTE_URL = 'http://%s/(?P<id>\w+)' % DOMAIN
API_URL = 'http://%s/api/api_post.php' % DOMAIN
# URL pattern -> page class pairs, kept in an OrderedDict.
PAGES = OrderedDict((
('http://%s/login' % DOMAIN, LoginPage),
('http://%s/u/(?P<username>.+)' % DOMAIN, UserPage),
('http://%s/' % DOMAIN, PostPage),
(PASTE_URL, PastePage),
))
class BadAPIRequest(BrowserUnavailable):
    """Raised when an API response is a 'Bad API request, ...' message."""
# Browser for pastebin.com built on LoginBrowser.
class PastebinBrowser(LoginBrowser):
BASEURL = 'http://pastebin.com/'
# URL patterns, each paired with the page class used for it.
# NOTE(review): the patterns look relative to BASEURL -- confirm against URL().
# NOTE(review): '\.', '\?' and '\w' sit in non-raw string literals; raw strings
# would keep the same values and avoid invalid-escape warnings on recent Pythons.
api = URL('api/api_post\.php', RawPage)
apilogin = URL('api/api_login\.php', RawPage)
login = URL('login', LoginPage)
userprofile = URL('u/(?P<username>.+)', UserPage)
postpage = URL('$', PostPage)
paste = URL('(?P<id>\w+)', PastePage)
raw = URL('raw\.php\?i=(?P<id>\w+)', RawPage)
# Store the API key and, when a username is configured, wrap post_paste with need_login.
def __init__(self, api_key, *args, **kwargs):
super(PastebinBrowser, self).__init__(*args, **kwargs)
self.api_key = api_key
# No API user key yet; api_login() stores one here.
self.user_key = None
# NOTE(review): the base class is initialised both by the super() call above
# and by this explicit call; presumably only one belongs to the current
# version of the file -- confirm.
BaseBrowser.__init__(self, *args, **kwargs)
# being connected is optional at the module level, so require
# login only if a username is configured
if self.username:
# NOTE(review): the wrapped callable is stored as self.post, while the method
# defined in this class is named post_paste -- confirm that callers go
# through self.post, otherwise the login requirement is never applied.
self.post = need_login(self.post_paste)
def fill_paste(self, paste):
"""
Get as much information as possible from the paste page.

Raises PasteNotFound when the request ends in BrowserHTTPNotFound.
"""
try:
# NOTE(review): two variants of the same step follow; as written, the last
# return can never run. Presumably the location()/self.page pair is the
# pre-browser2 code -- confirm which variant is meant to stay.
self.location(paste.page_url, no_login=True)
return self.page.fill_paste(paste)
return self.paste.stay_or_go(id=paste.id).fill_paste(paste)
except BrowserHTTPNotFound:
raise PasteNotFound()
# Build a PastebinPaste (id only) from a paste URL.
# NOTE(review): two decorator stacks and two bodies are present; presumably
# @id2url/@check_url with the re.match() body is the pre-browser2 variant and
# @paste.id2url with the self.paste.match() body replaces it -- confirm.
@id2url(PastebinPaste.id2url)
@check_url(PASTE_URL)
@paste.id2url
def get_paste(self, url):
_id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id']
return PastebinPaste(_id)
# This variant falls through and returns None when the URL does not match
# the paste pattern.
m = self.paste.match(url)
if m:
return PastebinPaste(m.groupdict()['id'])
def get_contents(self, _id):
"""
@ -78,22 +145,18 @@ class PastebinBrowser(BaseBrowser):
Returns unicode.
"""
try:
# NOTE(review): two variants of the fetch follow; as written, the second
# return can never run. Presumably readurl() is the pre-browser2 code and
# self.raw.open() its replacement -- confirm.
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
return self.raw.open(id=_id).response.text
except BrowserHTTPNotFound:
# An HTTP not-found is reported as PasteNotFound.
raise PasteNotFound()
# Submit ``paste`` through the form of the post page, then update ``paste``
# from the page the browser ends up on.
# NOTE(review): two variants are interleaved below (home()/is_on_page()/
# get_id() versus postpage.stay_or_go()/fill_paste()); presumably the former is
# the pre-browser2 code -- confirm which lines are meant to stay.
def post_paste(self, paste, expiration=None):
self.home()
if not self.is_on_page(PostPage):
self.home()
self.page.post(paste, expiration=expiration)
paste.id = self.page.get_id()
self.postpage.stay_or_go().post(paste, expiration=expiration)
self.page.fill_paste(paste)
# Post ``paste`` through the API endpoint and set paste.id from the paste URL
# found in the response.
# NOTE(review): several statements appear twice (with and without explicit
# encode()/urlencode() calls), and a hunk header cuts the body; presumably the
# encode()/readurl() lines are the pre-browser2 code -- confirm.
def api_post_paste(self, paste, expiration=None):
data = {'api_dev_key': self.api_key,
'api_option': 'paste',
'api_paste_code': paste.contents.encode(self.ENCODING),
}
'api_paste_code': paste.contents}
# A user key is only requested when a password is configured.
if self.password:
data['api_user_key'] = self.api_login()
if paste.public is True:
@ -101,12 +164,12 @@ class PastebinBrowser(BaseBrowser):
elif paste.public is False:
data['api_paste_private'] = '1'
if paste.title:
data['api_paste_name'] = paste.title.encode(self.ENCODING)
data['api_paste_name'] = paste.title
if expiration:
data['api_paste_expire_date'] = expiration
res = self.readurl(self.API_URL, urllib.urlencode(data)).decode(self.ENCODING)
res = self.open(self.api.build(), data=data, data_encoding='utf-8').text
# Raises BadAPIRequest when the response is a 'Bad API request, ...' message.
self._validate_api_response(res)
# NOTE(review): the match result is used without a None check, unlike in
# get_paste -- confirm the response is always a paste URL at this point.
paste.id = re.match('^%s$' % self.PASTE_URL, res).groupdict()['id']
paste.id = self.paste.match(res).groupdict()['id']
# Request an API user key with the configured credentials, store the response
# in self.user_key and return it.
# NOTE(review): hunk headers cut this body, so some original lines are not
# visible here; the readurl()/urlencode() lines are presumably the pre-browser2
# code and self.open() their replacement -- confirm.
def api_login(self):
# "The api_user_key does not expire."
@ -116,10 +179,8 @@ class PastebinBrowser(BaseBrowser):
data = {'api_dev_key': self.api_key,
'api_user_name': self.username,
'api_user_password': self.password
}
res = self.readurl('http://%s/api/api_login.php' % self.DOMAIN,
urllib.urlencode(data)).decode(self.ENCODING)
'api_user_password': self.password}
res = self.open(self.apilogin.build(), data=data, data_encoding='utf-8').text
try:
# Raises BadAPIRequest when the response is a 'Bad API request, ...' message.
self._validate_api_response(res)
except BadAPIRequest as e:
@ -130,16 +191,14 @@ class PastebinBrowser(BaseBrowser):
self.user_key = res
return res
# TODO make it into a Page?
def _validate_api_response(self, res):
matches = re.match('Bad API request, (?P<error>.+)', res)
if matches:
raise BadAPIRequest(matches.groupdict().get('error'))
# Falsy when there is no current page, otherwise the page's is_logged() result.
# NOTE(review): the page classes visible here expose a 'logged' property rather
# than an is_logged() method; presumably this is pre-browser2 code -- confirm.
def is_logged(self):
return self.page and self.page.is_logged()
# Go to the login page with no_login=True.
# NOTE(review): the LoginBrowser-based class also declares a class attribute
# named 'login' (a URL); presumably this method header and location() call are
# pre-browser2 code superseded by do_login -- confirm.
def login(self):
self.location('http://%s/login' % self.DOMAIN, no_login=True)
# Log in with the configured credentials; raises BrowserIncorrectPassword when
# the resulting page does not report a logged-in state.
def do_login(self):
# NOTE(review): LoginPage.login() is declared with username and password
# parameters, but this call passes none; the next line then calls it again
# with the credentials -- confirm whether the trailing .login() is intended.
self.login.stay_or_go().login()
self.page.login(self.username, self.password)
# NOTE(review): two variants of the check follow; is_logged() is presumably
# the pre-browser2 form and page.logged its replacement -- confirm.
if not self.is_logged():
if not self.page.logged:
raise BrowserIncorrectPassword()