pastebin: Convert to browser2, fix some little things
Stop testing public posts; this has been failing for a long time. Retrieve the date, though it isn't in the capability yet. Move most of the code into browser.py now that it is cleaner and simpler. More improvements should be coming, but it is already in a much better state than it was.
This commit is contained in:
parent
b013828ad0
commit
3010ae7a70
5 changed files with 131 additions and 194 deletions
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2011-2012 Laurent Bachelier
|
||||
# Copyright(C) 2011-2014 Laurent Bachelier
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
|
|
@ -18,58 +18,125 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserIncorrectPassword
|
||||
from weboob.tools.browser.decorators import id2url, check_url
|
||||
from weboob.tools.ordereddict import OrderedDict
|
||||
|
||||
from weboob.capabilities.paste import PasteNotFound
|
||||
|
||||
from .pages import PastePage, PostPage, UserPage, LoginPage
|
||||
from .paste import PastebinPaste
|
||||
|
||||
import urllib
|
||||
import re
|
||||
|
||||
__all__ = ['PastebinBrowser']
|
||||
from weboob.capabilities.paste import BasePaste, PasteNotFound
|
||||
from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL
|
||||
from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText
|
||||
from weboob.tools.browser2.page import ItemElement, method, RawPage
|
||||
from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable
|
||||
|
||||
|
||||
class BadAPIRequest(Exception):
|
||||
class PastebinPaste(BasePaste):
    """Paste object for pastebin, able to build its page URL from its id."""

    # TODO perhaps move this logic elsewhere, remove this and id2url from capability
    # (page_url is required by pastoob)
    @classmethod
    def id2url(cls, _id):
        """Return the paste URL: the browser's BASEURL followed by the id."""
        base = PastebinBrowser.BASEURL
        return '%s%s' % (base, _id)
|
||||
|
||||
|
||||
class BasePastebinPage(HTMLPage):
    """Common base for the HTML pages; exposes the login state."""

    @property
    def logged(self):
        """
        Tell whether the page was served to a logged-in user.

        The links of the top menu are scanned in document order; the first
        one whose text is exactly 'logout' (True) or 'login' (False) decides.

        Raises BrowserUnavailable when neither link is present.
        """
        states = {'logout': True, 'login': False}
        anchors = self.doc.xpath('//div[@id="header_bottom"]/ul[@class="top_menu"]//ul/li/a')
        for anchor in anchors:
            label = anchor.text
            if label in states:
                return states[label]
        raise BrowserUnavailable('Unable to determine login state')
|
||||
|
||||
|
||||
class LoginPage(BasePastebinPage):
    """Page holding the login form."""

    def login(self, username, password):
        """Fill the form named 'myform' with the credentials and submit it."""
        credentials = (
            ('user_name', username),
            ('user_password', password),
        )
        login_form = self.get_form('myform')
        for field, value in credentials:
            login_form[field] = value
        login_form.submit()
|
||||
|
||||
|
||||
class CleanVisibility(Filter):
    """Filter converting a visibility label into a boolean "public" flag."""

    def filter(self, txt):
        """
        Map the label to a boolean.

        Labels starting with 'Public' give True; labels starting with
        'Unlisted' or 'Private' give False. For anything else the result of
        default_or_raise() called with a FilterError is returned.
        """
        if txt.startswith('Public'):
            return True
        # str.startswith accepts a tuple of candidate prefixes
        if txt.startswith(('Unlisted', 'Private')):
            return False
        error = FilterError('Unable to get the paste visibility')
        return self.default_or_raise(error)
|
||||
|
||||
|
||||
class PastePage(BasePastebinPage):
    """Page displaying a single paste."""

    @method
    class fill_paste(ItemElement):
        # Declarative description of how a PastebinPaste is filled from
        # this page; each obj_* attribute names the paste field it feeds.
        klass = PastebinPaste

        def parse(self, el):
            # Store the paste info box in the environment so the fields
            # below can be resolved relative to it through Env('header').
            self.env['header'] = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]')

        # The id is read from the environment, not from the document
        # (presumably the "id" group of the paste URL; confirm in caller).
        obj_id = Env('id')
        obj_title = Base(Env('header'), CleanText('.//div[@class="paste_box_line1"]//h1'))
        # Raw content of the textarea holding the paste code.
        obj_contents = RawText('//textarea[@id="paste_code"]')
        # Visibility is derived from the title attribute of the image in
        # the first line of the info box.
        obj_public = Base(
            Env('header'),
            CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title')))
        # Date parsed from the title attribute of the first span of the
        # second line of the info box.
        obj__date = Base(Env('header'), DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title')))
|
||||
|
||||
|
||||
class PostPage(BasePastebinPage):
    """Page holding the form used to create a new paste."""

    def post(self, paste, expiration=None):
        """
        Fill the form named 'myform' from the paste and submit it.

        paste: object providing contents, title and public.
        expiration: value for the 'paste_expire_date' field; the field is
        left untouched when this is falsy.

        The 'paste_private' field is only set when paste.public is exactly
        True ('0') or exactly False ('1').
        """
        paste_form = self.get_form(name='myform')
        paste_form['paste_code'] = paste.contents
        paste_form['paste_name'] = paste.title

        # Identity checks on purpose: any other value leaves the form default.
        if paste.public is True:
            paste_form['paste_private'] = '0'
        elif paste.public is False:
            paste_form['paste_private'] = '1'

        if expiration:
            paste_form['paste_expire_date'] = expiration

        paste_form.submit()
|
||||
|
||||
|
||||
class UserPage(BasePastebinPage):
    """User page; adds nothing to BasePastebinPage."""
|
||||
|
||||
|
||||
class PastebinBrowser(BaseBrowser):
|
||||
DOMAIN = 'pastebin.com'
|
||||
ENCODING = 'UTF-8'
|
||||
PASTE_URL = 'http://%s/(?P<id>\w+)' % DOMAIN
|
||||
API_URL = 'http://%s/api/api_post.php' % DOMAIN
|
||||
PAGES = OrderedDict((
|
||||
('http://%s/login' % DOMAIN, LoginPage),
|
||||
('http://%s/u/(?P<username>.+)' % DOMAIN, UserPage),
|
||||
('http://%s/' % DOMAIN, PostPage),
|
||||
(PASTE_URL, PastePage),
|
||||
))
|
||||
class BadAPIRequest(BrowserUnavailable):
    """Error reported by the API in a 'Bad API request, ...' response."""
|
||||
|
||||
|
||||
class PastebinBrowser(LoginBrowser):
|
||||
BASEURL = 'http://pastebin.com/'
|
||||
|
||||
api = URL('api/api_post\.php', RawPage)
|
||||
apilogin = URL('api/api_login\.php', RawPage)
|
||||
login = URL('login', LoginPage)
|
||||
userprofile = URL('u/(?P<username>.+)', UserPage)
|
||||
postpage = URL('$', PostPage)
|
||||
paste = URL('(?P<id>\w+)', PastePage)
|
||||
raw = URL('raw\.php\?i=(?P<id>\w+)', RawPage)
|
||||
|
||||
def __init__(self, api_key, *args, **kwargs):
|
||||
super(PastebinBrowser, self).__init__(*args, **kwargs)
|
||||
self.api_key = api_key
|
||||
self.user_key = None
|
||||
|
||||
BaseBrowser.__init__(self, *args, **kwargs)
|
||||
# being connected is optional at the module level, so require
|
||||
# login only if a username is configured
|
||||
if self.username:
|
||||
self.post = need_login(self.post_paste)
|
||||
|
||||
def fill_paste(self, paste):
|
||||
"""
|
||||
Get as much information as possible from the paste page
|
||||
"""
|
||||
try:
|
||||
self.location(paste.page_url, no_login=True)
|
||||
return self.page.fill_paste(paste)
|
||||
return self.paste.stay_or_go(id=paste.id).fill_paste(paste)
|
||||
except BrowserHTTPNotFound:
|
||||
raise PasteNotFound()
|
||||
|
||||
@id2url(PastebinPaste.id2url)
|
||||
@check_url(PASTE_URL)
|
||||
@paste.id2url
|
||||
def get_paste(self, url):
|
||||
_id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id']
|
||||
return PastebinPaste(_id)
|
||||
m = self.paste.match(url)
|
||||
if m:
|
||||
return PastebinPaste(m.groupdict()['id'])
|
||||
|
||||
def get_contents(self, _id):
|
||||
"""
|
||||
|
|
@ -78,22 +145,18 @@ class PastebinBrowser(BaseBrowser):
|
|||
Returns unicode.
|
||||
"""
|
||||
try:
|
||||
return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
|
||||
return self.raw.open(id=_id).response.text
|
||||
except BrowserHTTPNotFound:
|
||||
raise PasteNotFound()
|
||||
|
||||
def post_paste(self, paste, expiration=None):
|
||||
self.home()
|
||||
if not self.is_on_page(PostPage):
|
||||
self.home()
|
||||
self.page.post(paste, expiration=expiration)
|
||||
paste.id = self.page.get_id()
|
||||
self.postpage.stay_or_go().post(paste, expiration=expiration)
|
||||
self.page.fill_paste(paste)
|
||||
|
||||
def api_post_paste(self, paste, expiration=None):
|
||||
data = {'api_dev_key': self.api_key,
|
||||
'api_option': 'paste',
|
||||
'api_paste_code': paste.contents.encode(self.ENCODING),
|
||||
}
|
||||
'api_paste_code': paste.contents}
|
||||
if self.password:
|
||||
data['api_user_key'] = self.api_login()
|
||||
if paste.public is True:
|
||||
|
|
@ -101,12 +164,12 @@ class PastebinBrowser(BaseBrowser):
|
|||
elif paste.public is False:
|
||||
data['api_paste_private'] = '1'
|
||||
if paste.title:
|
||||
data['api_paste_name'] = paste.title.encode(self.ENCODING)
|
||||
data['api_paste_name'] = paste.title
|
||||
if expiration:
|
||||
data['api_paste_expire_date'] = expiration
|
||||
res = self.readurl(self.API_URL, urllib.urlencode(data)).decode(self.ENCODING)
|
||||
res = self.open(self.api.build(), data=data, data_encoding='utf-8').text
|
||||
self._validate_api_response(res)
|
||||
paste.id = re.match('^%s$' % self.PASTE_URL, res).groupdict()['id']
|
||||
paste.id = self.paste.match(res).groupdict()['id']
|
||||
|
||||
def api_login(self):
|
||||
# "The api_user_key does not expire."
|
||||
|
|
@ -116,10 +179,8 @@ class PastebinBrowser(BaseBrowser):
|
|||
|
||||
data = {'api_dev_key': self.api_key,
|
||||
'api_user_name': self.username,
|
||||
'api_user_password': self.password
|
||||
}
|
||||
res = self.readurl('http://%s/api/api_login.php' % self.DOMAIN,
|
||||
urllib.urlencode(data)).decode(self.ENCODING)
|
||||
'api_user_password': self.password}
|
||||
res = self.open(self.apilogin.build(), data=data, data_encoding='utf-8').text
|
||||
try:
|
||||
self._validate_api_response(res)
|
||||
except BadAPIRequest as e:
|
||||
|
|
@ -130,16 +191,14 @@ class PastebinBrowser(BaseBrowser):
|
|||
self.user_key = res
|
||||
return res
|
||||
|
||||
# TODO make it into a Page?
|
||||
def _validate_api_response(self, res):
    """
    Raise BadAPIRequest when the API response reports an error.

    res: text of the API response. A response starting with
    'Bad API request, ' carries the error description after that prefix,
    and the description becomes the exception argument.

    Returns None when the response is not an error.
    """
    failure = re.match('Bad API request, (?P<error>.+)', res)
    if failure is not None:
        raise BadAPIRequest(failure.group('error'))
|
||||
|
||||
def is_logged(self):
|
||||
return self.page and self.page.is_logged()
|
||||
|
||||
def login(self):
|
||||
self.location('http://%s/login' % self.DOMAIN, no_login=True)
|
||||
def do_login(self):
    """
    Log in through the website's login form.

    Goes to the login page (or stays on it), submits the configured
    username and password once, then checks the login state of the
    page reached after the submission.

    Raises BrowserIncorrectPassword when that page does not report a
    logged-in user.
    """
    # LoginPage.login() requires both credentials; calling it without
    # arguments raised TypeError before the form was ever submitted.
    self.login.stay_or_go().login(self.username, self.password)
    if not self.page.logged:
        raise BrowserIncorrectPassword()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue