From 3010ae7a706c71ea1a86970e010ad7bc4ab4d626 Mon Sep 17 00:00:00 2001 From: Laurent Bachelier Date: Tue, 17 Jun 2014 00:45:59 +0200 Subject: [PATCH] pastebin: Convert to browser2, fix some little things Stop testing public posts, this has been failing for a long time. Retrieve the date, though it isn't in the capability yet. Move most of the code in browser.py now that it is cleaner and simpler. Some more improvements should be coming, but it is already in a much better state than it was. --- modules/pastebin/backend.py | 37 +++----- modules/pastebin/browser.py | 163 ++++++++++++++++++++++++------------ modules/pastebin/pages.py | 85 ------------------- modules/pastebin/paste.py | 30 ------- modules/pastebin/test.py | 12 ++- 5 files changed, 132 insertions(+), 195 deletions(-) delete mode 100644 modules/pastebin/pages.py delete mode 100644 modules/pastebin/paste.py diff --git a/modules/pastebin/backend.py b/modules/pastebin/backend.py index 73be24f1..816dffdf 100644 --- a/modules/pastebin/backend.py +++ b/modules/pastebin/backend.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011-2012 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -18,18 +18,12 @@ # along with weboob. If not, see . - - -from weboob.tools.capabilities.paste import BasePasteBackend -from weboob.tools.backend import BaseBackend, BackendConfig from weboob.capabilities.base import NotLoaded +from weboob.tools.backend import BackendConfig, BaseBackend +from weboob.tools.capabilities.paste import BasePasteBackend from weboob.tools.value import Value, ValueBackendPassword -from .browser import PastebinBrowser -from .paste import PastebinPaste - - -__all__ = ['PastebinBackend'] +from .browser import PastebinBrowser, PastebinPaste class PastebinBackend(BaseBackend, BasePasteBackend): @@ -60,8 +54,8 @@ class PastebinBackend(BaseBackend, BasePasteBackend): password = self.config['password'].get() else: password = None - return self.create_browser(self.config['api_key'].get() if self.config['api_key'].get() else None, - username, password, get_home=False) + return self.create_browser(self.config['api_key'].get() or None, + username, password) def new_paste(self, *args, **kwargs): return PastebinPaste(*args, **kwargs) @@ -75,20 +69,16 @@ class PastebinBackend(BaseBackend, BasePasteBackend): return 1 def get_paste(self, _id): - with self.browser: - return self.browser.get_paste(_id) + return self.browser.get_paste(_id) def fill_paste(self, paste, fields): # if we only want the contents if fields == ['contents']: if paste.contents is NotLoaded: - with self.browser: - contents = self.browser.get_contents(paste.id) - paste.contents = contents + paste.contents = self.browser.get_contents(paste.id) # get all fields elif fields is None or len(fields): - with self.browser: - self.browser.fill_paste(paste) + self.browser.fill_paste(paste) return paste def post_paste(self, paste, max_age=None, use_api=True): @@ -96,10 +86,9 @@ class PastebinBackend(BaseBackend, BasePasteBackend): expiration = self.get_closest_expiration(max_age) else: expiration = None - with self.browser: - if use_api and self.config.get('api_key').get(): - self.browser.api_post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) - else: - self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) + if use_api and self.config.get('api_key').get(): + self.browser.api_post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) + else: + self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) OBJECTS = {PastebinPaste: fill_paste} diff --git a/modules/pastebin/browser.py b/modules/pastebin/browser.py index b4d488d9..40dd7d86 100644 --- a/modules/pastebin/browser.py +++ b/modules/pastebin/browser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011-2012 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -18,58 +18,125 @@ # along with weboob. If not, see . -from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserIncorrectPassword -from weboob.tools.browser.decorators import id2url, check_url -from weboob.tools.ordereddict import OrderedDict - -from weboob.capabilities.paste import PasteNotFound - -from .pages import PastePage, PostPage, UserPage, LoginPage -from .paste import PastebinPaste - -import urllib import re -__all__ = ['PastebinBrowser'] +from weboob.capabilities.paste import BasePaste, PasteNotFound +from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL +from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText +from weboob.tools.browser2.page import ItemElement, method, RawPage +from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable -class BadAPIRequest(Exception): +class PastebinPaste(BasePaste): + # TODO perhaps move this logic elsewhere, remove this and id2url from capability + # (page_url is required by pastoob) + @classmethod + def id2url(cls, _id): + return '%s%s' % (PastebinBrowser.BASEURL, _id) + + +class BasePastebinPage(HTMLPage): + @property + def logged(self): + for link in self.doc.xpath('//div[@id="header_bottom"]/ul[@class="top_menu"]//ul/li/a'): + if link.text == 'logout': + return True + if link.text == 'login': + return False + raise BrowserUnavailable('Unable to determine login state') + + +class LoginPage(BasePastebinPage): + def login(self, username, password): + form = self.get_form('myform') + form['user_name'] = username + form['user_password'] = password + form.submit() + + +class CleanVisibility(Filter): + def filter(self, txt): + if txt.startswith('Public'): + return True + if txt.startswith('Unlisted') or txt.startswith('Private'): + return False + return self.default_or_raise(FilterError('Unable to get the paste visibility')) + + +class PastePage(BasePastebinPage): + @method + class fill_paste(ItemElement): + klass = PastebinPaste + + def parse(self, el): + self.env['header'] = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]') + + obj_id = Env('id') + obj_title = Base(Env('header'), CleanText('.//div[@class="paste_box_line1"]//h1')) + obj_contents = RawText('//textarea[@id="paste_code"]') + obj_public = Base( + Env('header'), + CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title'))) + obj__date = Base(Env('header'), DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title'))) + + +class PostPage(BasePastebinPage): + def post(self, paste, expiration=None): + form = self.get_form(name='myform') + form['paste_code'] = paste.contents + form['paste_name'] = paste.title + if paste.public is True: + form['paste_private'] = '0' + elif paste.public is False: + form['paste_private'] = '1' + if expiration: + form['paste_expire_date'] = expiration + form.submit() + + +class UserPage(BasePastebinPage): pass -class PastebinBrowser(BaseBrowser): - DOMAIN = 'pastebin.com' - ENCODING = 'UTF-8' - PASTE_URL = 'http://%s/(?P\w+)' % DOMAIN - API_URL = 'http://%s/api/api_post.php' % DOMAIN - PAGES = OrderedDict(( - ('http://%s/login' % DOMAIN, LoginPage), - ('http://%s/u/(?P.+)' % DOMAIN, UserPage), - ('http://%s/' % DOMAIN, PostPage), - (PASTE_URL, PastePage), - )) +class BadAPIRequest(BrowserUnavailable): + pass + + +class PastebinBrowser(LoginBrowser): + BASEURL = 'http://pastebin.com/' + + api = URL('api/api_post\.php', RawPage) + apilogin = URL('api/api_login\.php', RawPage) + login = URL('login', LoginPage) + userprofile = URL('u/(?P.+)', UserPage) + postpage = URL('$', PostPage) + paste = URL('(?P\w+)', PastePage) + raw = URL('raw\.php\?i=(?P\w+)', RawPage) def __init__(self, api_key, *args, **kwargs): + super(PastebinBrowser, self).__init__(*args, **kwargs) self.api_key = api_key self.user_key = None - BaseBrowser.__init__(self, *args, **kwargs) + # being connected is optionnal at the module level, so require + # login only if an username is configured + if self.username: + self.post = need_login(self.post_paste) def fill_paste(self, paste): """ Get as much as information possible from the paste page """ try: - self.location(paste.page_url, no_login=True) - return self.page.fill_paste(paste) + return self.paste.stay_or_go(id=paste.id).fill_paste(paste) except BrowserHTTPNotFound: raise PasteNotFound() - @id2url(PastebinPaste.id2url) - @check_url(PASTE_URL) + @paste.id2url def get_paste(self, url): - _id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id'] - return PastebinPaste(_id) + m = self.paste.match(url) + if m: + return PastebinPaste(m.groupdict()['id']) def get_contents(self, _id): """ @@ -78,22 +145,18 @@ class PastebinBrowser(BaseBrowser): Returns unicode. """ try: - return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING) + return self.raw.open(id=_id).response.text except BrowserHTTPNotFound: raise PasteNotFound() def post_paste(self, paste, expiration=None): - self.home() - if not self.is_on_page(PostPage): - self.home() - self.page.post(paste, expiration=expiration) - paste.id = self.page.get_id() + self.postpage.stay_or_go().post(paste, expiration=expiration) + self.page.fill_paste(paste) def api_post_paste(self, paste, expiration=None): data = {'api_dev_key': self.api_key, 'api_option': 'paste', - 'api_paste_code': paste.contents.encode(self.ENCODING), - } + 'api_paste_code': paste.contents} if self.password: data['api_user_key'] = self.api_login() if paste.public is True: @@ -101,12 +164,12 @@ class PastebinBrowser(BaseBrowser): elif paste.public is False: data['api_paste_private'] = '1' if paste.title: - data['api_paste_name'] = paste.title.encode(self.ENCODING) + data['api_paste_name'] = paste.title if expiration: data['api_paste_expire_date'] = expiration - res = self.readurl(self.API_URL, urllib.urlencode(data)).decode(self.ENCODING) + res = self.open(self.api.build(), data=data, data_encoding='utf-8').text self._validate_api_response(res) - paste.id = re.match('^%s$' % self.PASTE_URL, res).groupdict()['id'] + paste.id = self.paste.match(res).groupdict()['id'] def api_login(self): # "The api_user_key does not expire." @@ -116,10 +179,8 @@ class PastebinBrowser(BaseBrowser): data = {'api_dev_key': self.api_key, 'api_user_name': self.username, - 'api_user_password': self.password - } - res = self.readurl('http://%s/api/api_login.php' % self.DOMAIN, - urllib.urlencode(data)).decode(self.ENCODING) + 'api_user_password': self.password} + res = self.open(self.apilogin.build(), data=data, data_encoding='utf-8').text try: self._validate_api_response(res) except BadAPIRequest as e: @@ -130,16 +191,14 @@ class PastebinBrowser(BaseBrowser): self.user_key = res return res + # TODO make it into a Page? def _validate_api_response(self, res): matches = re.match('Bad API request, (?P.+)', res) if matches: raise BadAPIRequest(matches.groupdict().get('error')) - def is_logged(self): - return self.page and self.page.is_logged() - - def login(self): - self.location('http://%s/login' % self.DOMAIN, no_login=True) + def do_login(self): + self.login.stay_or_go().login() self.page.login(self.username, self.password) - if not self.is_logged(): + if not self.page.logged: raise BrowserIncorrectPassword() diff --git a/modules/pastebin/pages.py b/modules/pastebin/pages.py deleted file mode 100644 index 01d10e00..00000000 --- a/modules/pastebin/pages.py +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011-2012 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from weboob.tools.browser import BasePage, BrokenPageError - -__all__ = ['PastePage', 'PostPage', 'LoginPage'] - - -class BasePastebinPage(BasePage): - def is_logged(self): - header = self.parser.select(self.document.getroot(), - 'id("header_bottom")/ul[@class="top_menu"]', 1, 'xpath') - for link in header.xpath('//ul/li/a'): - if link.text == 'logout': - return True - if link.text == 'login': - return False - - -class LoginPage(BasePastebinPage): - def login(self, username, password): - self.browser.select_form(nr=1) - self.browser['user_name'] = username.encode(self.browser.ENCODING) - self.browser['user_password'] = password.encode(self.browser.ENCODING) - self.browser.submit() - - -class PastePage(BasePastebinPage): - def fill_paste(self, paste): - header = self.parser.select(self.document.getroot(), - 'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath') - paste.title = unicode(self.parser.select(header, - '//div[@class="paste_box_line1"]//h1', 1, 'xpath').text) - paste.contents = unicode(self.parser.select(self.document.getroot(), - '//textarea[@id="paste_code"]', 1, 'xpath').text) - visibility_text = self.parser.select(header, - '//div[@class="paste_box_line1"]//img', 1, 'xpath').attrib['title'] - if visibility_text.startswith('Public'): - paste.public = True - elif visibility_text.startswith('Unlisted') or visibility_text.startswith('Private'): - paste.public = False - else: - raise BrokenPageError('Unable to get the paste visibility') - return paste - - def get_id(self): - """ - Find out the ID from the URL - """ - return self.group_dict['id'] - - -class PostPage(BasePastebinPage): - def post(self, paste, expiration=None): - self.browser.select_form(name='myform') - self.browser['paste_code'] = paste.contents.encode(self.browser.ENCODING) - self.browser['paste_name'] = paste.title.encode(self.browser.ENCODING) - if paste.public is True: - self.browser['paste_private'] = ['0'] - elif paste.public is False: - self.browser['paste_private'] = ['1'] - if expiration: - self.browser['paste_expire_date'] = [expiration] - self.browser.submit() - - -class UserPage(BasePastebinPage): - pass diff --git a/modules/pastebin/paste.py b/modules/pastebin/paste.py deleted file mode 100644 index bca243db..00000000 --- a/modules/pastebin/paste.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from weboob.capabilities.paste import BasePaste - - -__all__ = ['PastebinPaste'] - - -class PastebinPaste(BasePaste): - @classmethod - def id2url(cls, _id): - return 'http://pastebin.com/%s' % _id diff --git a/modules/pastebin/test.py b/modules/pastebin/test.py index d6d96048..8cb075d0 100644 --- a/modules/pastebin/test.py +++ b/modules/pastebin/test.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . -from weboob.tools.test import BackendTest from weboob.capabilities.base import NotLoaded from weboob.capabilities.paste import PasteNotFound +from weboob.tools.test import BackendTest class PastebinTest(BackendTest): @@ -34,6 +34,7 @@ class PastebinTest(BackendTest): assert p.page_url == 'http://pastebin.com/7HmXwzyt' assert p.contents == u'prout' assert p.public is True + assert p._date.year == 2011 # raw method p = self.backend.get_paste(_id) @@ -44,13 +45,16 @@ class PastebinTest(BackendTest): assert p.public is NotLoaded def test_post(self): - p = self.backend.new_paste(None, title=u'ouiboube', contents=u'Weboob Test', public=True) + # we cannot test public pastes, as the website sometimes forces them as private + # there seems to be a very low post per day limit, even when logged in + p = self.backend.new_paste(None, title=u'ouiboube', contents=u'Weboob Test', public=False) self.backend.post_paste(p, max_age=600) assert p.id + assert not p.id.startswith('http://') self.backend.fill_paste(p, ['title']) assert p.title == u'ouiboube' assert p.id in p.page_url - assert p.public is True + assert p.public is False def test_specialchars(self): # post a paste and get the contents through the HTML response