diff --git a/modules/pastebin/backend.py b/modules/pastebin/backend.py index 73be24f1..816dffdf 100644 --- a/modules/pastebin/backend.py +++ b/modules/pastebin/backend.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011-2012 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -18,18 +18,12 @@ # along with weboob. If not, see . - - -from weboob.tools.capabilities.paste import BasePasteBackend -from weboob.tools.backend import BaseBackend, BackendConfig from weboob.capabilities.base import NotLoaded +from weboob.tools.backend import BackendConfig, BaseBackend +from weboob.tools.capabilities.paste import BasePasteBackend from weboob.tools.value import Value, ValueBackendPassword -from .browser import PastebinBrowser -from .paste import PastebinPaste - - -__all__ = ['PastebinBackend'] +from .browser import PastebinBrowser, PastebinPaste class PastebinBackend(BaseBackend, BasePasteBackend): @@ -60,8 +54,8 @@ class PastebinBackend(BaseBackend, BasePasteBackend): password = self.config['password'].get() else: password = None - return self.create_browser(self.config['api_key'].get() if self.config['api_key'].get() else None, - username, password, get_home=False) + return self.create_browser(self.config['api_key'].get() or None, + username, password) def new_paste(self, *args, **kwargs): return PastebinPaste(*args, **kwargs) @@ -75,20 +69,16 @@ class PastebinBackend(BaseBackend, BasePasteBackend): return 1 def get_paste(self, _id): - with self.browser: - return self.browser.get_paste(_id) + return self.browser.get_paste(_id) def fill_paste(self, paste, fields): # if we only want the contents if fields == ['contents']: if paste.contents is NotLoaded: - with self.browser: - contents = self.browser.get_contents(paste.id) - paste.contents = contents + paste.contents = self.browser.get_contents(paste.id) # get all fields elif fields is None or len(fields): - with self.browser: - self.browser.fill_paste(paste) + 
self.browser.fill_paste(paste) return paste def post_paste(self, paste, max_age=None, use_api=True): @@ -96,10 +86,9 @@ class PastebinBackend(BaseBackend, BasePasteBackend): expiration = self.get_closest_expiration(max_age) else: expiration = None - with self.browser: - if use_api and self.config.get('api_key').get(): - self.browser.api_post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) - else: - self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) + if use_api and self.config.get('api_key').get(): + self.browser.api_post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) + else: + self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration)) OBJECTS = {PastebinPaste: fill_paste} diff --git a/modules/pastebin/browser.py b/modules/pastebin/browser.py index b4d488d9..40dd7d86 100644 --- a/modules/pastebin/browser.py +++ b/modules/pastebin/browser.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011-2012 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -18,58 +18,125 @@ # along with weboob. If not, see . 
-from weboob.tools.browser import BaseBrowser, BrowserHTTPNotFound, BrowserIncorrectPassword -from weboob.tools.browser.decorators import id2url, check_url -from weboob.tools.ordereddict import OrderedDict - -from weboob.capabilities.paste import PasteNotFound - -from .pages import PastePage, PostPage, UserPage, LoginPage -from .paste import PastebinPaste - -import urllib import re -__all__ = ['PastebinBrowser'] +from weboob.capabilities.paste import BasePaste, PasteNotFound +from weboob.tools.browser2 import HTMLPage, LoginBrowser, need_login, URL +from weboob.tools.browser2.filters import Attr, Base, CleanText, DateTime, Env, Filter, FilterError, RawText +from weboob.tools.browser2.page import ItemElement, method, RawPage +from weboob.tools.exceptions import BrowserHTTPNotFound, BrowserIncorrectPassword, BrowserUnavailable -class BadAPIRequest(Exception): +class PastebinPaste(BasePaste): + # TODO perhaps move this logic elsewhere, remove this and id2url from capability + # (page_url is required by pastoob) + @classmethod + def id2url(cls, _id): + return '%s%s' % (PastebinBrowser.BASEURL, _id) + + +class BasePastebinPage(HTMLPage): + @property + def logged(self): + for link in self.doc.xpath('//div[@id="header_bottom"]/ul[@class="top_menu"]//ul/li/a'): + if link.text == 'logout': + return True + if link.text == 'login': + return False + raise BrowserUnavailable('Unable to determine login state') + + +class LoginPage(BasePastebinPage): + def login(self, username, password): + form = self.get_form('myform') + form['user_name'] = username + form['user_password'] = password + form.submit() + + +class CleanVisibility(Filter): + def filter(self, txt): + if txt.startswith('Public'): + return True + if txt.startswith('Unlisted') or txt.startswith('Private'): + return False + return self.default_or_raise(FilterError('Unable to get the paste visibility')) + + +class PastePage(BasePastebinPage): + @method + class fill_paste(ItemElement): + klass = PastebinPaste + + def 
parse(self, el): + self.env['header'] = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]') + + obj_id = Env('id') + obj_title = Base(Env('header'), CleanText('.//div[@class="paste_box_line1"]//h1')) + obj_contents = RawText('//textarea[@id="paste_code"]') + obj_public = Base( + Env('header'), + CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title'))) + obj__date = Base(Env('header'), DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title'))) + + +class PostPage(BasePastebinPage): + def post(self, paste, expiration=None): + form = self.get_form(name='myform') + form['paste_code'] = paste.contents + form['paste_name'] = paste.title + if paste.public is True: + form['paste_private'] = '0' + elif paste.public is False: + form['paste_private'] = '1' + if expiration: + form['paste_expire_date'] = expiration + form.submit() + + +class UserPage(BasePastebinPage): pass -class PastebinBrowser(BaseBrowser): - DOMAIN = 'pastebin.com' - ENCODING = 'UTF-8' - PASTE_URL = 'http://%s/(?P<id>\w+)' % DOMAIN - API_URL = 'http://%s/api/api_post.php' % DOMAIN - PAGES = OrderedDict(( - ('http://%s/login' % DOMAIN, LoginPage), - ('http://%s/u/(?P<username>.+)' % DOMAIN, UserPage), - ('http://%s/' % DOMAIN, PostPage), - (PASTE_URL, PastePage), - )) +class BadAPIRequest(BrowserUnavailable): + pass + + +class PastebinBrowser(LoginBrowser): + BASEURL = 'http://pastebin.com/' + + api = URL('api/api_post\.php', RawPage) + apilogin = URL('api/api_login\.php', RawPage) + login = URL('login', LoginPage) + userprofile = URL('u/(?P<username>.+)', UserPage) + postpage = URL('$', PostPage) + paste = URL('(?P<id>\w+)', PastePage) + raw = URL('raw\.php\?i=(?P<id>\w+)', RawPage) def __init__(self, api_key, *args, **kwargs): + super(PastebinBrowser, self).__init__(*args, **kwargs) self.api_key = api_key self.user_key = None - BaseBrowser.__init__(self, *args, **kwargs) + # being connected is optional at the module level, so require + # login only if a username is configured + if self.username: 
+ self.post = need_login(self.post_paste) def fill_paste(self, paste): """ Get as much as information possible from the paste page """ try: - self.location(paste.page_url, no_login=True) - return self.page.fill_paste(paste) + return self.paste.stay_or_go(id=paste.id).fill_paste(paste) except BrowserHTTPNotFound: raise PasteNotFound() - @id2url(PastebinPaste.id2url) - @check_url(PASTE_URL) + @paste.id2url def get_paste(self, url): - _id = re.match('^%s$' % self.PASTE_URL, url).groupdict()['id'] - return PastebinPaste(_id) + m = self.paste.match(url) + if m: + return PastebinPaste(m.groupdict()['id']) def get_contents(self, _id): """ @@ -78,22 +145,18 @@ class PastebinBrowser(BaseBrowser): Returns unicode. """ try: - return self.readurl('http://%s/raw.php?i=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING) + return self.raw.open(id=_id).response.text except BrowserHTTPNotFound: raise PasteNotFound() def post_paste(self, paste, expiration=None): - self.home() - if not self.is_on_page(PostPage): - self.home() - self.page.post(paste, expiration=expiration) - paste.id = self.page.get_id() + self.postpage.stay_or_go().post(paste, expiration=expiration) + self.page.fill_paste(paste) def api_post_paste(self, paste, expiration=None): data = {'api_dev_key': self.api_key, 'api_option': 'paste', - 'api_paste_code': paste.contents.encode(self.ENCODING), - } + 'api_paste_code': paste.contents} if self.password: data['api_user_key'] = self.api_login() if paste.public is True: @@ -101,12 +164,12 @@ class PastebinBrowser(BaseBrowser): elif paste.public is False: data['api_paste_private'] = '1' if paste.title: - data['api_paste_name'] = paste.title.encode(self.ENCODING) + data['api_paste_name'] = paste.title if expiration: data['api_paste_expire_date'] = expiration - res = self.readurl(self.API_URL, urllib.urlencode(data)).decode(self.ENCODING) + res = self.open(self.api.build(), data=data, data_encoding='utf-8').text self._validate_api_response(res) - paste.id = 
re.match('^%s$' % self.PASTE_URL, res).groupdict()['id'] + paste.id = self.paste.match(res).groupdict()['id'] def api_login(self): # "The api_user_key does not expire." @@ -116,10 +179,8 @@ class PastebinBrowser(BaseBrowser): data = {'api_dev_key': self.api_key, 'api_user_name': self.username, - 'api_user_password': self.password - } - res = self.readurl('http://%s/api/api_login.php' % self.DOMAIN, - urllib.urlencode(data)).decode(self.ENCODING) + 'api_user_password': self.password} + res = self.open(self.apilogin.build(), data=data, data_encoding='utf-8').text try: self._validate_api_response(res) except BadAPIRequest as e: @@ -130,16 +191,14 @@ class PastebinBrowser(BaseBrowser): self.user_key = res return res + # TODO make it into a Page? def _validate_api_response(self, res): matches = re.match('Bad API request, (?P<error>.+)', res) if matches: raise BadAPIRequest(matches.groupdict().get('error')) - def is_logged(self): - return self.page and self.page.is_logged() - - def login(self): - self.location('http://%s/login' % self.DOMAIN, no_login=True) + def do_login(self): + self.login.stay_or_go().login() self.page.login(self.username, self.password) - if not self.is_logged(): + if not self.page.logged: raise BrowserIncorrectPassword() diff --git a/modules/pastebin/pages.py b/modules/pastebin/pages.py deleted file mode 100644 index 01d10e00..00000000 --- a/modules/pastebin/pages.py +++ /dev/null @@ -1,85 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011-2012 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from weboob.tools.browser import BasePage, BrokenPageError - -__all__ = ['PastePage', 'PostPage', 'LoginPage'] - - -class BasePastebinPage(BasePage): - def is_logged(self): - header = self.parser.select(self.document.getroot(), - 'id("header_bottom")/ul[@class="top_menu"]', 1, 'xpath') - for link in header.xpath('//ul/li/a'): - if link.text == 'logout': - return True - if link.text == 'login': - return False - - -class LoginPage(BasePastebinPage): - def login(self, username, password): - self.browser.select_form(nr=1) - self.browser['user_name'] = username.encode(self.browser.ENCODING) - self.browser['user_password'] = password.encode(self.browser.ENCODING) - self.browser.submit() - - -class PastePage(BasePastebinPage): - def fill_paste(self, paste): - header = self.parser.select(self.document.getroot(), - 'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath') - paste.title = unicode(self.parser.select(header, - '//div[@class="paste_box_line1"]//h1', 1, 'xpath').text) - paste.contents = unicode(self.parser.select(self.document.getroot(), - '//textarea[@id="paste_code"]', 1, 'xpath').text) - visibility_text = self.parser.select(header, - '//div[@class="paste_box_line1"]//img', 1, 'xpath').attrib['title'] - if visibility_text.startswith('Public'): - paste.public = True - elif visibility_text.startswith('Unlisted') or visibility_text.startswith('Private'): - paste.public = False - else: - raise BrokenPageError('Unable to get the paste visibility') - return paste - - def get_id(self): - """ - Find out the ID from the URL - """ - return self.group_dict['id'] - - -class PostPage(BasePastebinPage): - def post(self, paste, expiration=None): - self.browser.select_form(name='myform') - self.browser['paste_code'] = paste.contents.encode(self.browser.ENCODING) - self.browser['paste_name'] 
= paste.title.encode(self.browser.ENCODING) - if paste.public is True: - self.browser['paste_private'] = ['0'] - elif paste.public is False: - self.browser['paste_private'] = ['1'] - if expiration: - self.browser['paste_expire_date'] = [expiration] - self.browser.submit() - - -class UserPage(BasePastebinPage): - pass diff --git a/modules/pastebin/paste.py b/modules/pastebin/paste.py deleted file mode 100644 index bca243db..00000000 --- a/modules/pastebin/paste.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Laurent Bachelier -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from weboob.capabilities.paste import BasePaste - - -__all__ = ['PastebinPaste'] - - -class PastebinPaste(BasePaste): - @classmethod - def id2url(cls, _id): - return 'http://pastebin.com/%s' % _id diff --git a/modules/pastebin/test.py b/modules/pastebin/test.py index d6d96048..8cb075d0 100644 --- a/modules/pastebin/test.py +++ b/modules/pastebin/test.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2011 Laurent Bachelier +# Copyright(C) 2011-2014 Laurent Bachelier # # This file is part of weboob. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see . 
-from weboob.tools.test import BackendTest from weboob.capabilities.base import NotLoaded from weboob.capabilities.paste import PasteNotFound +from weboob.tools.test import BackendTest class PastebinTest(BackendTest): @@ -34,6 +34,7 @@ class PastebinTest(BackendTest): assert p.page_url == 'http://pastebin.com/7HmXwzyt' assert p.contents == u'prout' assert p.public is True + assert p._date.year == 2011 # raw method p = self.backend.get_paste(_id) @@ -44,13 +45,16 @@ class PastebinTest(BackendTest): assert p.public is NotLoaded def test_post(self): - p = self.backend.new_paste(None, title=u'ouiboube', contents=u'Weboob Test', public=True) + # we cannot test public pastes, as the website sometimes forces them as private + # there seems to be a very low post per day limit, even when logged in + p = self.backend.new_paste(None, title=u'ouiboube', contents=u'Weboob Test', public=False) self.backend.post_paste(p, max_age=600) assert p.id + assert not p.id.startswith('http://') self.backend.fill_paste(p, ['title']) assert p.title == u'ouiboube' assert p.id in p.page_url - assert p.public is True + assert p.public is False def test_specialchars(self): # post a paste and get the contents through the HTML response