support repositories to manage backends (closes #747)

This commit is contained in:
Romain Bignon 2012-01-03 12:10:21 +01:00
commit 14a7a1d362
410 changed files with 1079 additions and 297 deletions

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import PastealaconBackend
# Public API of the package: only the backend class is exported.
__all__ = ['PastealaconBackend']

View file

@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
import re
from weboob.tools.capabilities.paste import BasePasteBackend
from weboob.tools.backend import BaseBackend
from weboob.capabilities.base import NotLoaded
from .browser import PastealaconBrowser
from .paste import PastealaconPaste
# Public API of this module.
__all__ = ['PastealaconBackend']
class PastealaconBackend(BaseBackend, BasePasteBackend):
    """
    Paste backend for pastealacon.com.

    Private pastes are refused by can_post(), and the only expirations
    offered are the ones listed in EXPIRATIONS.
    """
    NAME = 'pastealacon'
    MAINTAINER = 'Laurent Bachelier'
    EMAIL = 'laurent@bachelier.name'
    VERSION = '0.a'
    DESCRIPTION = 'Paste a la con paste tool'
    LICENSE = 'AGPLv3+'
    BROWSER = PastealaconBrowser

    # Maps a maximum age in seconds to the value sent as "expiration" to the
    # browser when posting.
    # NOTE(review): False presumably stands for "never expires" -- confirm
    # against BasePasteBackend.get_closest_expiration().
    EXPIRATIONS = {
        24*3600: 'd',
        24*3600*30: 'm',
        False: 'f',
    }

    def new_paste(self, *args, **kwargs):
        """
        Create a PastealaconPaste; all arguments are passed through as-is.
        """
        return PastealaconPaste(*args, **kwargs)

    def can_post(self, contents, title=None, public=None, max_age=None):
        """
        Tell whether a paste with these properties can be posted here.

        Returns 0 if it cannot be posted (contents not encodable in the
        browser encoding, private paste requested, or no expiration found
        for max_age), 2 if there is no title or the title is made only of
        word characters and is at most 24 characters long, 1 otherwise.
        """
        try:
            contents.encode(self.browser.ENCODING)
        except UnicodeEncodeError:
            return 0
        if public is False:
            return 0
        if max_age is not None and self.get_closest_expiration(max_age) is None:
            return 0
        # the "title" is filtered (it does not even accept dots)
        # Raw string: '\w' is not a valid string escape sequence.
        if not title or (re.match(r'^\w+$', title) and len(title) <= 24):
            return 2
        return 1

    def get_paste(self, _id):
        """
        Return what the browser's get_paste() gives for this ID or URL.
        """
        with self.browser:
            return self.browser.get_paste(_id)

    def fill_paste(self, paste, fields):
        """
        Load the requested fields of the paste and return it.

        When only 'contents' is requested, the raw download is used (and
        only if the contents are not loaded yet); for any other non-empty
        request, or when fields is None, the whole paste page is parsed.
        """
        # if we only want the contents
        if fields == ['contents']:
            if paste.contents is NotLoaded:
                with self.browser:
                    contents = self.browser.get_contents(paste.id)
                    paste.contents = contents
        # get all fields
        elif fields is None or len(fields):
            with self.browser:
                self.browser.fill_paste(paste)
        return paste

    def post_paste(self, paste, max_age = None):
        """
        Post the paste through the browser.

        max_age, when given, is converted to the closest expiration; the
        matching EXPIRATIONS value (or None if there is none) is passed to
        the browser.
        """
        if max_age is not None:
            expiration = self.get_closest_expiration(max_age)
        else:
            expiration = None
        with self.browser:
            self.browser.post_paste(paste, expiration=self.EXPIRATIONS.get(expiration))

    # Maps an object class to the method able to fill it.
    OBJECTS = {PastealaconPaste: fill_paste}

View file

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from mechanize import RobustFactory
import re
from weboob.tools.browser import BaseBrowser, BrowserUnavailable, BrowserHTTPNotFound
from weboob.capabilities.paste import PasteNotFound
from weboob.tools.browser.decorators import id2url, check_url
from .pages import PastePage, CaptchaPage, PostPage
from .paste import PastealaconPaste
# Public API of this module.
__all__ = ['PastealaconBrowser']
class PastealaconBrowser(BaseBrowser):
    """
    Browser for pastealacon.com: locates, fills and posts pastes.
    """
    DOMAIN = 'pastealacon.com'
    ENCODING = 'ISO-8859-1'

    # PASTE_URL and the PAGES keys are regular expressions, so the literal
    # domain is escaped (otherwise its '.' would match any character), the
    # same way the captcha path already is.
    _DOMAIN_RE = re.escape(DOMAIN)
    PASTE_URL = r'http://%s/(?P<id>\d+)' % _DOMAIN_RE
    PAGES = {PASTE_URL: PastePage,
             'http://%s/%s' % (_DOMAIN_RE, re.escape('pastebin.php?captcha=1')): CaptchaPage,
             'http://%s/' % _DOMAIN_RE: PostPage}

    def __init__(self, *args, **kwargs):
        """
        Build the browser, forcing mechanize's RobustFactory as factory.
        """
        kwargs['factory'] = RobustFactory()
        BaseBrowser.__init__(self, *args, **kwargs)

    # NOTE(review): id2url presumably turns a bare ID into a URL and
    # check_url presumably rejects URLs not matching PASTE_URL before the
    # method body runs -- confirm in weboob.tools.browser.decorators.
    @id2url(PastealaconPaste.id2url)
    @check_url(PASTE_URL)
    def get_paste(self, url):
        """
        Return an unfilled PastealaconPaste for the ID found in the URL.
        """
        _id = re.match('^%s$' % self.PASTE_URL, url).group('id')
        return PastealaconPaste(_id)

    def fill_paste(self, paste):
        """
        Get as much information as possible from the paste page
        """
        self.location(paste.page_url)
        return self.page.fill_paste(paste)

    def get_contents(self, _id):
        """
        Get the contents from the raw URL
        This is the fastest and safest method if you only want the content.
        Returns unicode.

        Raises PasteNotFound if the download URL answers "not found".
        """
        try:
            return self.readurl('http://%s/pastebin.php?dl=%s' % (self.DOMAIN, _id), if_fail='raise').decode(self.ENCODING)
        except BrowserHTTPNotFound:
            raise PasteNotFound()

    def post_paste(self, paste, expiration=None):
        """
        Post the paste from the home page and store the new ID on it.

        Raises BrowserUnavailable if the site answers with the captcha page.
        """
        self.home()
        self.page.post(paste, expiration=expiration)
        if self.is_on_page(CaptchaPage):
            raise BrowserUnavailable("Detected as spam and unable to handle the captcha")
        paste.id = self.page.get_id()

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

View file

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from weboob.tools.browser import BasePage, BrokenPageError
from weboob.capabilities.paste import PasteNotFound
# Public API of this module: the page classes used by the browser.
__all__ = ['PastePage', 'PostPage', 'CaptchaPage']
class PastePage(BasePage):
    """
    Page displaying a single paste.
    """
    def fill_paste(self, paste):
        """
        Set the title and contents of the paste from the page, and return it.

        The "title" is the author name shown in the page header.

        Raises PasteNotFound if the page holds no highlighted content, and
        BrokenPageError if the header does not have the expected format.
        """
        root = self.document.getroot()
        try:
            # there is no 404, try to detect if there really is a content
            self.parser.select(root, 'id("content")/div[@class="syntax"]//ol', 1, 'xpath')
        except BrokenPageError:
            raise PasteNotFound()
        header = self.parser.select(root, 'id("content")/h3', 1, 'xpath')
        # header.text may be None; match against '' so we fail cleanly below
        matches = re.match(r'Posted by (?P<author>.+) on (?P<date>.+) \(', header.text or '')
        if matches is None:
            # report a page problem instead of crashing with AttributeError
            raise BrokenPageError('Unable to parse the paste header')
        paste.title = matches.group('author')
        paste.contents = self.parser.select(root, '//textarea[@id="code"]', 1, 'xpath').text
        return paste

    def get_id(self):
        """
        Find out the ID from the URL
        """
        return self.group_dict['id']
class PostPage(BasePage):
    """
    Page holding the "editor" form used to submit a new paste.
    """
    def post(self, paste, expiration=None):
        """
        Fill the "editor" form with the paste and submit it.

        The 'expiry' field is only set when expiration is given.
        """
        encoding = self.browser.ENCODING
        self.browser.select_form(name='editor')
        self.browser['code2'] = paste.contents.encode(encoding)
        # The backend's can_post() accepts pastes without a title; skip the
        # field in that case instead of crashing on None.encode().
        # NOTE(review): the form's own default value for 'poster' is then
        # kept -- confirm this is acceptable.
        title = paste.title
        if title:
            self.browser['poster'] = title.encode(encoding)
        if expiration:
            self.browser['expiry'] = [expiration]
        self.browser.submit()
class CaptchaPage(BasePage):
    """
    Page with no behaviour of its own; the browser only checks whether it
    landed on it after posting.
    """

View file

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.paste import BasePaste
# Public API of this module.
__all__ = ['PastealaconPaste']
class PastealaconPaste(BasePaste):
    """
    Paste hosted on pastealacon.com.
    """
    # all pastes are public
    public = True

    @classmethod
    def id2url(cls, _id):
        """
        Build the page URL of the paste having the given ID.
        """
        url_format = 'http://pastealacon.com/%s'
        return url_format % _id

View file

@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from weboob.capabilities.base import NotLoaded
from weboob.tools.browser import BrowserUnavailable
from weboob.capabilities.paste import PasteNotFound
class PastealaconTest(BackendTest):
    """
    Tests of the 'pastealacon' backend, run through self.backend.
    """
    BACKEND = 'pastealacon'

    def _get_paste(self, _id):
        """
        Fetch the paste twice (page parsing, then raw download) and check
        what each method loads.
        """
        site_prefix = 'http://pastealacon.com/'

        # html method
        html_paste = self.backend.get_paste(_id)
        self.backend.fillobj(html_paste, ['title'])
        assert html_paste.title == 'ouiboube'
        assert html_paste.page_url.startswith(site_prefix)
        assert u'héhéhé' in html_paste.contents
        assert html_paste.public is True

        # raw method
        raw_paste = self.backend.get_paste(_id)
        self.backend.fillobj(raw_paste, ['contents'])
        assert raw_paste.title is NotLoaded
        assert raw_paste.page_url.startswith(site_prefix)
        assert u'héhéhé' in raw_paste.contents
        assert raw_paste.public is True

    def test_post(self):
        """
        Post a paste, then read it back by ID and by full URL.
        """
        posted = self.backend.new_paste(None, title='ouiboube', contents=u'Weboob Test héhéhé')
        self.backend.post_paste(posted, max_age=3600*24)
        assert posted.id
        self.backend.fill_paste(posted, ['title'])
        assert posted.title == 'ouiboube'
        assert posted.id in posted.page_url
        assert posted.public is True

        # test all get methods from the Paste we just created
        self._get_paste(posted.id)

        # same but from the full URL
        full_url = 'http://pastealacon.com/' + posted.id
        self._get_paste(full_url)

    def test_spam(self):
        """
        Posting this title and contents is expected to raise BrowserUnavailable.
        """
        spam = self.backend.new_paste(None, title='viagra', contents='http://example.com/')
        self.assertRaises(BrowserUnavailable, self.backend.post_paste, spam)

    def test_notfound(self):
        """
        Both fill methods must raise PasteNotFound for an unknown paste.
        """
        missing = (
            '424242424242424242424242424242424242',
            'http://pastealacon.com/424242424242424242424242424242424242',
        )
        for _id in missing:
            # html method first, then raw method
            for fields in (['title'], ['contents']):
                paste = self.backend.get_paste(_id)
                self.assertRaises(PasteNotFound, self.backend.fillobj, paste, fields)

    def test_checkurl(self):
        """
        get_paste() must return None for IDs or URLs it cannot handle.
        """
        unhandled = (
            # an URL we can't handle with this backend
            'http://pastebin.com/nJG9ZFG8',
            # same even with correct domain (IDs are numeric)
            'http://pastealacon.com/nJG9ZFG8',
            'nJG9ZFG8',
        )
        for target in unhandled:
            assert self.backend.get_paste(target) is None

    def test_can_post(self):
        """
        Check can_post() scores: 0 when refused, at least 1 when accepted.
        """
        # (accepted?, contents, keyword arguments), in call order
        cases = (
            (False, 'hello', {'public': False}),
            (True, 'hello', {'public': True}),
            (False, 'hello', {'public': True, 'max_age': 600}),
            (True, 'hello', {'public': True, 'max_age': 3600*24}),
            (True, 'hello', {'public': True, 'max_age': 3600*24*3}),
            (True, 'hello', {'public': True, 'max_age': False}),
            (True, 'hello', {'public': None, 'max_age': False}),
            (True, 'hello', {'public': True, 'max_age': 3600*24*40}),
            (True, u'héhé', {'public': True}),
            (False, u'hello ♥', {'public': True}),
        )
        for accepted, contents, options in cases:
            score = self.backend.can_post(contents, **options)
            if accepted:
                assert 1 <= score
            else:
                assert 0 == score