From cc52b8eb3cdd77544e92659e895fd155fa2e96e1 Mon Sep 17 00:00:00 2001
From: Nicolas Duhamel
Date: Tue, 12 Apr 2011 16:14:46 +0200
Subject: [PATCH] Downparadise backend first commit

---
 weboob/applications/boobmsg/boobmsg.py        | 82 ++++++++++++++-
 weboob/backends/downparadise/__init__.py      | 22 +++++
 weboob/backends/downparadise/backend.py       | 99 +++++++++++++++++++
 weboob/backends/downparadise/browser.py       | 85 ++++++++++++++++
 .../backends/downparadise/pages/__init__.py   | 23 +++++
 weboob/backends/downparadise/pages/base.py    | 33 +++++++
 weboob/backends/downparadise/pages/index.py   | 59 +++++++++++
 weboob/backends/downparadise/pages/ucp.py     | 27 +++++
 .../backends/downparadise/pages/viewforum.py  | 45 +++++++++
 9 files changed, 473 insertions(+), 2 deletions(-)
 create mode 100644 weboob/backends/downparadise/__init__.py
 create mode 100644 weboob/backends/downparadise/backend.py
 create mode 100644 weboob/backends/downparadise/browser.py
 create mode 100644 weboob/backends/downparadise/pages/__init__.py
 create mode 100644 weboob/backends/downparadise/pages/base.py
 create mode 100644 weboob/backends/downparadise/pages/index.py
 create mode 100644 weboob/backends/downparadise/pages/ucp.py
 create mode 100644 weboob/backends/downparadise/pages/viewforum.py

diff --git a/weboob/applications/boobmsg/boobmsg.py b/weboob/applications/boobmsg/boobmsg.py
index 2b218368..4d7889ca 100644
--- a/weboob/applications/boobmsg/boobmsg.py
+++ b/weboob/applications/boobmsg/boobmsg.py
@@ -27,6 +27,9 @@ from weboob.tools.application.repl import ReplApplication
 from weboob.tools.application.formatters.iformatter import IFormatter
 from weboob.tools.misc import html2text
 
+from weboob.capabilities.collection import Collection, ICapCollection, CollectionNotFound
+from weboob.tools.path import Path
+
 
 __all__ = ['Boobmsg']
 
@@ -175,9 +178,13 @@ class Boobmsg(ReplApplication):
     COMMANDS_FORMATTERS = {'list':          'msglist',
                            'show':          'msg',
                            'export_thread': 'msg',
-                           'export_all':    'msg'
+                           'export_all':    'msg',
+                           'ls':            'msglist',
                           }
-
+
+    def __init__(self, *args, **kwargs):
+        ReplApplication.__init__(self, *args, **kwargs)
+        self.working_path = Path()
 
     def add_application_options(self, group):
         group.add_option('-e', '--skip-empty',  action='store_true',
@@ -358,3 +365,74 @@ class Boobmsg(ReplApplication):
                 print 'Oops, you need to be in interactive mode to read messages'
             else:
                 print 'Message not found'
+
+    def do_ls(self, line):
+        """List the collection backends, or the content of the working path."""
+        path = self.working_path.get()
+        if len(path) == 0:
+            for name in [b.NAME for b in self.weboob.iter_backends(caps=ICapCollection)]:
+                print name
+            return 0
+
+        def do(backend):
+            return backend.iter_resources(path[1:])
+
+        for backend, rep in self.do(do, backends=path[0]):
+            if isinstance(rep, Thread):
+                # Threads go through the formatter; anything else is printed raw.
+                self.format(rep)
+            else:
+                print rep
+
+        self.flush()
+
+    def do_cd(self, line):
+        """Change the working collection; a rejected path is rolled back."""
+        self.working_path.extend(line.encode('utf-8'))
+        req_path = self.working_path.get()
+
+        if len(req_path) == 0:
+            self.prompt = '%s> ' % self.APPNAME
+            return 0
+
+        working_backend = req_path[0]
+        path = req_path[1:]
+
+        if working_backend in [b.NAME for b in self.enabled_backends]:
+            if working_backend in [b.NAME for b in self.weboob.iter_backends(caps=ICapCollection)]:
+                backend = [b for b in self.enabled_backends if b.NAME == working_backend][0]
+            else:
+                print >>sys.stderr, "Error backend %s not implement Collection" % working_backend
+                self.working_path.restore()  # undo extend(): keep the previous path
+                return 1
+        else:
+            print >>sys.stderr, "Error backend %s unknow" % working_backend
+            self.working_path.restore()  # otherwise ls/completion would use a bogus backend
+            return 1
+
+        try:
+            path = backend.change_working_collection(path)
+        except NotImplementedError:
+            print >>sys.stderr, "Error backend %s not implement collection" % working_backend
+            self.working_path.restore()
+            return 1
+        except CollectionNotFound:
+            print >>sys.stderr, "Path: %s not found" % self.working_path.tostring()
+            self.working_path.restore()
+            return 1
+
+        self.prompt = '%s:%s> ' % (self.APPNAME, self.working_path.tostring() )
+
+    def complete_cd(self, text, line, begidx, endidx):
+        """Complete 'cd' with backend names or the resources of the working path."""
+        mline = line.partition(' ')[2]
+        offs = len(mline) - len(text)
+        path = self.working_path.get()
+
+        if len(path) == 0:
+            tmp = [b.NAME for b in self.weboob.iter_backends(caps=ICapCollection)]
+        else:
+            backend = [b for b in self.enabled_backends if b.NAME == path[0]][0]
+            tmp = [rep for rep in backend.iter_resources(path[1:])]
+
+        return [s[offs:] for s in tmp if s.startswith(mline)]
diff --git a/weboob/backends/downparadise/__init__.py b/weboob/backends/downparadise/__init__.py
new file mode 100644
index 00000000..c2e6b97d
--- /dev/null
+++ b/weboob/backends/downparadise/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .backend import DownparadiseBackend
+
+__all__ = ['DownparadiseBackend']
diff --git a/weboob/backends/downparadise/backend.py b/weboob/backends/downparadise/backend.py
new file mode 100644
index 00000000..be444a00
--- /dev/null
+++ b/weboob/backends/downparadise/backend.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.backend import BaseBackend
+from weboob.tools.value import Value, ValueBool, ValuesDict
+
+from weboob.capabilities.messages import ICapMessages, ICapMessagesPost, Message, Thread, CantSendMessage
+from weboob.capabilities.collection import ICapCollection
+
+from .browser import Downparadise
+
+class DownparadiseBackend(BaseBackend, ICapCollection, ICapMessages, ICapMessagesPost):
+    NAME = 'downparadise'
+    MAINTAINER = 'Nicolas Duhamel'
+    EMAIL = 'nicolas@jombi.fr'
+    VERSION = '0.8'
+    LICENSE = 'AGPLv3+'
+    DESCRIPTION = "Downparadise message board"
+
+    CONFIG = ValuesDict(Value('username', label='Username', regexp='.+'),
+                        Value('password', label='Password', regexp='.+', masked=True))
+
+    BROWSER = Downparadise
+
+    def create_default_browser(self):
+        return self.create_browser(self.config['username'], self.config['password'])
+
+    #############################
+    ## Collection: both methods hand the already-split path over to the browser
+
+    def change_working_collection(self, splited_path):
+        return self.browser.change_working_forum(splited_path)
+
+    def iter_resources(self, splited_path):
+        return self.browser.iter_forums(splited_path)
+
+    #############################
+    ## Messages: not implemented yet, every method raises NotImplementedError
+
+    def iter_threads(self):
+        """
+        Iterate over threads, from newest to oldest.
+
+        @return [iter] Thread objects
+        """
+        raise NotImplementedError()
+
+    def get_thread(self, id):
+        """
+        Get a specific thread.
+
+        @return [Thread] the Thread object
+        """
+        raise NotImplementedError()
+
+    def iter_unread_messages(self, thread=None):
+        """
+        Iterate over messages which have not been marked as read.
+
+        @param thread thread name (optional)
+        @return [iter] Message objects
+        """
+        raise NotImplementedError()
+
+    def set_message_read(self, message):
+        """
+        Set a message as read.
+
+        @param [message] message read (or ID)
+        """
+        raise NotImplementedError()
+
+    #############################
+    ## Message Post: not implemented yet
+
+    def post_message(self, message):
+        """
+        Post a message.
+
+        @param message Message object
+        @return
+        """
+        raise NotImplementedError()
diff --git a/weboob/backends/downparadise/browser.py b/weboob/backends/downparadise/browser.py
new file mode 100644
index 00000000..ce35f20a
--- /dev/null
+++ b/weboob/backends/downparadise/browser.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+import urllib
+
+from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
+from weboob.capabilities.collection import Collection, CollectionNotFound
+
+from .pages import IndexPage, UcpPage, ViewforumPage
+
+
+class Downparadise(BaseBrowser):
+    DOMAIN = 'forum.downparadise.ws'
+    PROTOCOL = 'http'
+    PAGES = {'http://forum.downparadise.ws/index.php'       : IndexPage,
+             'http://forum.downparadise.ws/ucp.php.*'       : UcpPage,
+             'http://forum.downparadise.ws/viewforum.php.*' : ViewforumPage,
+            }
+
+    def home(self):
+        return self.location('http://forum.downparadise.ws/index.php')
+
+    def login(self):
+        data = {'login':    'Connexion',
+                'password': self.password,
+                'username': self.username}
+        self.location('http://forum.downparadise.ws/ucp.php?mode=login', urllib.urlencode(data) , no_login=True)
+        if not self.is_logged():
+            raise BrowserIncorrectPassword()
+
+    def is_logged(self):
+        return (self.page and self.page.is_logged())
+
+    def change_working_forum(self, splited_path):
+        """Check splited_path against the forum tree; return it as a new list."""
+        if not self.is_on_page(IndexPage):
+            self.home()
+
+        collections = self.page.get_collections()
+        final = []  # built per call: no shared default list, caller's path untouched
+        for name in splited_path:
+            if callable(collections):  # leaf forum: children is a loader, not a list
+                raise CollectionNotFound()
+            children = [c.children for c in collections
+                        if isinstance(c, Collection) and c.title == name]
+            if not children:
+                raise CollectionNotFound()
+            final.append(name)
+            collections = children[0]
+        return final
+
+    def iter_forums(self, splited_path):
+        """Return the titles found under splited_path (or a leaf's raw children)."""
+        if not self.is_on_page(IndexPage):
+            self.home()
+
+        collections = self.page.get_collections()
+
+        def walk_res(path, collections):
+            if not isinstance(collections, (list, Collection)):
+                return collections
+            if len(path) == 0:
+                return [collection.title for collection in collections ]
+            i = path[0]
+            if i not in [collection.title for collection in collections]:
+                raise CollectionNotFound()
+            # NOTE(review): a leaf's children come back as stored, uncalled -- confirm callers expect that
+            return walk_res(path[1:], [collection.children for collection in collections if collection.title == i][0])
+
+        return walk_res(splited_path, collections)
diff --git a/weboob/backends/downparadise/pages/__init__.py b/weboob/backends/downparadise/pages/__init__.py
new file mode 100644
index 00000000..977e6255
--- /dev/null
+++ b/weboob/backends/downparadise/pages/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+from .index import IndexPage
+from .ucp import UcpPage
+from .viewforum import ViewforumPage
+
+__all__ = ['IndexPage', 'UcpPage', 'ViewforumPage']
diff --git a/weboob/backends/downparadise/pages/base.py b/weboob/backends/downparadise/pages/base.py
new file mode 100644
index 00000000..c11cef56
--- /dev/null
+++ b/weboob/backends/downparadise/pages/base.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+import re
+
+from weboob.tools.browser import BasePage
+
+__all__ = ['DownparadisePage']
+
+class DownparadisePage(BasePage):
+    def is_logged(self):
+        """Return False if the page contains a form posting to the login URL, else True."""
+        for form in self.document.getiterator('form'):
+            act = form.attrib.get('action', '')  # '' (not None) keeps the 'in' test safe
+            if './ucp.php?mode=login' in act:
+                return False
+
+        return True
diff --git a/weboob/backends/downparadise/pages/index.py b/weboob/backends/downparadise/pages/index.py
new file mode 100644
index 00000000..8235a38a
--- /dev/null
+++ b/weboob/backends/downparadise/pages/index.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser import BasePage
+from .base import DownparadisePage
+
+from weboob.capabilities.collection import Collection
+
+__all__ = ['IndexPage']
+
+class IndexPage(DownparadisePage):
+    """Board index page: builds the forum tree as Collection objects on load."""
+    def on_loaded(self):
+        self.collections = []
+        self.parse_forums()
+
+    def parse_forums(self):
+        """Append one Collection per 'forumlink' anchor to self.collections."""
+
+        def do(id):
+            # Stored as ``children`` of leaf entries: opens ``id`` and iterates its threads.
+            self.browser.location(id)
+            return self.browser.page.iter_threads()
+
+        main_table = self.document.xpath("//div[@id='wrapheader']/table")[3]
+
+        for row in main_table.xpath("./tr"):
+            for anchor in row.xpath(".//a[@class='forumlink']"):
+                forum = Collection()
+                forum.title = anchor.text.strip().encode('latin-1')
+                forum.id = anchor.get("href")
+                for link in row.getiterator('a'):
+                    if "subforum" in link.attrib.get('class', ""):
+                        sub = Collection(title=link.text.strip().encode('latin-1'))
+                        sub.id = link.get("href")
+                        sub.children = do
+                        forum.appendchild(sub)
+                if not forum.children:
+                    forum.children = do
+                self.collections.append(forum)
+
+    def get_collections(self):
+        return self.collections
diff --git a/weboob/backends/downparadise/pages/ucp.py b/weboob/backends/downparadise/pages/ucp.py
new file mode 100644
index 00000000..ac404a84
--- /dev/null
+++ b/weboob/backends/downparadise/pages/ucp.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser import BasePage
+from .base import DownparadisePage
+__all__ = ['UcpPage']
+
+class UcpPage(DownparadisePage):
+    """Page served by the ucp.php URLs; nothing is parsed on load."""
+    def on_loaded(self):
+        pass
diff --git a/weboob/backends/downparadise/pages/viewforum.py b/weboob/backends/downparadise/pages/viewforum.py
new file mode 100644
index 00000000..f02fd7bf
--- /dev/null
+++ b/weboob/backends/downparadise/pages/viewforum.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Nicolas Duhamel
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+import re
+
+from weboob.tools.browser import BasePage
+from .base import DownparadisePage
+
+from weboob.capabilities.collection import Collection
+
+from weboob.capabilities.messages import Thread
+
+__all__ = ['ViewforumPage']
+
+def remove_html_tags(data):
+    """Return data with every non-greedy ``<...>`` span removed."""
+    return re.sub(r'<.*?>', '', data)
+
+class ViewforumPage(DownparadisePage):
+    """Page served by the viewforum.php URLs; exposes its topic links as threads."""
+    def on_loaded(self):
+        pass
+
+    def iter_threads(self):
+        """Yield one Thread per 'topictitle' link, built from its href and titled with its text."""
+        topic_table = self.document.xpath("//div[@id='pagecontent']/table")[1]
+        for anchor in topic_table.xpath(".//a[@class='topictitle']"):
+            thread = Thread(anchor.get("href"))
+            thread.title = remove_html_tags(self.parser.tostring(anchor)).strip().encode('raw_unicode_escape').decode('utf-8')
+            yield thread