new backend 'fourchan' (implements ICapMessages)
This commit is contained in:
parent
495f47d7f7
commit
5be1d36beb
6 changed files with 237 additions and 2 deletions
2
weboob/backends/fourchan/__init__.py
Normal file
2
weboob/backends/fourchan/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
from .backend import FourChanBackend
|
||||||
|
from .browser import FourChan
|
||||||
107
weboob/backends/fourchan/backend.py
Normal file
107
weboob/backends/fourchan/backend.py
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010 Romain Bignon
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, version 3 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
from logging import warning
|
||||||
|
|
||||||
|
from weboob.backend import BaseBackend
|
||||||
|
from weboob.capabilities.messages import ICapMessages, Message
|
||||||
|
|
||||||
|
from .browser import FourChan
|
||||||
|
|
||||||
|
class FourChanBackend(BaseBackend, ICapMessages):
    """Backend exposing 4chan threads and their replies as messages.

    Message thread identifiers have the form ``<board>.<thread id>``
    (for example ``b.12345``).  The IDs of threads and comments that
    have already been seen are recorded in the backend storage under
    ``boards -> <board> -> <thread id> -> [comment ids]`` so that
    iter_new_messages() can report only unseen posts.
    """

    NAME = 'fourchan'
    MAINTAINER = 'Romain Bignon'
    EMAIL = 'romain@peerfuse.org'
    VERSION = '1.0'
    LICENSE = 'GPLv3'
    DESCRIPTION = "4chan website"

    # 'boards' is read as a whitespace-separated list of board names.
    CONFIG = {'boards': BaseBackend.ConfigField(description='Boards'),
              }
    STORAGE = {'boards': {}}
    BROWSER = FourChan

    def iter_messages(self, thread=None):
        """Iterate over every message, seen or not.

        :param thread: optional ``<board>.<thread id>`` identifier; when
                       omitted, all the configured boards are browsed.
        """
        return self._iter_messages(thread, False)

    def iter_new_messages(self, thread=None):
        """Iterate over the messages that were not seen before.

        :param thread: optional ``<board>.<thread id>`` identifier; when
                       omitted, all the configured boards are browsed.
        """
        return self._iter_messages(thread, True)

    def _iter_messages(self, thread, only_new):
        """Yield the messages of one thread, or of all configured boards.

        This is a generator, so callers always get an iterable back,
        even when the given identifier is invalid (a warning is logged
        and nothing is yielded in that case).
        """
        if thread:
            if '.' not in thread:
                warning('"%s" is not a valid ID' % thread)
                return

            # Split on the first dot only: a larger maxsplit could
            # produce three parts and break the unpacking.
            board, thread_id = thread.split('.', 1)
            try:
                # The browser formats the thread ID as an integer.
                thread_id = int(thread_id)
            except ValueError:
                warning('"%s" is not a valid ID' % thread)
                return

            for message in self._iter_messages_of(board, thread_id, only_new):
                yield message
        else:
            # Walk through *every* configured board, not only the first.
            for board in self.config['boards'].split():
                for message in self._iter_messages_of(board, None, only_new):
                    yield message

    def _iter_messages_of(self, board, thread_wanted, only_new):
        """Yield the messages of a board, optionally limited to one thread.

        :param board: board name
        :param thread_wanted: thread ID, or a false value to go through
                              every thread listed on the board page
        :param only_new: when true, skip the messages already seen
        """
        if board not in self.storage.get('boards', default={}):
            self.storage.set('boards', board, {})

        if thread_wanted:
            for message in self._iter_thread_messages(board, thread_wanted, only_new):
                yield message
        else:
            for thread in self.browser.get_threads(board):
                for message in self._iter_thread_messages(board, thread.id, only_new):
                    yield message

    def _iter_thread_messages(self, board, thread, only_new):
        """Yield the opening post of a thread, then each of its comments.

        Unseen thread and comment IDs are recorded in the storage as
        they are met.  The storage is saved once the thread has been
        entirely consumed.
        """
        thread = self.browser.get_thread(board, thread)
        thread_id = '%s.%s' % (board, thread.id)

        # A thread is new when it has no entry in the storage yet; only
        # then is its (empty) list of seen comments created.
        new = thread.id not in self.storage.get('boards', board, default={})
        if new:
            self.storage.set('boards', board, thread.id, [])

        if not only_new or new:
            yield Message(thread_id,
                          0,
                          thread.filename,
                          thread.author,
                          thread.datetime,
                          content=thread.text,
                          is_html=True,
                          is_new=new)

        for comment in thread.comments:
            seen = self.storage.get('boards', board, thread.id, default=[])
            new = comment.id not in seen
            if new:
                self.storage.set('boards', board, thread.id, seen + [comment.id])

            if not only_new or new:
                yield Message(thread_id,
                              comment.id,
                              thread.filename,
                              comment.author,
                              comment.datetime,
                              0,
                              comment.text,
                              is_html=True,
                              is_new=new)

        # NOTE: only reached when the generator is fully consumed.
        self.storage.save()

    #def post_reply(self, thread_id, reply_id, title, message):
    #    return self.browser.post_reply(thread_id, reply_id, title, message)
|
||||||
41
weboob/backends/fourchan/browser.py
Normal file
41
weboob/backends/fourchan/browser.py
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010 Romain Bignon
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, version 3 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
|
from .pages.board import BoardPage
|
||||||
|
|
||||||
|
class FourChan(BaseBrowser):
    """Browser for the 4chan boards (http://boards.4chan.org)."""

    DOMAIN = 'boards.4chan.org'
    PROTOCOL = 'http'
    # Raw strings keep the regexp escapes (\w, \d) literal; the resulting
    # patterns are unchanged.
    PAGES = {r'http://boards.4chan.org/\w+/': BoardPage,
             r'http://boards.4chan.org/\w+/res/\d+': BoardPage,
             }

    def is_logged(self):
        """Always report a logged-in state."""
        return True

    def get_threads(self, board):
        """Load the index page of a board and return its articles.

        :param board: board name, as used in the URL
        """
        self.location('http://boards.4chan.org/%s/' % board)

        return self.page.articles

    def get_thread(self, board, id):
        """Load the page of a single thread and return its article.

        :param board: board name, as used in the URL
        :param id: thread ID, as an integer or a numeric string
        :raises ValueError: if `id` is not numeric
        """
        # IDs obtained by splitting a "board.thread" identifier are
        # strings, which '%d' refuses: convert explicitly.
        self.location('http://boards.4chan.org/%s/res/%d' % (board, int(id)))

        assert len(self.page.articles) == 1, 'a thread page must hold exactly one thread'
        return self.page.articles[0]
|
||||||
0
weboob/backends/fourchan/pages/__init__.py
Normal file
0
weboob/backends/fourchan/pages/__init__.py
Normal file
85
weboob/backends/fourchan/pages/board.py
Normal file
85
weboob/backends/fourchan/pages/board.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright(C) 2010 Romain Bignon
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, version 3 of the License.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
|
||||||
|
import re
|
||||||
|
from logging import warning
|
||||||
|
|
||||||
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
class Message(object):
    """A post scraped from a board page.

    The same class is used for the opening post of a thread and for
    its replies, the latter being stored in `comments`.
    """

    def __init__(self, browser, board, id, filename=u'', url=u''):
        """
        :param browser: browser whose parser serializes the post body
        :param board: name of the board the post belongs to
        :param id: post ID
        :param filename: name of the attached file
        :param url: URL of the attached file
        """
        self.id = id
        self.browser = browser
        self.board = board
        self.filename = filename
        self.datetime = 0
        self.url = url
        self.author = u''
        self.text = u''
        self.comments = []

    def add_comment(self, div):
        """Build a comment from a reply element and append it to `comments`.

        The comment ID is read from the ``id`` attribute of `div`.  A
        reply without a numeric ID is skipped with a warning instead of
        raising ValueError; a missing poster name or body leaves the
        corresponding field empty.

        :param div: element holding the reply
        """
        raw_id = div.attrib.get('id', '')
        try:
            comment_id = int(raw_id)
        except ValueError:
            warning('Unable to parse comment ID "%s"' % raw_id)
            return

        comment = Message(self.browser, self.board, comment_id)

        names = div.cssselect('span.commentpostername')
        if names:
            comment.author = names[0].text

        body = div.find('blockquote')
        if body is not None:
            comment.text = self.browser.parser.tostring(body)

        self.comments.append(comment)

    def __repr__(self):
        return '<Message id=%s filename=%s url=%s comments=%d>' % (self.id, self.filename, self.url, len(self.comments))
|
||||||
|
|
||||||
|
class BoardPage(BasePage):
    """Page of a board index or of a single thread.

    Once loaded, `articles` holds one Message per thread found in the
    ``delform`` form, each with its replies in `comments`, and `board`
    holds the board name taken from the page URL.
    """

    URL_REGEXP = re.compile(r'http://boards.4chan.org/(\w+)/')

    def on_loaded(self):
        """Parse the document and fill `board` and `articles`."""
        self.articles = []

        m = self.URL_REGEXP.match(self.url)
        if m:
            self.board = m.group(1)
        else:
            warning('Unable to find board')
            self.board = 'unknown'

        form = None
        for f in self.document.getroot().cssselect('form'):
            if f.attrib.get('name', '') == 'delform':
                form = f
                break

        if form is None:
            # Nothing to parse: leave `articles` empty rather than
            # failing on None below.
            warning('No delform :(')
            return

        article = None
        for div in form.getchildren():
            if div.tag == 'span' and div.attrib.get('class', '') == 'filesize':
                # A "filesize" span starts a new thread.
                link = div.find('a')
                url = link.get('href', '') if link is not None else ''
                filename = 'unknown.jpg'
                span = div.find('span')
                if span is not None:
                    filename = span.text
                article = Message(self.browser, self.board, 0, filename, url)
                self.articles.append(article)
            elif article is None:
                # Elements met before the first thread belong to no article.
                continue
            elif div.tag == 'input':
                # Only the deletion checkbox carries the post ID as its name.
                if div.attrib.get('type', '') == 'checkbox' and div.attrib.get('value', '') == 'delete':
                    article.id = int(div.attrib.get('name', '0'))
            elif div.tag == 'blockquote':
                article.text = self.browser.parser.tostring(div)
            elif div.tag == 'table':
                tags = div.cssselect('td.reply')
                if tags:
                    article.add_comment(tags[0])
|
||||||
|
|
@ -73,8 +73,8 @@ class Message:
|
||||||
return self.id == msg.id and self.thread_id == msg.thread_id
|
return self.id == msg.id and self.thread_id == msg.thread_id
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
result = '<Message id="%s" title="%s" date="%s" from="%s">' % (
|
result = '<Message id="%s.%s" title="%s" date="%s" from="%s">' % (
|
||||||
self.id, self.title, self.date, self.sender)
|
self.thread_id, self.id, self.title, self.date, self.sender)
|
||||||
return result.encode('utf-8')
|
return result.encode('utf-8')
|
||||||
|
|
||||||
class ICapMessages(ICap):
|
class ICapMessages(ICap):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue