add backend for website histoires-de-sexe.net
This commit is contained in:
parent
2774fa58c9
commit
892e9e37ad
5 changed files with 285 additions and 0 deletions
3
weboob/backends/hds/__init__.py
Normal file
3
weboob/backends/hds/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from .backend import HDSBackend
|
||||
|
||||
__all__ = ['HDSBackend']
|
||||
102
weboob/backends/hds/backend.py
Normal file
102
weboob/backends/hds/backend.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
from weboob.tools.backend import BaseBackend
|
||||
from weboob.capabilities.messages import ICapMessages, Message, Thread
|
||||
|
||||
from .browser import HDSBrowser
|
||||
|
||||
|
||||
__all__ = ['HDSBackend']
|
||||
|
||||
|
||||
class HDSBackend(BaseBackend, ICapMessages):
    """Messages backend exposing histoires-de-sexe.net stories as threads.

    Each story is presented as a thread holding a single root message whose
    content is the story body.  The ids of the stories already marked as
    read are kept in the backend storage under the ``'seen'`` key.
    """

    NAME = 'hds'
    MAINTAINER = 'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '0.9'
    LICENSE = 'AGPLv3+'
    DESCRIPTION = "histoires-de-sexe.net french erotic novels"
    # NOTE(review): presumably the initial layout of the backend storage;
    # confirm against BaseBackend.
    STORAGE = {'seen': []}
    BROWSER = HDSBrowser

    #### ICapMessages ##############################################

    def iter_threads(self):
        """Yield one ``Thread`` per story returned by the browser listing.

        Only the id, title, date and message count (always 1) are set; the
        root message is not loaded here.
        """
        with self.browser:
            for story in self.browser.iter_stories():
                thread = Thread(story.id)
                thread.title = story.title
                thread.date = story.date
                thread.nb_messages = 1
                yield thread

    def get_thread(self, id):
        """Fetch a story and return it as a ``Thread`` with its root message.

        :param id: either a story id or an existing ``Thread`` object; in
                   the latter case that object is filled in and returned.
        :returns: the thread, or ``None`` when the browser returns no story.
        """
        if isinstance(id, Thread):
            thread = id
            id = thread.id
        else:
            thread = None

        with self.browser:
            story = self.browser.get_story(id)

        if not story:
            return None

        if not thread:
            thread = Thread(story.id)

        # A story is unread until its thread id has been stored in 'seen'.
        flags = 0
        if thread.id not in self.storage.get('seen', default=[]):
            flags |= Message.IS_UNREAD

        thread.title = story.title
        thread.date = story.date
        thread.root = Message(thread=thread,
                              id=0,
                              title=story.title,
                              sender=story.author or u'',
                              receivers=None,
                              date=thread.date,
                              parent=None,
                              content=story.body,
                              children=[],
                              flags=flags)

        return thread

    def iter_unread_messages(self, thread=None):
        """Yield the root message of every listed story not yet seen.

        :param thread: accepted for interface compatibility; it is not used
                       to filter the result.
        """
        # A distinct loop variable keeps the ``thread`` argument intact
        # instead of silently overwriting it.
        for candidate in self.iter_threads():
            if candidate.id in self.storage.get('seen', default=[]):
                continue
            self.fill_thread(candidate, 'root')
            yield candidate.root

    def set_message_read(self, message):
        """Record the thread of ``message`` as seen and save the storage.

        Nothing is written when the thread id is already recorded, so
        marking the same message several times does not grow the list.
        """
        seen = self.storage.get('seen', default=[])
        if message.thread.id in seen:
            return
        # Build a new list rather than mutating the one handed out by the
        # storage (it may be the shared default).
        self.storage.set('seen', seen + [message.thread.id])
        self.storage.save()

    def fill_thread(self, thread, fields):
        """Fill ``thread`` through ``get_thread``; ``fields`` is ignored."""
        return self.get_thread(thread)

    OBJECTS = {Thread: fill_thread}
|
||||
54
weboob/backends/hds/browser.py
Normal file
54
weboob/backends/hds/browser.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages import ValidationPage, HomePage, HistoryPage, StoryPage
|
||||
|
||||
# Browser
|
||||
class HDSBrowser(BaseBrowser):
    """Browser for histoires-de-sexe.net: story listing and single stories."""

    ENCODING = 'ISO-8859-1'
    DOMAIN = 'histoires-de-sexe.net'
    # URL pattern -> page class handling the matching location.
    PAGES = {
        'http://histoires-de-sexe.net/': ValidationPage,
        'http://histoires-de-sexe.net/menu.php': HomePage,
        'http://histoires-de-sexe.net/sexe/histoires-par-date.php.*': HistoryPage,
        r'http://histoires-de-sexe.net/sexe.php\?histoire=(?P<id>.+)': StoryPage,
    }

    def iter_stories(self):
        """Yield the stories of the by-date listing, page after page.

        A page yielding fewer than 50 stories is treated as the last one.
        """
        page_number = 1
        self.location('/sexe/histoires-par-date.php')
        while True:
            listed = 0
            for story in self.page.iter_stories():
                listed += 1
                yield story

            # Short (or empty) page: nothing more to fetch.
            if listed < 50:
                return

            page_number += 1
            self.location('/sexe/histoires-par-date.php?p=%d' % page_number)

    def get_story(self, id):
        """Open the page of story ``id`` (converted with ``int``) and parse it."""
        story_url = '/sexe.php?histoire=%d' % int(id)
        self.location(story_url)
        assert self.is_on_page(StoryPage)
        return self.page.get_story()
|
||||
93
weboob/backends/hds/pages.py
Normal file
93
weboob/backends/hds/pages.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
|
||||
|
||||
__all__ = ['ValidationPage', 'HomePage', 'HistoryPage', 'StoryPage']
|
||||
|
||||
|
||||
class ValidationPage(BasePage):
    """Page class the browser maps to the site root URL.

    Nothing is parsed from this page.
    """
|
||||
|
||||
class HomePage(BasePage):
    """Page class the browser maps to ``menu.php``.

    Nothing is parsed from this page.
    """
|
||||
|
||||
class Story(object):
    """Plain record describing one story scraped from the site.

    Only ``id`` comes from the constructor; the title starts as an empty
    unicode string and the date, author and body start as ``None``.
    """

    def __init__(self, id):
        self.id = id
        self.title = u''
        # Unknown until a page has been parsed.
        self.date = self.author = self.body = None
|
||||
|
||||
class HistoryPage(BasePage):
    """Listing page of the stories sorted by date."""

    def _parse_date(self, date_text):
        """Turn a ``day-month-year`` string into a ``datetime``.

        Returns ``None`` when the text does not start with three
        dash-separated numbers or when they do not form a valid date.
        """
        m = re.match(r'(\d+)-(\d+)-(\d+)', date_text)
        if not m:
            return None
        day, month, year = [int(group) for group in m.groups()]
        try:
            return datetime.datetime(year, month, day)
        except ValueError:
            # e.g. "31-02-2011": well-formed but not a real calendar date.
            return None

    def iter_stories(self):
        """Yield a ``Story`` (id, title, author, date) per listed entry.

        The ``a.t11`` links are consumed in pairs: the first link of a pair
        gives the story id (from its ``href``) and title, the second one
        gives the author, and the text following it ends with the date.
        Entries whose id or date cannot be parsed are logged and skipped.
        """
        links = self.parser.select(self.document.getroot(), 'a.t11')
        story = None
        for link in links:
            # ``text`` and ``tail`` are None on ElementTree-style nodes that
            # carry no text, so fall back to an empty string.
            text = (link.text or u'').strip()

            if story is None:
                href = link.attrib.get('href', '')
                m = re.match(r'.*histoire=(\d+)', href)
                if not m:
                    self.logger.warning('Unable to parse ID "%s"' % href)
                    continue
                story = Story(int(m.group(1)))
                story.title = text
                continue

            story.author = text
            # The date sits on the last line of the text after the link.
            date_text = (link.tail or u'').strip().split('\n')[-1].strip()
            story.date = self._parse_date(date_text)
            if story.date is None:
                self.logger.warning('Unable to parse datetime "%s"' % date_text)
                story = None
                continue

            yield story
            story = None
|
||||
|
||||
class StoryPage(BasePage):
    """Page displaying a single story."""

    def get_story(self):
        """Parse the page and return a fully populated ``Story``.

        The id comes from the ``id`` group of the page URL and is converted
        to ``int`` when possible, so that it compares equal to the ids
        produced by the listing page.  The date is left to ``None`` (with a
        warning) when it cannot be parsed.
        """
        root = self.document.getroot()

        raw_id = self.group_dict['id']
        try:
            story_id = int(raw_id)
        except (TypeError, ValueError):
            # Keep the raw value rather than failing on an unexpected URL.
            story_id = raw_id

        story = Story(story_id)
        # ``text`` is None on ElementTree-style nodes that carry no text.
        story.author = (self.parser.select(root, 'a.t3', 1).text or u'').strip()
        story.title = (self.parser.select(root, 'h1', 1).text or u'').strip()

        # The date sits on the last line of the ``span.t4`` text.
        span_text = self.parser.select(root, 'span.t4', 1).text or u''
        date_text = span_text.strip().split('\n')[-1].strip()
        m = re.match(r'(\d+)-(\d+)-(\d+)', date_text)
        if m:
            try:
                # Site format is day-month-year.
                story.date = datetime.datetime(int(m.group(3)),
                                               int(m.group(2)),
                                               int(m.group(1)))
            except ValueError:
                # Well-formed but impossible date: report it like any other
                # unparsable one.
                m = None
        if not m:
            self.logger.warning('Unable to parse datetime "%s"' % date_text)

        # The body is rebuilt from the <br> children of the justified div:
        # one newline per <br>, followed by the text trailing it.
        # NOTE(review): the div's own leading text (before the first <br>)
        # is not included, exactly as before; confirm this is intended.
        div = self.parser.select(root, 'div[align=justify]', 1)
        parts = []
        for para in div.findall('br'):
            if para.text is not None:
                parts.append(para.text.strip())
            parts.append('\n')
            if para.tail is not None:
                parts.append(para.tail.strip())

        # u'\x92' is presumably a Windows-1252 apostrophe decoded as
        # ISO-8859-1; it is replaced by a plain quote.
        story.body = u''.join(parts).replace(u'\x92', "'").strip()
        return story
|
||||
33
weboob/backends/hds/test.py
Normal file
33
weboob/backends/hds/test.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.test import BackendTest
|
||||
from weboob.tools.misc import limit
|
||||
|
||||
|
||||
__all__ = ['HDSTest']
|
||||
|
||||
|
||||
class HDSTest(BackendTest):
    """Smoke test for the ``hds`` backend."""

    BACKEND = 'hds'

    def test_new_messages(self):
        """Iterate over unread messages through ``limit(..., 10)``.

        There is no assertion: the test passes when nothing raises.
        """
        unread = self.backend.iter_unread_messages()
        for _ in limit(unread, 10):
            pass
|
||||
Loading…
Add table
Add a link
Reference in a new issue