diff --git a/weboob/backends/hds/__init__.py b/weboob/backends/hds/__init__.py new file mode 100644 index 00000000..a7677ff1 --- /dev/null +++ b/weboob/backends/hds/__init__.py @@ -0,0 +1,3 @@ +from .backend import HDSBackend + +__all__ = ['HDSBackend'] diff --git a/weboob/backends/hds/backend.py b/weboob/backends/hds/backend.py new file mode 100644 index 00000000..4819e67a --- /dev/null +++ b/weboob/backends/hds/backend.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class HDSBackend(BaseBackend, ICapMessages):
    """Expose histoires-de-sexe.net stories through the messages capability.

    Every story is presented as a thread holding a single root message.
    The identifiers of threads already marked as read are kept in the
    backend storage under the ``'seen'`` key.
    """

    NAME = 'hds'
    MAINTAINER = 'Romain Bignon'
    EMAIL = 'romain@weboob.org'
    VERSION = '0.9'
    LICENSE = 'AGPLv3+'
    DESCRIPTION = "histoires-de-sexe.net french erotic novels"
    STORAGE = {'seen': []}
    BROWSER = HDSBrowser

    #### ICapMessages ##############################################

    def iter_threads(self):
        """Yield one ``Thread`` per story listed by the browser.

        Only the id, title and date are set; the root message is not loaded
        here (see ``get_thread``).
        """
        with self.browser:
            for story in self.browser.iter_stories():
                thread = Thread(story.id)
                thread.title = story.title
                thread.date = story.date
                thread.nb_messages = 1
                yield thread

    def get_thread(self, id):
        """Load a story and return it as a thread with its root message.

        :param id: either a story identifier or an existing ``Thread``; in
                   the latter case that object is filled in place.
        :returns: the filled ``Thread``, or None when the browser returns
                  no story.
        """
        if isinstance(id, Thread):
            thread = id
            id = thread.id
        else:
            thread = None

        with self.browser:
            story = self.browser.get_story(id)

        if not story:
            return None

        if not thread:
            thread = Thread(story.id)

        flags = 0
        if thread.id not in self.storage.get('seen', default=[]):
            flags |= Message.IS_UNREAD

        thread.title = story.title
        thread.date = story.date
        thread.root = Message(thread=thread,
                              id=0,
                              title=story.title,
                              sender=story.author or u'',
                              receivers=None,
                              date=thread.date,
                              parent=None,
                              content=story.body,
                              children=[],
                              flags=flags)

        return thread

    def iter_unread_messages(self, thread=None):
        """Yield the root message of every thread not yet marked as read.

        NOTE(review): the ``thread`` argument is accepted but not used;
        all threads are always scanned -- confirm this is the intended
        contract.
        """
        # A distinct loop variable keeps the (unused) parameter from being
        # silently overwritten.
        for candidate in self.iter_threads():
            if candidate.id in self.storage.get('seen', default=[]):
                continue
            if self.fill_thread(candidate, 'root') is None:
                # The story could not be loaded, so there is no root
                # message to report for this thread.
                continue
            yield candidate.root

    def set_message_read(self, message):
        """Record the thread of *message* as seen and persist the storage.

        A thread identifier that is already recorded is not added again,
        so the stored list does not accumulate duplicates.
        """
        seen = self.storage.get('seen', default=[])
        thread_id = message.thread.id
        if thread_id in seen:
            return
        self.storage.set('seen', seen + [thread_id])
        self.storage.save()

    def fill_thread(self, thread, fields):
        """Fill *thread* by delegating to ``get_thread``.

        ``fields`` is ignored: the whole thread is always loaded.
        """
        return self.get_thread(thread)

    OBJECTS = {Thread: fill_thread}
class HDSBrowser(BaseBrowser):
    """Browser for histoires-de-sexe.net: story listing and story pages."""

    ENCODING = 'ISO-8859-1'
    DOMAIN = 'histoires-de-sexe.net'
    # The story URL must expose a group named ``id``: StoryPage reads it
    # back through ``group_dict['id']``.  Raw string so that ``\?`` reaches
    # the regex engine untouched.
    PAGES = {'http://histoires-de-sexe.net/': ValidationPage,
             'http://histoires-de-sexe.net/menu.php': HomePage,
             'http://histoires-de-sexe.net/sexe/histoires-par-date.php.*': HistoryPage,
             r'http://histoires-de-sexe.net/sexe.php\?histoire=(?P<id>.+)': StoryPage,
            }

    def iter_stories(self):
        """Yield the stories of the by-date listing, page after page.

        Iteration stops after the first page that lists fewer than 50
        stories.
        """
        self.location('/sexe/histoires-par-date.php')
        page_number = 1
        while True:
            last_index = 0
            for last_index, story in enumerate(self.page.iter_stories()):
                yield story

            # ``last_index`` is zero-based: below 49 means fewer than 50
            # entries were listed, which is treated as the final page
            # (presumably a full listing page holds 50 stories -- TODO
            # confirm against the site).
            if last_index < 49:
                return

            page_number += 1
            self.location('/sexe/histoires-par-date.php?p=%d' % page_number)

    def get_story(self, id):
        """Open the page of story *id* and return the parsed story.

        :param id: story identifier; converted with ``int()``, so a value
                   that is not integer-like raises ``ValueError``.
        :raises AssertionError: when the browser did not end up on a
                                story page.
        """
        id = int(id)

        self.location('/sexe.php?histoire=%d' % id)
        assert self.is_on_page(StoryPage)
        return self.page.get_story()
# Patterns shared by the page parsers below (compiled once).
_STORY_ID_RE = re.compile(r'.*histoire=(\d+)')
_DATE_RE = re.compile(r'(\d+)-(\d+)-(\d+)')


def _last_line(text):
    """Return the stripped last line of *text* (empty string for None)."""
    return (text or u'').strip().split('\n')[-1].strip()


def _parse_date(text):
    """Parse a leading ``day-month-year`` date from *text*.

    Returns a ``datetime.datetime``, or None when the text does not start
    with three dash-separated numbers or when they do not form a valid
    date.
    """
    match = _DATE_RE.match(text)
    if match is None:
        return None
    day, month, year = [int(group) for group in match.groups()]
    try:
        return datetime.datetime(year, month, day)
    except ValueError:
        # Out-of-range components (e.g. month 13): report as unparsable
        # instead of aborting the whole page.
        return None


class ValidationPage(BasePage):
    """Page served at the site root; nothing is extracted from it."""
    pass


class HomePage(BasePage):
    """Page served at ``menu.php``; nothing is extracted from it."""
    pass


class Story(object):
    """Plain container for the data scraped about one story."""

    def __init__(self, id):
        self.id = id
        self.title = u''
        self.date = None
        self.author = None
        self.body = None


class HistoryPage(BasePage):
    """Listing page from which story summaries are read."""

    def iter_stories(self):
        """Yield a ``Story`` (id, title, author, date) per listed entry.

        The ``a.t11`` links are consumed in pairs: the first link of a pair
        carries the story id and title, the second one the author, with the
        date in the text that follows it.  Entries whose id or date cannot
        be parsed are skipped with a warning.
        """
        links = self.parser.select(self.document.getroot(), 'a.t11')
        story = None
        for link in links:
            if story is None:
                # First link of a pair: story id and title.
                href = link.attrib.get('href', '')
                match = _STORY_ID_RE.match(href)
                if not match:
                    self.logger.warning('Unable to parse ID "%s"' % href)
                    continue
                story = Story(int(match.group(1)))
                story.title = (link.text or u'').strip()
                continue

            # Second link of a pair: author, followed by the date.
            story.author = (link.text or u'').strip()
            date_text = _last_line(link.tail)
            story.date = _parse_date(date_text)
            if story.date is None:
                self.logger.warning('Unable to parse datetime "%s"' % date_text)
            else:
                yield story
            story = None


class StoryPage(BasePage):
    """Page of a single story."""

    def get_story(self):
        """Return the ``Story`` displayed on this page, body included.

        The id comes from the ``id`` group of the page URL and is converted
        to ``int`` so that it compares equal to the ids produced by
        ``HistoryPage``; a value that is not numeric is kept unchanged.
        """
        root = self.document.getroot()

        raw_id = self.group_dict['id']
        try:
            story_id = int(raw_id)
        except ValueError:
            story_id = raw_id

        story = Story(story_id)
        story.author = (self.parser.select(root, 'a.t3', 1).text or u'').strip()
        story.title = (self.parser.select(root, 'h1', 1).text or u'').strip()

        date_text = _last_line(self.parser.select(root, 'span.t4', 1).text)
        parsed = _parse_date(date_text)
        if parsed is not None:
            story.date = parsed
        else:
            self.logger.warning('Unable to parse datetime "%s"' % date_text)

        div = self.parser.select(root, 'div[align=justify]', 1)
        # NOTE(review): only text attached to the <br> children is
        # collected; text located before the first <br> (div.text) is not --
        # confirm this matches the site's markup.
        parts = []
        for br in div.findall('br'):
            if br.text is not None:
                parts.append(br.text.strip())
            parts.append(u'\n')
            if br.tail is not None:
                parts.append(br.tail.strip())
        # u'\x92' is replaced by a plain apostrophe.
        story.body = u''.join(parts).replace(u'\x92', u"'").strip()
        return story


# NOTE: in the patch this class belongs to weboob/backends/hds/test.py.
class HDSTest(BackendTest):
    """Smoke test for the ``hds`` backend."""

    BACKEND = 'hds'

    def test_new_messages(self):
        """Consume up to 10 unread messages; passes if nothing raises."""
        for message in limit(self.backend.iter_unread_messages(), 10):
            pass