add a tool to export the HDS stories into a sqlite database

This commit is contained in:
Romain Bignon 2011-08-03 15:58:43 +02:00
commit dff165593f
2 changed files with 142 additions and 0 deletions

123
contrib/hds/export.py Executable file
View file

@ -0,0 +1,123 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
try:
import sqlite3 as sqlite
except ImportError, e:
from pysqlite2 import dbapi2 as sqlite
import sys
import logging
# Root logger configuration: emit every record from DEBUG upwards on
# standard output, the same stream used for the progress messages.
level = logging.DEBUG
logging.basicConfig(level=level, stream=sys.stdout)
from weboob.backends.hds.browser import HDSBrowser
def main(filename):
try:
db = sqlite.connect(database=filename, timeout=10.0)
except sqlite.OperationalError, err:
print 'Unable to open %s database: %s' % (filename, err)
return 1
sys.stdout.write('Reading database... ')
sys.stdout.flush()
results = db.execute('SELECT id, author FROM stories')
stored = set()
authors = set()
for r in results:
stored.add(r[0])
authors.add(r[1])
stored_authors = set([s[0] for s in db.execute('SELECT name FROM authors')])
sys.stdout.write('ok\n')
br = HDSBrowser()
to_fetch = set()
sys.stdout.write('Getting stories list from website... ')
sys.stdout.flush()
for story in br.iter_stories():
if story.id in stored:
break
to_fetch.add(story.id)
authors.add(story.author.name)
sys.stdout.write(' ok\n')
sys.stdout.write('Getting %d new storiese... ' % len(to_fetch))
sys.stdout.flush()
for id in to_fetch:
story = br.get_story(id)
if not story:
logging.warning('Story #%d unavailable' % id)
continue
db.execute("""INSERT INTO stories (id, title, date, category, author, body)
VALUES (?, ?, ?, ?, ?, ?)""",
(story.id, story.title, story.date, story.category,
story.author.name, story.body))
db.commit()
sys.stdout.write('ok\n')
authors = authors.difference(stored_authors)
sys.stdout.write('Getting %d new authors... ' % len(authors))
sys.stdout.flush()
for a in authors:
author = br.get_author(a)
if not author:
logging.warning('Author %s unavailable\n' % id)
continue
db.execute("INSERT INTO authors (name, sex, description) VALUES (?, ?, ?)",
(a, author.sex, author.description))
db.commit()
sys.stdout.write(' ok\n')
return 0
if __name__ == '__main__':
if len(sys.argv) < 2:
print >>sys.stderr, 'Syntax: %s [--help] SQLITE_FILENAME' % sys.argv[0]
sys.exit(1)
if sys.argv[1] in ('-h', '--help'):
print 'Syntax: %s SQLITE_FILENAME' % sys.argv[0]
print ''
print 'Before running this software, please create the database with'
print 'this command:'
print ' $ cat scheme.sql | sqlite3 hds.sql'
print ''
print 'You can then run export.py with:'
print ' $ %s hds.sql ' % sys.argv[0]
print ''
print 'It fill the database with stories and authors information'
print 'fetched from histoires-de-sexe.net'
print ''
print 'You can next use SQL queries to find interesting stories, for'
print 'example:'
print ''
print '- To get all stories written by women'
print ' sqlite> SELECT s.id, s.title, s.category, a.name'
print ' FROM stories AS s LEFT JOIN authors AS a'
print ' WHERE a.name = s.author AND a.sex = 2;'
print '- To get all stories where it talks about bukkake'
print ' sqlite> SELECT s.id, s.title, s.category, a.name'
print ' FROM stories AS s LEFT JOIN authors AS a'
print ' WHERE a.name = s.author AND s.body LIKE \'%bukkake%\';'
sys.exit(0)
sys.exit(main(sys.argv[1]))

19
contrib/hds/scheme.sql Normal file
View file

@ -0,0 +1,19 @@
-- Schema of the SQLite database filled by export.py.
-- WARNING: loading this file drops both tables along with their content.

-- IF EXISTS keeps the script error-free on a brand-new database file,
-- where there is no table to drop yet.
DROP TABLE IF EXISTS authors;
CREATE TABLE authors (
    name        TEXT PRIMARY KEY,
    sex         INTEGER,  -- export.py's help text queries sex = 2 for women
    description TEXT
);
CREATE INDEX author_idx ON authors(name, sex);

DROP TABLE IF EXISTS stories;
CREATE TABLE stories (
    id       INTEGER PRIMARY KEY,
    title    TEXT,
    date     TEXT,
    category TEXT,
    author   TEXT REFERENCES authors,  -- author name, see authors.name
    body     TEXT
);
CREATE INDEX stories_idx ON stories(id, category);