Various bugfixes and additions

This commit is contained in:
maartenkeijzer 2005-11-24 09:35:34 +00:00
commit 44876f0926
24 changed files with 889 additions and 187 deletions

View file

@ -1,5 +1,5 @@
COMPILEFLAGS=-Wno-deprecated -g -Wall #-DINTERVAL_DEBUG
OPTFLAGS= -O3 DNDEBUG
COMPILEFLAGS=-Wno-deprecated -g -Wall -mpreferred-stack-boundary=2 -falign-functions=0#-DINTERVAL_DEBUG
OPTFLAGS= #-O3 -DNDEBUG
PROFILE_FLAGS=#-pg
LDFLAGS=#-a
@ -17,9 +17,9 @@ VPATH=sym fun gen eval regression eo_interface
CXXSOURCES=FunDef.cpp Sym.cpp SymImpl.cpp SymOps.cpp sym_compile.cpp TreeBuilder.cpp LanguageTable.cpp\
Dataset.cpp ErrorMeasure.cpp Scaling.cpp TargetInfo.cpp BoundsCheck.cpp util.cpp NodeSelector.cpp\
eoSymCrossover.cpp sym_operations.cpp eoSymMutate.cpp
eoSymCrossover.cpp sym_operations.cpp eoSymMutate.cpp eoSymLambdaMutate.cpp MultiFunction.cpp
TESTPROGRAMS=test/test_compile test/testeo test/test_simplify test/test_diff test/test_lambda
TESTPROGRAMS=test/test_compile test/testeo test/test_simplify test/test_diff test/test_lambda test/test_mf
OBJS= $(CXXSOURCES:.cpp=.o) c_compile.o
@ -57,12 +57,15 @@ test/test_diff: test/test_diff.o $(SYMLIB)
test/test_lambda: test/test_lambda.o $(SYMLIB)
$(CXX) -o test/test_lambda test/test_lambda.o $(SYMLIB) ${LIBS}
test/test_mf: test/test_mf.o $(SYMLIB)
$(CXX) -o test/test_mf test/test_mf.o $(SYMLIB) ${LIBS}
# eo
../../src/libeo.a:
make -C ../../src
make -C ../../src libeo.a
../../src/utils/libeoutils.a:
make -C ../../src/utils
make -C ../../src/utils libeoutils.a
# tiny cc
tcc/: tcc.tar.gz

View file

@ -11,17 +11,17 @@ running it through a simulator, you might find what you're looking for here.
Mathsym has a few interesting characteristics. First and foremost is the
basic representation. It uses trees, but these trees are stored in a
reference counted hashtable. This means that every subtree that is alive
is stored once and only once. The reference counting mechanism takes care
of memory management.
reference counted hashtable. This means that every distinct subtree that is alive
is stored once and only once.
The reference counting mechanism takes care of memory management.
The idea of using a hashtable (for offline analysis) comes from Walter Tackett, in his
1994 dissertation. The current system is just a real-time implementation of this
idea, adding the reference counting for ease of use.
The hashtable brings overhead. It's still pretty fast, but a string based representation
would run rounds around it. However, by virtue of it storing every subtree only once, it
is fairly tight on memory. This helps tremendously when confronted with growing populations, bloat.
would run circles around it. However, by virtue of storing every distinct subtree only once, it
is fairly tight on memory. This helps tremendously when confronted with excessively growing populations (bloat).
The hashtable implementation cannot stop bloat, but it does make it more manageable. In a typical
GP run, the number of distinct subtrees is only 10-20% of the total number of subtrees.

View file

@ -32,6 +32,9 @@ class EoSym : public EO<Fitness>, public Sym {
EO<Fitness>::invalidate();
static_cast<Sym*>(this)->operator=(sym);
}
/** Mutable access to the Sym base subobject of this individual. */
Sym& get() { return static_cast<Sym&>(*this); };
/** Read-only access: returns a copy of the Sym base subobject.
 *  The cast goes through `const Sym&` because `*this` is const inside a
 *  const member function and static_cast may not cast constness away. */
Sym get() const { return static_cast<const Sym&>(*this); };
virtual void printOn(std::ostream& os) const;
virtual void readFrom(std::istream& is);

View file

@ -19,6 +19,7 @@
#define SYMEVAL_H
#include <Sym.h>
#include <FunDef.h>
#include <ErrorMeasure.h>
#include <BoundsCheck.h>
@ -52,7 +53,7 @@ class eoSymPopEval : public eoPopEvalFunc<EoType> {
for (unsigned i = 0; i < p1.size(); ++i) {
if (p1[i].invalid()) {
if (p1[i].size() < size_cap && check.in_bounds(p1[i])) {
if (expand_all(p1[i]).size() < size_cap && check.in_bounds(p1[i])) {
unevaluated.push_back(i);
tmppop.push_back( static_cast<Sym>(p1[i]) );
} else {
@ -64,7 +65,7 @@ class eoSymPopEval : public eoPopEvalFunc<EoType> {
for (unsigned i = 0; i < p2.size(); ++i) {
if (p2[i].invalid()) {
if (p2[i].size() < size_cap && check.in_bounds(p2[i])) {
if (expand_all(p2[i]).size() < size_cap && check.in_bounds(p2[i])) {
unevaluated.push_back(p1.size() + i);
tmppop.push_back( static_cast<Sym>(p2[i]) );

View file

@ -0,0 +1,47 @@
/*
* Copyright (C) 2005 Maarten Keijzer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef SYMLAMBDAMUTATE_H
#define SYMLAMBDAMUTATE_H
#include <eoOp.h>
class NodeSelector;
class Sym;
extern Sym compress(Sym, NodeSelector&);
extern Sym expand(Sym, NodeSelector&);
/**
 * Mutation operator built on the two lambda transformations declared above.
 * Each call consults rng.flip() once and replaces the individual with either
 * expand(individual, selector) or compress(individual, selector).
 */
template <class EoType>
class eoSymLambdaMutate : public eoMonOp<EoType> {
    NodeSelector& selector;

public:
    /** Keeps a reference to `s`; it is passed on to expand()/compress(). */
    eoSymLambdaMutate(NodeSelector& s) : selector(s) {}

    /** Mutates `tomutate` in place. Always returns true. */
    bool operator()(EoType& tomutate) {
        const bool do_expand = rng.flip();
        if (!do_expand) {
            tomutate.set( compress(tomutate, selector));
            return true;
        }
        tomutate.set( expand(tomutate, selector));
        return true;
    }
};
#endif

View file

@ -26,9 +26,11 @@ std::pair<Sym, bool> do_mutate(Sym sym, double p, const LanguageTable& table) {
bool changed = false;
SymVec args = sym.args();
if (rng.flip(p)) {
token_t new_token = table.get_random_function( args.size());
if (new_token != sym.token()) changed = true;
sym = Sym(new_token, args);
token_t new_token = table.get_random_function(sym.token(), args.size());
if (new_token != sym.token()) {
changed = true;
sym = Sym(new_token, args);
}
}
for (unsigned i = 0; i < args.size(); ++i) {

View file

@ -0,0 +1,26 @@
// Primitive operations used by the MultiFunction evaluator.
// Every primitive receives an arg_ptr positioned at its first operand slot;
// a slot is dereferenced once more to obtain the operand's value.
namespace multi_function {

// Sum of the first two operands.
double plus(arg_ptr args) {
    const double lhs = *args[0];
    const double rhs = *args[1];
    return lhs + rhs;
}

// Product of the first two operands.
double mult(arg_ptr args) {
    const double lhs = *args[0];
    const double rhs = *args[1];
    return lhs * rhs;
}

// Unary minus (negation) of the first operand -- not a minimum.
double min(arg_ptr args) {
    const double operand = **args;
    return -operand;
}

// Reciprocal of the first operand.
double inv(arg_ptr args) {
    const double operand = **args;
    return 1 / operand;
}

//template <typename f> class F { public: double operator()(double a) { return f(a); } };

// Exponential of the first operand; calls the global ::exp, not this function.
double exp(arg_ptr args) {
    const double operand = **args;
    return ::exp(operand);
}

} // namespace

View file

@ -0,0 +1,341 @@
#include <vector.h>
#include "MultiFunction.h"
#include "Sym.h"
#include "FunDef.h"
using namespace std;
typedef vector<double>::const_iterator data_ptr;
typedef vector<data_ptr> data_ptrs;
typedef data_ptrs::const_iterator arg_ptr;
#include "MultiFuncs.cpp"
typedef double (*fptr)( arg_ptr );
// Returns a short printable symbol for one of the multi_function primitives,
// or "unknown" when `f` is none of them.
string print_function( fptr f) {
    struct Label {
        fptr fn;
        const char* text;
    };
    const Label labels[] = {
        { multi_function::plus, "+" },
        { multi_function::mult, "*" },
        { multi_function::min,  "-" },
        { multi_function::inv,  "/" },
        { multi_function::exp,  "e" }
    };
    const unsigned count = sizeof(labels) / sizeof(labels[0]);
    for (unsigned k = 0; k < count; ++k) {
        if (f == labels[k].fn) return labels[k].text;
    }
    return "unknown";
}
// Couples an evaluator primitive with the position of its first operand slot.
struct Function {
    fptr function;  // primitive to invoke
    arg_ptr args;   // passed to `function` unchanged

    // Invokes the primitive on its operand slots and returns its result.
    double operator()() const {
        return (*function)(args);
    }
};
static vector<Function> token_2_function;
// Rewrites an application with more than two arguments as a left-nested chain
// of two-argument applications of the same token:
//   f(a, b, c, d)  ->  f(f(f(a, b), c), d)
// A symbol with two or fewer arguments is returned unchanged. (The previous
// `== 2` test let 0- and 1-argument symbols recurse down to an empty argument
// vector and call back() on it.)
Sym make_binary(Sym sym) {
    if (sym.args().size() <= 2) return sym;

    SymVec args = sym.args();
    Sym an = args.back();          // last argument becomes the right operand
    args.pop_back();

    // Fold everything to the left of `an` into a single binary tree first.
    Sym nw = make_binary( Sym( sym.token(), args) );

    args.resize(2);
    args[0] = nw;
    args[1] = an;
    return Sym(sym.token(), args);
}
// Collects the distinct constants, variables and function applications that
// occur in the symbols passed to add(), assigning each one an `entry`
// (kind, index within the table for that kind). Symbols already seen are
// looked up in `map` and reuse their existing entry.
class Compiler {
public:
// Kind of table an entry's index refers to.
enum func_type {constant, variable, function};
// (kind, index into constants / variables / functions respectively)
typedef pair<func_type, unsigned> entry;
#if USE_TR1
typedef std::tr1::unordered_map<Sym, entry, HashSym> HashMap;
#else
typedef hash_map<Sym, entry, HashSym> HashMap;
#endif
// Cache from symbol to the entry that was created for it.
HashMap map;
// Values of the constants encountered, in order of first appearance.
vector<double> constants;
// Input indices (as reported by get_variable_index) of the variables encountered.
vector<unsigned> variables;
// Primitive for each function entry; function_args[i] holds the operand entries of functions[i].
vector< fptr > functions;
vector< vector<entry> > function_args;
// Sum of function_args[i].size() over all function entries.
unsigned total_args;
// One entry per call to add(), in call order.
vector<entry> outputs;
Compiler() : total_args(0) {}
// Returns the entry for `sym`, creating it (and, recursively, the entries of
// its arguments) when `sym` is not yet in `map`. Operands are always added
// before the function that uses them. Calls exit(1) on a token it cannot handle.
entry do_add(Sym sym) {
HashMap::iterator it = map.find(sym);
if (it == map.end()) { // new entry
token_t token = sym.token();
if (is_constant(token)) {
constants.push_back( get_constant_value(token) ); // set value
entry e = make_pair(constant, constants.size()-1);
map.insert( make_pair(sym, e) );
return e;
} else if (is_variable(token)) {
unsigned idx = get_variable_index(token);
variables.push_back(idx);
entry e = make_pair(variable, variables.size()-1);
map.insert( make_pair(sym, e) );
return e;
} // else
// Function application: pick the primitive `f` and collect operand entries in `vec`.
// Branches that `return do_add(...)` delegate to an equivalent symbol and do
// not insert `sym` itself into `map`.
fptr f;
vector<entry> vec;
const SymVec& args = sym.args();
switch (token) {
case sum_token:
{
// empty sum -> constant 0.0
if (args.size() == 0) {
return do_add( SymConst(0.0));
}
// single-term sum -> the term itself
if (args.size() == 1) {
return do_add(args[0]);
}
if (args.size() == 2) {
vec.push_back(do_add(args[0]));
vec.push_back(do_add(args[1]));
f = multi_function::plus;
//cout << "Adding + " << vec[0].second << ' ' << vec[1].second << endl;
break;
} else {
// more than two terms: nest into binary sums first
return do_add( make_binary(sym) );
}
}
case prod_token:
{
// empty product -> constant 1.0
if (args.size() == 0) {
return do_add( SymConst(1.0));
}
// single-factor product -> the factor itself
if (args.size() == 1) {
return do_add(args[0]);
}
if (args.size() == 2) {
vec.push_back(do_add(args[0]));
vec.push_back(do_add(args[1]));
f = multi_function::mult;
//cout << "Adding * " << vec[0].second << ' ' << vec[1].second << endl;
break;
} else {
// more than two factors: nest into binary products first
return do_add( make_binary(sym) );
}
}
case sqr_token:
{
// sqr(a) is compiled as the product a * a
// NOTE(review): args[0] is read without checking that args is non-empty.
SymVec newargs(2);
newargs[0] = args[0];
newargs[1] = args[0];
return do_add( Sym(prod_token, newargs));
}
default :
{
// Everything else must be a one-argument function.
if (args.size() != 1) {
cerr << "Unknown function " << sym << " encountered" << endl;
exit(1);
}
vec.push_back(do_add(args[0]));
switch (token) {
case min_token: f = multi_function::min; break;
case inv_token: f = multi_function::inv; break;
case exp_token :f = multi_function::exp; break;
default :
{
cerr << "Unimplemented token encountered " << sym << endl;
exit(1);
}
}
//cout << "Adding " << print_function(f) << ' ' << vec[0].second << endl;
}
}
// Register the new function entry together with its operand entries.
total_args += vec.size();
function_args.push_back(vec);
functions.push_back(f);
entry e = make_pair(function, functions.size()-1);
map.insert( make_pair(sym, e) );
return e;
}
return it->second; // entry
}
// Compiles `sym` and records its entry as the next output.
void add(Sym sym) {
entry e = do_add(sym);
outputs.push_back(e);
}
};
class MultiFunctionImpl {
public:
// input mapping
vector<unsigned> input_idx;
unsigned constant_offset;
unsigned var_offset;
// evaluation
vector<double> data;
vector<Function> funcs;
data_ptrs args;
vector<unsigned> output_idx;
MultiFunctionImpl() {}
void clear() {
input_idx.clear();
data.clear();
funcs.clear();
args.clear();
output_idx.clear();
constant_offset = 0;
}
void eval(const double* x, double* y) {
unsigned i;
// evaluate variables
for (i = constant_offset; i < constant_offset + input_idx.size(); ++i) {
data[i] = x[input_idx[i-constant_offset]];
}
for(; i < data.size(); ++i) {
data[i] = funcs[i-var_offset]();
//cout << i << " " << data[i] << endl;
}
for (i = 0; i < output_idx.size(); ++i) {
y[i] = data[output_idx[i]];
}
}
void eval(const vector<double>& x, vector<double>& y) {
eval(&x[0], &y[0]);
}
void setup(const vector<Sym>& pop) {
clear();
Compiler compiler;
for (unsigned i = 0; i < pop.size(); ++i) {
Sym sym = (expand_all(pop[i]));
compiler.add(sym);
}
// compiler is setup so get the data
constant_offset = compiler.constants.size();
var_offset = constant_offset + compiler.variables.size();
int n = var_offset + compiler.functions.size();
data.resize(n);
funcs.resize(compiler.functions.size());
args.resize(compiler.total_args);
// constants
for (unsigned i = 0; i < constant_offset; ++i) {
data[i] = compiler.constants[i];
//cout << i << ' ' << data[i] << endl;
}
// variables
input_idx = compiler.variables;
//for (unsigned i = constant_offset; i < var_offset; ++i) {
//cout << i << " x" << input_idx[i-constant_offset] << endl;
//}
// functions
unsigned which_arg = 0;
for (unsigned i = 0; i < funcs.size(); ++i) {
Function f;
f.function = compiler.functions[i];
//cout << i+var_offset << ' ' << print_function(f.function);
// interpret args
for (unsigned j = 0; j < compiler.function_args[i].size(); ++j) {
Compiler::entry e = compiler.function_args[i][j];
unsigned idx = e.second;
switch (e.first) {
case Compiler::function: idx += compiler.variables.size();
case Compiler::variable: idx += compiler.constants.size();
case Compiler::constant: {}
}
args[which_arg + j] = data.begin() + idx;
//cout << ' ' << idx << "(" << e.second << ")";
}
//cout << endl;
f.args = args.begin() + which_arg;
which_arg += compiler.function_args[i].size();
funcs[i] = f;
}
// output indices
output_idx.resize(compiler.outputs.size());
for (unsigned i = 0; i < output_idx.size(); ++i) {
output_idx[i] = compiler.outputs[i].second;
switch(compiler.outputs[i].first) {
case Compiler::function: output_idx[i] += compiler.variables.size();
case Compiler::variable: output_idx[i] += compiler.constants.size();
case Compiler::constant: {}
}
//cout << "out " << output_idx[i] << endl;
}
}
};
// Allocates the implementation object and compiles `pop` into it.
MultiFunction::MultiFunction(const std::vector<Sym>& pop)
    : pimpl(new MultiFunctionImpl)
{
    pimpl->setup(pop);
}

// Releases the implementation object allocated in the constructor.
MultiFunction::~MultiFunction()
{
    delete pimpl;
}

// Vector form: forwards to MultiFunctionImpl::eval.
void MultiFunction::operator()(const std::vector<double>& x, std::vector<double>& y)
{
    (*pimpl).eval(x, y);
}

// Raw-pointer form: forwards to MultiFunctionImpl::eval.
void MultiFunction::operator()(const double* x, double* y)
{
    (*pimpl).eval(x, y);
}

View file

@ -0,0 +1,26 @@
#ifndef MULTIFUNCTION_H_
#define MULTIFUNCTION_H_
#include <vector>
class Sym;
class MultiFunctionImpl;
/**
 * Evaluates a whole vector of Sym expressions in a single call.
 *
 * The implementation is hidden behind a pointer (MultiFunctionImpl) so this
 * header needs only forward declarations.
 */
class MultiFunction {
    // Copying is disabled: both operations are declared private.
    MultiFunction& operator=(const MultiFunction&);
    MultiFunction(const MultiFunction&);

    MultiFunctionImpl* pimpl;

public:
    /** Builds the evaluator for `pop`. Explicit, so a std::vector<Sym> is
     *  never silently converted into a MultiFunction. */
    explicit MultiFunction(const std::vector<Sym>& pop);
    ~MultiFunction();

    /** Evaluates every expression on inputs `x`; results are written to `y`. */
    void operator()(const std::vector<double>& x, std::vector<double>& y);
    /** Same as above on raw arrays; `y` must have room for one value per expression. */
    void operator()(const double* x, double* y);
};
#endif

View file

@ -9,25 +9,42 @@ extern void symc_init() {
tcc_delete(s);
}
s = tcc_new();
if (s == 0) {
fprintf(stderr, "Tiny cc doesn't function properly");
exit(1);
}
tcc_set_output_type(s, TCC_OUTPUT_MEMORY);
}
extern void symc_compile(const char* func_str) {
extern int symc_compile(const char* func_str) {
//printf("Compiling %s\n", func_str);
tcc_compile_string(s, func_str);
int err = tcc_compile_string(s, func_str);
if (err) {
fprintf(stderr,"Compile failed");
}
return err;
}
extern void symc_link() {
tcc_relocate(s);
/* Relocates (links) the code compiled into the current TCC state.
 * Terminates the process when relocation fails; otherwise returns the
 * (zero) status reported by tcc_relocate. */
extern int symc_link() {
int err = tcc_relocate(s);
if (err) {
/* this is the link step, so do not report it as a compile failure */
fprintf(stderr,"Link failed");
exit(1);
}
return err;
}
/* Looks up func_name in the current TCC state and returns its address.
 * Terminates the process when the symbol cannot be resolved. */
extern void* symc_get_fun(const char* func_name) {
/* Initialised so a failed lookup, which may leave val unwritten, is still
 * detected instead of reading an indeterminate value. */
unsigned long val = 0;
int err = tcc_get_symbol(s, &val, func_name);
if (err != 0 || val == 0) {
fprintf(stderr,"getfun failed");
exit(1);
}
return (void*) val;
}

View file

@ -26,8 +26,8 @@ using namespace std;
extern "C" {
void symc_init();
void symc_compile(const char* func_str);
void symc_link();
int symc_compile(const char* func_str);
int symc_link();
void* symc_get_fun(const char* func_name);
void* symc_make(const char* func_str, const char* func_name);
}
@ -46,33 +46,6 @@ typedef std::tr1::unordered_map<Sym, string, HashSym> HashMap;
typedef hash_map<Sym, string, HashSym> HashMap;
#endif
HashMap::iterator find_entry(const Sym& sym, ostream& os, HashMap& map) {
HashMap::iterator result = map.find(sym);
if (result == map.end()) { // new entry
const SymVec& args = sym.args();
vector<string> argstr(args.size());
for (unsigned i = 0; i < args.size(); ++i) {
argstr[i] = find_entry(args[i], os, map)->second;
}
unsigned current_entry = map.size(); // current number of variables defined
// write out the code
const FunDef& fun = get_element(sym.token());
string code = fun.c_print(argstr, vector<string>());
os << "double a" << current_entry << "=" << code << ";\n";
// insert variable ref in map
ostringstream str;
str << 'a' << current_entry;
result = map.insert( make_pair(sym, str.str()) ).first; // only want iterator
}
return result;
}
// prints 'num' in reverse notation. Does not matter as it's a unique id
string make_var(unsigned num) {
string str = "a";
@ -83,37 +56,6 @@ string make_var(unsigned num) {
return str;
}
HashMap::iterator find_entry2(const Sym& sym, string& str, HashMap& map) {
HashMap::iterator result = map.find(sym);
if (result == map.end()) { // new entry
const SymVec& args = sym.args();
vector<string> argstr(args.size());
for (unsigned i = 0; i < args.size(); ++i) {
argstr[i] = find_entry2(args[i], str, map)->second;
}
string var = make_var(map.size()); // map.size(): unique id
// write out the code
const FunDef& fun = get_element(sym.token());
string code = fun.c_print(argstr, vector<string>() );
str += "double " + var + "=" + code + ";\n";
result = map.insert( make_pair(sym, var ) ).first; // only want iterator
}
return result;
}
void write_entry(const Sym& sym, ostream& os, HashMap& map, unsigned out) {
string s;
HashMap::iterator it = find_entry2(sym, s, map);
os << s;
os << "y[" << out << "]=" << it->second << ";\n";
}
template <class T>
string to_string(T t) {
ostringstream os;
@ -121,22 +63,50 @@ string to_string(T t) {
return os.str();
}
void write_entry(const Sym& sym, string& str, HashMap& map, unsigned out) {
HashMap::iterator it = find_entry2(sym, str, map);
HashMap::iterator find_entry(const Sym& sym, string& str, HashMap& map) {
HashMap::iterator result = map.find(sym);
if (result == map.end()) { // new entry
const SymVec& args = sym.args();
vector<string> argstr(args.size());
for (unsigned i = 0; i < args.size(); ++i) {
argstr[i] = find_entry(args[i], str, map)->second;
}
string var = make_var(map.size()); // map.size(): unique id
string code;
// write out the code
const FunDef& fun = get_element(sym.token());
code = fun.c_print(argstr, vector<string>() );
str += "double " + var + "=" + code + ";\n";
result = map.insert( make_pair(sym, var ) ).first; // only want iterator
}
str += "y[" + to_string(out) + "]=" + it->second + ";\n";
return result;
}
//#include <fstream>
void write_entry(const Sym& sym, string& str, HashMap& map, unsigned out) {
HashMap::iterator it = find_entry(sym, str, map);
str += "y[" + to_string(out) + "]=" + it->second + ";\n";
//cout << "wrote " << out << '\n';
}
#include <fstream>
multi_function compile(const std::vector<Sym>& syms) {
//cout << "Multifunction " << syms.size() << endl;
// static stream to avoid fragmentation of these LARGE strings
static string str;
str.clear();
str += make_prototypes();
str += "double func(const double* x, double* y) { \n ";
str += "extern double func(const double* x, double* y) { \n ";
multi_function result;
HashMap map(Sym::get_dag().size());
for (unsigned i = 0; i < syms.size(); ++i) {
@ -144,34 +114,27 @@ multi_function compile(const std::vector<Sym>& syms) {
}
str += ";}";
// ofstream cmp("compiled.c");
// cmp << str;
// cmp.close();
return (multi_function) symc_make(str.c_str(), "func");
}
multi_function compile2(const std::vector<Sym>& syms) {
// static stream to avoid fragmentation of these LARGE strings
static ostringstream os;
os.str("");
/*static int counter = 0;
ostringstream nm;
nm << "cmp/compiled" << (counter++) << ".c";
cout << "Saving as " << nm.str() << endl;
ofstream cmp(nm.str().c_str());
cmp << str;
cmp.close();
os << make_prototypes();
//cout << "Multifunction " << syms.size() << endl;
cout << "Size of map " << map.size() << endl;
*/
os << "double func(const double* x, double* y) { \n ";
HashMap map(Sym::get_dag().size());
for (unsigned i = 0; i < syms.size(); ++i) {
write_entry(syms[i], os, map, i);
result = (multi_function) symc_make(str.c_str(), "func");
if (result==0) { // error
cout << "Error in compile " << endl;
}
os << ";}";
return (multi_function) symc_make(os.str().c_str(), "func");
return result;
}
single_function compile(Sym sym) {
@ -238,7 +201,7 @@ void compile(const std::vector<Sym>& syms, std::vector<single_function>& functio
#ifdef INTERVAL_DEBUG
//cout << "Compiling " << os.str() << endl;
#endif
symc_compile(os.str().c_str());
symc_link();

View file

@ -21,8 +21,8 @@
#include <vector>
typedef double (*single_function)(const double *);
typedef void (*multi_function)(const double*, double*); // last argument is output
typedef double (*single_function)(const double []);
typedef double (*multi_function)(const double[], double[]);
/*
* Important, after every call of the functions below, the function pointers of the previous

View file

@ -65,7 +65,6 @@ Interval eval(const Sym& sym, const vector<Interval>& inputs) {
if (!valid(interv[i])) throw interval_error();
}
return language[sym.token()]->eval(interv, inputs);
}
@ -103,7 +102,6 @@ Sym SymVar(unsigned idx) {
} else if (var_token[idx] == token_t(-1)) {
var_token[idx] = add_function( make_var(idx) );
}
return Sym(var_token[idx]);
}
@ -122,27 +120,37 @@ struct HashDouble{
#if USE_TR1
typedef std::tr1::unordered_map<double, token_t> DoubleSet;
typedef std::tr1::unordered_map<Sym, token_t> LambdaSet;
#else
typedef hash_map<double, token_t, HashDouble> DoubleSet;
typedef hash_map<Sym, token_t, HashSym> LambdaSet;
#endif
static DoubleSet doubleSet; // for quick checking if a constant already exists
static vector<double> token_value;
static LambdaSet lambdaSet;
static vector<Sym> token_lambda;
static std::vector<token_t> free_list;
void delete_val(token_t token) { // clean up the information about this value
if (is_constant(token)) {
//cout << "Deleting constant token " << token << endl;
double value = token_value[token];
doubleSet.erase(value);
delete language[token];
language[token] = 0;
doubleSet.erase(value);
free_list.push_back(token);
}
if (is_lambda(token)) {
else if (is_lambda(token)) {
//cout << "Deleting lambda token " << token << endl;
Sym expression = token_lambda[token];
lambdaSet.erase(expression);
delete language[token];
language[token] = 0;
free_list.push_back(token);
@ -152,38 +160,30 @@ void delete_val(token_t token) { // clean up the information about this value
FunDef* make_const(double value);
void extend_free_list() {
unsigned sz = language.size();
language.resize(sz + sz+1); // double
for (unsigned i = sz; i < language.size(); ++i) {
free_list.push_back(i);
}
}
void extend_free_list();
Sym SymConst(double value) {
Sym::set_extra_dtor(delete_val);
DoubleSet::iterator it = doubleSet.find(value);
if (it != doubleSet.end()) {
return Sym(it->second);
return Sym(it->second); // already exists
}
if (free_list.empty()) { // make space for tokens;
extend_free_list();
token_value.resize(language.size(), 0.0);
}
token_t token = free_list.back();
free_list.pop_back();
//cout << "Creating constant with token " << token << endl;
assert(language[token] == 0);
language[token] = make_const(value);
doubleSet[value] = token;
if (token_value.size() < token) token_value.resize(token+1);
token_value[token] = value;
return Sym(token);
@ -198,10 +198,10 @@ namespace {
class Var : public FunDef {
public :
int idx;
unsigned idx;
string default_str;
Var(int _idx) : idx(_idx) {
Var(unsigned _idx) : idx(_idx) {
ostringstream os;
os << "x[" << idx << ']'; // CompiledCode expects this form
default_str = os.str();
@ -273,6 +273,10 @@ void get_constants(Sym sym, vector<double>& ret) {
}
/** Returns the value stored in the Const registered for `token`.
 *  The cast is unchecked: the caller must pass a constant's token. */
double get_constant_value(token_t token) {
    const Const* constant = static_cast<const Const*>(language[token]);
    return constant->value;
}
/** Get out the values for all constants in the expression */
vector<double> get_constants(Sym sym) {
vector<double> retval;
@ -329,24 +333,28 @@ bool is_variable(token_t token) {
return var != 0;
}
/** Returns the input index stored in the Var registered for `token`.
 *  The cast is unchecked: the caller must pass a variable's token. */
unsigned get_variable_index(token_t token) {
    return static_cast<const Var*>( language[token] )->idx;
}
namespace {
class Lambda : public FunDef {
public:
Sym expression;
int arity;
Lambda(Sym expr, int arity_) : expression(expr), arity(arity_) {}
Lambda(Sym expr, int arity_) : expression(expr), arity(arity_) {}
double eval(const vector<double>& vals, const vector<double>& _) const {
return ::eval(expression, vals);
}
string c_print(const vector<string>& args, const vector<string>& str) const {
return ::c_print(expression, args);
string c_print(const vector<string>& args, const vector<string>& _) const {
return string("/*f*/") + ::c_print(expression, args) + string("/*eof*/");
}
Interval eval(const vector<Interval>& args, const vector<Interval>& inputs) const {
Interval eval(const vector<Interval>& args, const vector<Interval>& _) const {
return ::eval(expression, args);
}
@ -381,41 +389,155 @@ class Lambda : public FunDef {
}
}
/** True when the language entry for `token` is a Lambda. */
bool is_lambda(token_t token) {
    return dynamic_cast<const Lambda*>( language[token]) != 0;
}
Sym SymLambda(Sym expression) {
vector<Sym> args;
Sym expr = normalize(expression, args);
std::ostream& print_list(Sym sym, ostream& os) {
os << sym.token() << ' ';
// check if expression is already present as a lambda expression
for (unsigned i = 0; i < language.size(); ++i) {
const Lambda* lambda = dynamic_cast<const Lambda*>(language[i]);
if (lambda != 0 && lambda->expression == expr) {
return Sym(i, args);
}
const SymVec& args = sym.args();
for (unsigned i = 0; i < args.size(); ++i) {
print_list(args[i], os);
}
// else add it
Lambda* lambda = new Lambda(expr, args.size());
return os;
}
token_t new_lambda(Sym sym, int arity) {
// check if already present
LambdaSet::iterator it = lambdaSet.find(sym);
if (it != lambdaSet.end()) {
return it->second;
}
// insert in language table
// new, insert
Lambda* lambda = new Lambda(sym, arity);
if (free_list.empty()) {
extend_free_list();
}
token_t lambda_token = free_list.back();
token_t token = free_list.back();
free_list.pop_back();
language[lambda_token] = lambda;
language[token] = lambda;
lambdaSet[sym] = token;
if (token_lambda.size() <= token) token_lambda.resize(token+1);
token_lambda[token] = sym;
return Sym(lambda_token, args);
return token;
}
/* Compression */
typedef hash_map<Sym, unsigned, HashSym> OccMap;
// Increments occ[s] once for every position at which subtree s occurs in
// `sym`, counting `sym` itself.
void count_occurances(Sym sym, OccMap& occ) {
    ++occ[sym];
    const SymVec& children = sym.args();
    const unsigned n = children.size();
    for (unsigned k = 0; k != n; ++k) {
        count_occurances(children[k], occ);
    }
}
// Builds a lambda body from `sym`. A subtree is replaced by a variable
// SymVar(k), with the subtree stored as args[k], when it is itself a variable
// or when occurrences * size exceeds occurrences + size + nvars. Identical
// subtrees share one slot in `args`. All other nodes are rebuilt from their
// recursively processed arguments.
Sym create_lambda(Sym sym, OccMap& occ, unsigned nvars, vector<Sym>& args) {
    const unsigned count = occ[sym];
    const unsigned size = sym.size();
    const bool abstract_out =
        (count * size > count + size + nvars) || is_variable(sym.token());

    if (!abstract_out) {
        SymVec children = sym.args();
        for (unsigned k = 0; k < children.size(); ++k) {
            children[k] = create_lambda(children[k], occ, nvars, args);
        }
        return Sym(sym.token(), children);
    }

    // Reuse the slot of an identical argument if there is one...
    unsigned slot = 0;
    while (slot < args.size() && !(args[slot] == sym)) {
        ++slot;
    }
    // ...otherwise append a new one (slot already equals its index).
    if (slot == args.size()) {
        args.push_back(sym);
    }
    return SymVar(slot);
}
// Tries to rewrite `sym` as the application of a lambda to a list of
// arguments. When the lambda body produced by create_lambda is smaller than
// `sym`, the body, each argument and finally the resulting application are
// compressed again; otherwise `sym` is returned unchanged.
Sym compress(Sym sym) {
    OccMap occ(sym.size());
    count_occurances(sym, occ);

    // number of distinct variable subtrees seen
    unsigned nvars = 0;
    for (OccMap::iterator it = occ.begin(); it != occ.end(); ++it) {
        nvars += is_variable(it->first.token()) ? 1 : 0;
    }

    SymVec args;
    Sym body = create_lambda(sym, occ, nvars, args);

    if (!(body.size() < sym.size())) {
        return sym;  // nothing gained
    }

    // see if the body can be compressed some more
    body = compress(body);
    const token_t token = new_lambda(body, args.size());

    for (unsigned k = 0; k < args.size(); ++k) {
        args[k] = compress(args[k]);
    }

    Sym applied = Sym(token, args);
    return compress(applied); // see if it can be compressed some more
}
// Forwards to compress(); the result is whatever compress returns for `expr`.
Sym SymLambda(Sym expr) {
    return compress(expr);
}
// Substitutes actual arguments into a lambda body: every variable node with
// index k is replaced by args[k]; all other nodes are rebuilt from their
// recursively substituted arguments.
Sym expand(Sym expr, const SymVec& args) {
    if (const Var* var = dynamic_cast<const Var*>( language[expr.token()] )) {
        return args[var->idx];
    }

    SymVec substituted = expr.args();
    const unsigned n = substituted.size();
    for (unsigned k = 0; k < n; ++k) {
        substituted[k] = expand(substituted[k], args);
    }
    return Sym(expr.token(), substituted);
}
// If the root of `sym` is a lambda application, returns the lambda body with
// the application's arguments substituted in; otherwise returns `sym` as is.
// Only the root is unfolded.
Sym SymUnlambda(Sym sym) {
    const Lambda* lambda = dynamic_cast<const Lambda*>( language[sym.token()] );
    if (lambda == 0) {
        return sym;
    }
    return expand(lambda->expression, sym.args());
}
// Expands lambda applications throughout `sym`: arguments are expanded first,
// then the root is unfolded with SymUnlambda. The process repeats on the
// result for as long as it differs from its input.
Sym expand_all(Sym sym) {
    SymVec expanded = sym.args();
    const unsigned n = expanded.size();
    for (unsigned k = 0; k < n; ++k) {
        expanded[k] = expand_all(expanded[k]);
    }

    Sym nw = SymUnlambda( Sym(sym.token(), expanded) );
    return (nw != sym) ? expand_all(nw) : nw;
}
namespace {
@ -442,10 +564,9 @@ class Sum : public FunDef {
}
Interval eval(const vector<Interval>& args, const vector<Interval>& inputs) const {
Interval interv(0.0); //(0.0-BiasEpsilon, 0.0+BiasEpsilon); // Profil/Bias seems to have a problem with 0 * inf when the Interval is exact zero (fpe)
Interval interv(0.0);
for (unsigned i = 0; i < args.size(); ++i) {
Interval a = args[i]; // Profil doesn't know much about const correctness
interv += a;
interv += args[i];
}
return interv;
}
@ -483,8 +604,7 @@ class Prod : public FunDef {
Interval eval(const vector<Interval>& args, const vector<Interval>& inputs) const {
Interval interv(1.0);
for (unsigned i = 0; i < args.size(); ++i) {
Interval a = args[i]; // Profil doesn't know much about const correctness
interv *= a;
interv *= args[i];
}
return interv;
}
@ -645,8 +765,51 @@ double sqr(double x) { return x*x; }
namespace {
FUNCDEF(sqr);
FUNCDEF(sqrt);
const int buildInFunctionOffset = language.size();
} // namespace
// Grows the language table from sz to 2*sz+1 entries and puts every newly
// created token index on the free list.
void add_tokens() {
    const unsigned old_size = language.size();
    const unsigned new_size = old_size + old_size + 1; // double
    language.resize(new_size);

    for (unsigned token = old_size; token < new_size; ++token) {
        free_list.push_back(token);
    }
}
void extend_free_list() {
// first check if we can clean up unused tokens;
const vector<unsigned>& refcount = Sym::token_refcount();
for (unsigned i = buildInFunctionOffset; i < refcount.size(); ++i) {
if (language[i] == 0) continue;
bool c = is_constant(i);
bool l = is_lambda(i);
if (refcount[i] == 0 && (c || l)) {
if (c) {
doubleSet.erase(token_value[i]);
}
if (l) {
lambdaSet.erase(token_lambda[i]);
}
delete language[i];
language[i] = 0;
free_list.push_back(i);
}
}
// if still empty, add new tokens
if (free_list.empty()) {
add_tokens();
}
}
/* Serialization */
void write_raw(ostream& os, const Sym& sym) {
token_t token = sym.token();

View file

@ -71,7 +71,7 @@ extern std::string c_print(const Sym& sym);
extern std::string c_print(const Sym& sym, const std::vector<std::string>& var_names);
/** Pretty printer streamer */
inline std::ostream& operator<<(std::ostream& os, Sym sym) { return os << c_print(sym); }
inline std::ostream& operator<<(std::ostream& os, const Sym& sym) { return os << c_print(sym); }
/* Support for Ephemeral Random Constants (ERC) */
@ -92,6 +92,12 @@ extern Sym SymVar(unsigned idx);
extern Sym SymLambda(Sym expression);
extern Sym SymUnlambda(Sym sym);
/** Expands all lambda expressions inline */
extern Sym expand_all(Sym sym);
extern Sym compress(Sym sym);
/** Get out the values for all constants in the expression */
std::vector<double> get_constants(Sym sym);
@ -101,8 +107,11 @@ Sym set_constants(Sym sym, const std::vector<double>& constants);
/** check if a token is a constant */
extern bool is_constant(token_t token);
extern double get_constant_value(token_t token);
/** check if a token is a variable */
extern bool is_variable(token_t token);
extern unsigned get_variable_index(token_t token);
/** check if a token is a user/automatically defined function */
extern bool is_lambda(token_t token);

View file

@ -36,25 +36,25 @@ Sym simplify_constants(Sym sym) {
}
args[i] = arg;
all_constants &= is_constant(args[i].token());
}
if (args.size() == 0) return sym; // variable or constant
if (args.size() == 0) {
if (sym.token() == sum_token) return SymConst(0.0);
if (sym.token() == prod_token) return SymConst(1.0);
return sym; // variable or constant
}
if (all_constants) {
// evaluate
vector<double> dummy;
vector<double> vals(args.size());
for (unsigned i = 0; i < vals.size(); ++i) {
vals[i] = eval(sym, dummy);
}
Sym result = SymConst( get_element(token).eval(vals, dummy) );
double v = ::eval(sym, dummy);
Sym result = SymConst(v);
return result;
}
@ -101,9 +101,10 @@ Sym derivative(token_t token, Sym x) {
case sqr_token : return SymConst(2.0) * x;
case sqrt_token : return SymConst(0.5) * inv( sqrt(x));
default :
throw differentiation_error();
}
throw differentiation_error();
return x;
}

View file

@ -86,8 +86,11 @@ functor_t LanguageTable::get_random_function() const
return rng.choice(pimpl->functions);
}
token_t LanguageTable::get_random_function(unsigned arity) const
// Picks a random function token of the given arity. When the table holds no
// function of that arity, the token passed in is returned unchanged.
token_t LanguageTable::get_random_function(token_t token, unsigned arity) const
{
    const bool arity_known = arity < pimpl->functions_per_arity.size();
    if (arity_known && !pimpl->functions_per_arity[arity].empty()) {
        return rng.choice(pimpl->functions_per_arity[arity]);
    }
    return token; // return original token if no functions of this arity are found
}

View file

@ -49,7 +49,7 @@ class LanguageTable {
Sym get_random_const() const;
functor_t get_random_function() const;
token_t get_random_function(unsigned arity) const;
token_t get_random_function(token_t org, unsigned arity) const;
};
#endif

View file

@ -19,9 +19,12 @@
#include <vector>
#include <valarray>
#include "MultiFunction.h"
#include "ErrorMeasure.h"
#include "Dataset.h"
#include "Sym.h"
#include "FunDef.h"
#include "sym_compile.h"
#include "TargetInfo.h"
#include "stats.h"
@ -99,14 +102,19 @@ class ErrorMeasureImpl {
}
vector<ErrorMeasure::result> multi_function_eval(const vector<Sym>& pop) {
if (pop.size() == 0) return vector<ErrorMeasure::result>();
multi_function all = compile(pop);
//MultiFunction all(pop);
std::vector<double> y(pop.size());
Scaling noScaling = Scaling(new NoScaling);
const std::valarray<double>& t = train_info.targets();
cout << "Population size " << pop.size() << endl;
if (measure == ErrorMeasure::mean_squared_scaled) {
std::vector<Var> var(pop.size());
std::vector<Cov> cov(pop.size());
@ -117,6 +125,7 @@ class ErrorMeasureImpl {
vart.update(t[i]);
all(&data.get_inputs(i)[0], &y[0]); // evalutate
//all(data.get_inputs(i), y); // evalutate
for (unsigned j = 0; j < pop.size(); ++j) {
var[j].update(y[j]);
@ -148,6 +157,7 @@ class ErrorMeasureImpl {
double err = vart.get_var() - c / var[i].get_var();
result[i].error = err;
if (!finite(err)) {
//cout << pop[i] << endl;
cout << "b " << b << endl;
cout << "var t " << vart.get_var() << endl;
cout << "var i " << var[i].get_var() << endl;
@ -155,8 +165,9 @@ class ErrorMeasureImpl {
// Diagnostic replay: walk over every target value and print what individual i
// produces for the corresponding case.
for (unsigned j = 0; j < t.size(); ++j) {
    // j is the case index here (i selects the individual), so the inputs of
    // case j -- not case i -- must be fed to the compiled population; this
    // also keeps y[i] comparable with the ::eval call below, which uses case j.
    all(&data.get_inputs(j)[0], &y[0]); // evaluate
    //all(data.get_inputs(j), y); // evaluate
    cout << y[i] << endl;
    cout << y[i] << ' ' << ::eval(pop[i], data.get_inputs(j)) << endl;
}
exit(1);
@ -172,8 +183,9 @@ class ErrorMeasureImpl {
for (unsigned i = 0; i < train_cases(); ++i) {
// evaluate
all(&data.get_inputs(i)[0], &y[0]);
//all(data.get_inputs(i), y);
for (unsigned j = 0; j < y.size(); ++j) {
for (unsigned j = 0; j < pop.size(); ++j) {
double diff = y[j] - t[i];
if (measure == ErrorMeasure::mean_squared) { // branch prediction will probably solve this inefficiency
err[j] += diff * diff;

View file

@ -26,9 +26,9 @@ typedef UniqueNodeStats* (*NodeStatFunc)(Sym&);
UniqueNodeStats* (*Sym::factory)(const Sym&) = 0;
void (*Sym::extra_dtor)(token_t) = 0;
SymMap Sym::dag(100000); // reserve space for so many nodes
std::vector<unsigned> Sym::token_count;
size_t get_size(const SymVec& vec) {
size_t sz = 0;
@ -56,10 +56,17 @@ Sym::Sym(token_t tok, const SymVec& args_) : node(dag.end())
if (__unchecked_refcount() == 0) { // new node, set some stats
node->second.size = 1 + get_size(args_);
node->second.depth = 1 + get_depth(args_);
// token count
if (tok >= token_count.size()) {
token_count.resize(tok+1);
}
incref();
node->first.fixate();
// call the factory function if available
if (factory) node->second.uniqueNodeStats = factory(*this);
}
else incref();
}
@ -74,6 +81,12 @@ Sym::Sym(token_t tok, const Sym& a) : node(dag.end()) {
if (__unchecked_refcount() == 0) { // new node, set some stats
node->second.size = 1 + get_size(args_);
node->second.depth = 1 + get_depth(args_);
// token count
if (tok >= token_count.size()) {
token_count.resize(tok+1);
}
incref();
node->first.fixate();
// call the factory function if available
@ -90,10 +103,17 @@ Sym::Sym(token_t tok) : node(dag.end()) {
if (__unchecked_refcount() == 0) { // new node, set some stats
node->second.size = 1;
node->second.depth = 1;
// token count
if (tok >= token_count.size()) {
token_count.resize(tok+1);
}
incref();
// call the factory function if available
if (factory) node->second.uniqueNodeStats = factory(*this);
}
else incref();
}

View file

@ -33,7 +33,7 @@ struct UniqueNodeStats { virtual ~UniqueNodeStats(){} };
#include <cstddef>
#include "SymImpl.h"
#include "token.h"
#if __GNUC__ == 444
#if __GNUC__ == 4
#define USE_TR1 1
#else
#define USE_TR1 0
@ -92,7 +92,7 @@ class Sym
/* Support for traversing trees */
unsigned arity() const { return node->first.arity(); }
token_t token() const { assert(!empty()); return node->first.token; }
token_t token() const { return node->first.token; }
const SymVec& args() const { return node->first.vec(); }
@ -109,9 +109,9 @@ class Sym
* it can for instance be used to create ERC's and what not. */
static void set_factory_function(UniqueNodeStats* (*f)(const Sym&)) { factory=f; }
static void clear_factory_function() { factory = 0; }
static void set_extra_dtor( void (*extra)(token_t) ) { extra_dtor = extra; }
static const std::vector<unsigned>& token_refcount() { return token_count; }
/** Address of the element that `node` refers to, as an unsigned number.
 *  The pointer is first converted to std::size_t and only then narrowed:
 *  a direct reinterpret_cast from a pointer to `unsigned` is ill-formed when
 *  pointers are wider than `unsigned`. On such platforms only the low-order
 *  part of the address is returned, so distinct nodes may share a value. */
unsigned address() const { return static_cast<unsigned>(reinterpret_cast<std::size_t>(&*node)); }
private :
@ -122,15 +122,17 @@ class Sym
unsigned __unchecked_refcount() const { return node->second.refcount; }
void incref() {
if (!empty())
if (!empty()) {
++(node->second.refcount);
++token_count[token()];
}
}
void decref() {
if (!empty() && --(node->second.refcount) == 0) {
if (extra_dtor) {
extra_dtor(token());
if (!empty()) {
--token_count[token()];
if (--(node->second.refcount) == 0) {
dag.erase(node);
}
dag.erase(node);
}
}
@ -140,11 +142,11 @@ class Sym
// A static hash_map that contains all live nodes..
static SymMap dag;
static std::vector<unsigned> token_count;
// Factory function for creating extra node stats, default will be 0
static UniqueNodeStats* (*factory)(const Sym&);
static void (*extra_dtor)(token_t);
};
/* Utility hash functor for syms */

View file

@ -23,7 +23,7 @@
class Sym;
#if __GNUC__ > 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
#if __GNUC__ > 4
#include <ext/pool_allocator.h>
typedef std::vector<Sym, __gnu_cxx::__pool_alloc<Sym> > std::vector<Sym>;
//typedef std::vector<Sym> SymVec;
@ -60,10 +60,6 @@ class SymKey
SymKey(token_t _token) : args(), token(_token), hash_code(calc_hash()) {}
SymKey(token_t _token, const detail::SymArgs& _args) : args(_args), token(_token), hash_code(calc_hash()) {}
private:
detail::SymArgs args;
public:
bool operator==(const SymKey& other) const;
struct Hash
@ -74,13 +70,14 @@ class SymKey
unsigned arity() const { return args.len(); }
const std::vector<Sym>& vec() const { return args.vec(); }
token_t token; // identifies the function
// fixates (i.e. claims memory) for the embedded vector of Syms
void fixate() const { args.fixate(); }
int get_hash_code() const { return hash_code; }
detail::SymArgs args;
token_t token; // identifies the function
private:
int calc_hash() const;
int hash_code;

View file

@ -25,6 +25,7 @@
#include <eoSym.h>
#include <eoPop.h>
#include <eoSymMutate.h>
//#include <eoSymLambdaMutate.h>
#include <eoSymCrossover.h>
#include <eoSymEval.h>
#include <eoOpContainer.h>
@ -151,6 +152,13 @@ int main(int argc, char* argv[]) {
0,
"Population").value();
double constant_mut_prob = parser.createParam(
0.1,
"constant-mut-rate",
"Probability of performing constant mutation",
0,
"Population").value();
double subtree_mut_prob = parser.createParam(
0.2,
@ -165,7 +173,14 @@ int main(int argc, char* argv[]) {
"Probability of performing node mutation",
0,
"Population").value();
/* double lambda_mut_prob = parser.createParam(
1.0,
"lambda-mut-rate",
"Probability of performing (neutral) lambda extraction/expansion",
0,
"Population").value();
*/
double subtree_xover_prob = parser.createParam(
0.4,
"xover-rate",
@ -195,7 +210,7 @@ int main(int argc, char* argv[]) {
"Population").value();
unsigned maximumSize = parser.createParam(
0xffffffffu,
-1u,
"maximum-size",
"Maximum size after crossover",
's',
@ -261,10 +276,13 @@ int main(int argc, char* argv[]) {
// todo, make this parameter, etc
double std = 0.01;
eoSymConstantMutate<EoType> constmutate(std);
genetic_operator.add(constmutate, 0.1);
genetic_operator.add(constmutate, constant_mut_prob);
eoSymNodeMutate<EoType> nodemutate(table);
genetic_operator.add(nodemutate, node_mut_prob);
// eoSymLambdaMutate<EoType> lambda_mutate(node_selector);
// genetic_operator.add(lambda_mutate, lambda_mut_prob); // TODO: prob should be settable
//eoQuadSubtreeCrossover<EoType> quad(node_selector);
eoBinSubtreeCrossover<EoType> bin(node_selector);

View file

@ -0,0 +1,45 @@
#include "Sym.h"
#include "MultiFunction.h"
#include "FunDef.h"
using namespace std;
int main() {
Sym v = SymVar(0);
Sym c = SymConst(0.1);
Sym sym = inv(v) + c;
Sym a = sym;
sym = sym * sym;
Sym b = sym;
sym = sym + sym;
c = sym;
vector<Sym> pop;
pop.push_back(sym);
MultiFunction m(pop);
vector<double> vec(1);
vec[0] = 10.0;
cout << sym << endl;
cout << "Eval " << eval(sym, vec);
vector<double> y(1);
m(vec,y);
cout << " " << y[0] << endl;
cout << "3 " << eval(a,vec) << endl;
cout << "4 " << eval(b, vec) << endl;
cout << "5 " << eval(c, vec) << endl;
}

View file

@ -13,6 +13,9 @@ int main() {
cout << expr << endl;
cout << simplify(expr) << endl;
Sym dv = differentiate( exp(expr) , v1.token());
cout << dv << endl;
cout << simplify(dv) << endl;
}