better variance computation, use Knuth online robust algorithm, add a test for variance computation

This commit is contained in:
Johann Dreo 2012-09-18 16:56:38 +02:00
commit 2c2e9d0ca3
4 changed files with 98 additions and 39 deletions

View file

@ -125,7 +125,7 @@ SET(SAMPLE_SRCS)
ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(src)
ADD_SUBDIRECTORY(application) ADD_SUBDIRECTORY(application)
#ADD_SUBDIRECTORY(test) ADD_SUBDIRECTORY(test)
ADD_SUBDIRECTORY(doc) ADD_SUBDIRECTORY(doc)
###################################################################################### ######################################################################################

View file

@ -39,35 +39,55 @@ Authors:
template < typename EOT > template < typename EOT >
class edoEstimatorNormalMono : public edoEstimator< edoNormalMono< EOT > > class edoEstimatorNormalMono : public edoEstimator< edoNormalMono< EOT > >
{ {
public: public:
typedef typename EOT::AtomType AtomType; typedef typename EOT::AtomType AtomType;
//! Knuth's algorithm, online variance, numericably stable
class Variance class Variance
{ {
public: public:
Variance() : _sumvar(0){} Variance() : _n(0), _mean(0), _M2(0) {}
void update(AtomType v) void update(AtomType x)
{ {
_n++; _n++;
AtomType d = v - _mean; AtomType delta = x - _mean;
_mean += 1 / _n * d; _mean += delta / _n;
_sumvar += (_n - 1) / _n * d * d; _M2 += delta * ( x - _mean );
} }
AtomType get_mean() const {return _mean;} AtomType mean() const {return _mean;}
AtomType get_var() const {return _sumvar / (_n - 1);}
AtomType get_std() const {return sqrt( get_var() );} //! Population variance
AtomType var_n() const {
assert( _n > 0 );
return _M2 / _n;
}
/** Sample variance (using Bessel's correction)
* is an unbiased estimate of the population variance,
* but it has uniformly higher mean squared error
*/
AtomType var() const {
assert( _n > 1 );
return _M2 / (_n - 1);
}
//! Population standard deviation
AtomType std_n() const {return sqrt( var_n() );}
//! Sample standard deviation, is a biased estimate of the population standard deviation
AtomType std() const {return sqrt( var() );}
private: private:
AtomType _n; AtomType _n;
AtomType _mean; AtomType _mean;
AtomType _sumvar; AtomType _M2;
}; };
public: public:
edoNormalMono< EOT > operator()(eoPop<EOT>& pop) edoNormalMono< EOT > operator()(eoPop<EOT>& pop)
{ {
unsigned int popsize = pop.size(); unsigned int popsize = pop.size();
@ -91,8 +111,8 @@ public:
for (unsigned int d = 0; d < dimsize; ++d) for (unsigned int d = 0; d < dimsize; ++d)
{ {
mean[d] = var[d].get_mean(); mean[d] = var[d].mean();
variance[d] = var[d].get_var(); variance[d] = var[d].var_n();
} }
return edoNormalMono< EOT >( mean, variance ); return edoNormalMono< EOT >( mean, variance );

View file

@ -34,6 +34,7 @@ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/application/common)
SET(SOURCES SET(SOURCES
#t-cholesky #t-cholesky
t-variance
t-edoEstimatorNormalMulti t-edoEstimatorNormalMulti
t-mean-distance t-mean-distance
t-bounderno t-bounderno

38
edo/test/t-variance.cpp Normal file
View file

@ -0,0 +1,38 @@
#include <iostream>
#include <vector>
#include <eo>
#include <es.h>
#include <edo>
int main()
{
typedef eoReal<eoMinimizingFitness> Vec;
eoPop<Vec> pop;
for( unsigned int i=1; i<7; ++i) {
Vec indiv(1,i);
pop.push_back( indiv );
std::clog << indiv << " ";
}
std::clog << std::endl;
edoEstimatorNormalMono<Vec> estimator;
edoNormalMono<Vec> distrib = estimator(pop);
Vec ex_mean(1,3.5);
Vec ex_var(1,17.5/6);
Vec es_mean = distrib.mean();
Vec es_var = distrib.variance();
std::cout << "expected mean=" << ex_mean << " variance=" << ex_var << std::endl;
std::cout << "estimated mean=" << es_mean << " variance=" << es_var << std::endl;
for( unsigned int i=0; i<ex_mean.size(); ++i ) {
assert( es_mean[i] == ex_mean[i] );
assert( es_var[i] == ex_var[i] );
}
}