diff --git a/edo/CMakeLists.txt b/edo/CMakeLists.txt index 3cb182b3..9a07cd61 100644 --- a/edo/CMakeLists.txt +++ b/edo/CMakeLists.txt @@ -125,7 +125,7 @@ SET(SAMPLE_SRCS) ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(application) -#ADD_SUBDIRECTORY(test) +ADD_SUBDIRECTORY(test) ADD_SUBDIRECTORY(doc) ###################################################################################### diff --git a/edo/src/edoEstimatorNormalMono.h b/edo/src/edoEstimatorNormalMono.h index 12db5188..9e80145d 100644 --- a/edo/src/edoEstimatorNormalMono.h +++ b/edo/src/edoEstimatorNormalMono.h @@ -39,64 +39,84 @@ Authors: template < typename EOT > class edoEstimatorNormalMono : public edoEstimator< edoNormalMono< EOT > > { -public: - typedef typename EOT::AtomType AtomType; - - class Variance - { public: - Variance() : _sumvar(0){} + typedef typename EOT::AtomType AtomType; - void update(AtomType v) + //! Knuth's algorithm, online variance, numericably stable + class Variance { - _n++; + public: + Variance() : _n(0), _mean(0), _M2(0) {} - AtomType d = v - _mean; - - _mean += 1 / _n * d; - _sumvar += (_n - 1) / _n * d * d; - } - - AtomType get_mean() const {return _mean;} - AtomType get_var() const {return _sumvar / (_n - 1);} - AtomType get_std() const {return sqrt( get_var() );} - - private: - AtomType _n; - AtomType _mean; - AtomType _sumvar; - }; - -public: - edoNormalMono< EOT > operator()(eoPop& pop) - { - unsigned int popsize = pop.size(); - assert(popsize > 0); - - unsigned int dimsize = pop[0].size(); - assert(dimsize > 0); - - std::vector< Variance > var( dimsize ); - - for (unsigned int i = 0; i < popsize; ++i) - { - for (unsigned int d = 0; d < dimsize; ++d) + void update(AtomType x) { - var[d].update( pop[i][d] ); + _n++; + + AtomType delta = x - _mean; + + _mean += delta / _n; + _M2 += delta * ( x - _mean ); + } + + AtomType mean() const {return _mean;} + + //! Population variance + AtomType var_n() const { + assert( _n > 0 ); + return _M2 / _n; + } + + /** Sample variance (using Bessel's correction) + * is an unbiased estimate of the population variance, + * but it has uniformly higher mean squared error + */ + AtomType var() const { + assert( _n > 1 ); + return _M2 / (_n - 1); + } + + //! Population standard deviation + AtomType std_n() const {return sqrt( var_n() );} + + //! Sample standard deviation, is a biased estimate of the population standard deviation + AtomType std() const {return sqrt( var() );} + + private: + AtomType _n; + AtomType _mean; + AtomType _M2; + }; + + public: + edoNormalMono< EOT > operator()(eoPop& pop) + { + unsigned int popsize = pop.size(); + assert(popsize > 0); + + unsigned int dimsize = pop[0].size(); + assert(dimsize > 0); + + std::vector< Variance > var( dimsize ); + + for (unsigned int i = 0; i < popsize; ++i) + { + for (unsigned int d = 0; d < dimsize; ++d) + { + var[d].update( pop[i][d] ); } } - EOT mean( dimsize ); - EOT variance( dimsize ); + EOT mean( dimsize ); + EOT variance( dimsize ); - for (unsigned int d = 0; d < dimsize; ++d) + for (unsigned int d = 0; d < dimsize; ++d) { - mean[d] = var[d].get_mean(); - variance[d] = var[d].get_var(); + mean[d] = var[d].mean(); + variance[d] = var[d].var_n(); } - return edoNormalMono< EOT >( mean, variance ); - } + return edoNormalMono< EOT >( mean, variance ); + } }; #endif // !_edoEstimatorNormalMono_h diff --git a/edo/test/CMakeLists.txt b/edo/test/CMakeLists.txt index 7a3129a1..ffc0cfdb 100644 --- a/edo/test/CMakeLists.txt +++ b/edo/test/CMakeLists.txt @@ -34,6 +34,7 @@ INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/application/common) SET(SOURCES #t-cholesky + t-variance t-edoEstimatorNormalMulti t-mean-distance t-bounderno diff --git a/edo/test/t-variance.cpp b/edo/test/t-variance.cpp new file mode 100644 index 00000000..ca6c1226 --- /dev/null +++ b/edo/test/t-variance.cpp @@ -0,0 +1,38 @@ + +#include +#include + +#include +#include +#include + +int main() +{ + typedef eoReal Vec; + + eoPop pop; + for( unsigned int i=1; i<7; ++i) { + Vec indiv(1,i); + pop.push_back( indiv ); + std::clog << indiv << " "; + } + std::clog << std::endl; + + edoEstimatorNormalMono estimator; + + edoNormalMono distrib = estimator(pop); + + Vec ex_mean(1,3.5); + Vec ex_var(1,17.5/6); + Vec es_mean = distrib.mean(); + Vec es_var = distrib.variance(); + + std::cout << "expected mean=" << ex_mean << " variance=" << ex_var << std::endl; + std::cout << "estimated mean=" << es_mean << " variance=" << es_var << std::endl; + + for( unsigned int i=0; i