From 008f2571b2f4e2571c13a0bf9bd6a6b5663145dd Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 10 Jul 2012 17:32:18 +0200 Subject: [PATCH 01/19] MultiJob and OneShotJob allow to choose job's way of termination (Kill for multi, Finish for one shot). --- eo/src/mpi/eoMpi.h | 34 +++++++++++++++++++++++++++++++++- eo/src/mpi/eoParallelApply.h | 4 ++-- eo/src/mpi/eoTerminateJob.h | 4 ++-- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index 0ef80c12..8521d5c3 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -199,10 +199,12 @@ namespace eo public: Job( AssignmentAlgorithm& _algo, int _masterRank, + int _workerStopCondition, JobStore & store ) : assignmentAlgo( _algo ), masterRank( _masterRank ), + workerStopCondition( _workerStopCondition ), comm( Node::comm() ), // Functors sendTask( store.sendTask() ), @@ -339,7 +341,7 @@ namespace eo # ifndef NDEBUG eo::log << "[W" << comm.rank() << "] Waiting for an order..." << std::endl; # endif - if ( order == Message::Kill ) + if ( order == workerStopCondition ) { # ifndef NDEBUG eo::log << "[W" << comm.rank() << "] Leaving worker task." << std::endl; @@ -375,6 +377,7 @@ namespace eo AssignmentAlgorithm& assignmentAlgo; int masterRank; + const int workerStopCondition; bmpi::communicator& comm; SendTaskFunction & sendTask; @@ -383,6 +386,35 @@ namespace eo IsFinishedFunction & isFinished; bool _isMaster; + + }; + + template< class JobData > + class OneShotJob : public Job< JobData > + { + // TODO commentaire : ce job s'arrête sur message terminate + public: + OneShotJob( AssignmentAlgorithm& algo, + int masterRank, + JobStore & store ) + : Job( algo, masterRank, Message::Finish, store ) + { + // empty + } + }; + + template< class JobData > + class MultiJob : public Job< JobData > + { + public: + // TODO commentaire : ce job s'arrête sur message kill + MultiJob ( AssignmentAlgorithm& algo, + int masterRank, + JobStore & store ) + : Job( algo, masterRank, Message::Kill, store ) + { + // empty + } }; } } diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index a0225471..4ef58830 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -207,7 +207,7 @@ namespace eo // TODO commentaire : impossible de faire un typedef sur un template sans passer // par un traits => complique la tâche de l'utilisateur pour rien. template< typename EOT > - class ParallelApply : public Job< ParallelApplyData > + class ParallelApply : public MultiJob< ParallelApplyData > { public: @@ -216,7 +216,7 @@ namespace eo int _masterRank, ParallelApplyStore & store ) : - Job< ParallelApplyData >( algo, _masterRank, store ) + MultiJob< ParallelApplyData >( algo, _masterRank, store ) { // empty } diff --git a/eo/src/mpi/eoTerminateJob.h b/eo/src/mpi/eoTerminateJob.h index d85aeeb0..bda085a0 100644 --- a/eo/src/mpi/eoTerminateJob.h +++ b/eo/src/mpi/eoTerminateJob.h @@ -65,10 +65,10 @@ namespace eo void* data() { return 0; } }; - struct EmptyJob : public Job + struct EmptyJob : public OneShotJob { EmptyJob( AssignmentAlgorithm& algo, int masterRank ) : - Job( algo, masterRank, *(new DummyJobStore) ) + OneShotJob( algo, masterRank, *(new DummyJobStore) ) // FIXME memory leak => will be corrected by using const correctness { // empty From 472b86bc6856bc3a33e172ae862fef0d82385d2f Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 10 Jul 2012 17:34:18 +0200 Subject: [PATCH 02/19] MPI job functors have to be created with new, so as to be deleted by delete, because of Composite / Decorator pattern. --- eo/src/mpi/eoMpi.h | 13 +++++++++++- eo/src/mpi/eoParallelApply.h | 4 ---- eo/src/mpi/eoTerminateJob.h | 40 ------------------------------------ 3 files changed, 12 insertions(+), 45 deletions(-) diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index 8521d5c3..c277ddd0 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -119,6 +119,7 @@ namespace eo template< typename JobData > struct JobStore { + // TODO commentaire: ces pointeurs devraient être alloués avec new pour éviter segfault on quit. JobStore( SendTaskFunction* stf, HandleResponseFunction* hrf, @@ -135,6 +136,16 @@ namespace eo // empty } + ~JobStore() + { + // TODO commentaire: Composition Pattern => délégation de la destruction => destruction homogène => new + // et delete partout. + delete _stf; + delete _hrf; + delete _ptf; + delete _iff; + } + SendTaskFunction & sendTask() { return *_stf; } HandleResponseFunction & handleResponse() { return *_hrf; } ProcessTaskFunction & processTask() { return *_ptf; } @@ -377,7 +388,7 @@ namespace eo AssignmentAlgorithm& assignmentAlgo; int masterRank; - const int workerStopCondition; + const int workerStopCondition; // TODO commentaire: message signifiant l'arrêt pour un worker bmpi::communicator& comm; SendTaskFunction & sendTask; diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index 4ef58830..d93b5b81 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -194,10 +194,6 @@ namespace eo virtual ~ParallelApplyStore() { - delete _stf; - delete _hrf; - delete _ptf; - delete _iff; } protected: diff --git a/eo/src/mpi/eoTerminateJob.h b/eo/src/mpi/eoTerminateJob.h index bda085a0..4dc9f421 100644 --- a/eo/src/mpi/eoTerminateJob.h +++ b/eo/src/mpi/eoTerminateJob.h @@ -54,14 +54,6 @@ namespace eo _iff = new DummyIsFinishedFunction; } - ~DummyJobStore() - { - delete _stf; - delete _hrf; - delete _ptf; - delete _iff; - } - void* data() { return 0; } }; @@ -83,38 +75,6 @@ namespace eo } } }; - - /* - class TerminateJob : public Job - { - public: - TerminateJob( AssignmentAlgorithm& algo, int _ ) - : Job( algo, _ ) - { - // empty - } - - void sendTask( int wrkRank ) - { - // empty - } - - void handleResponse( int wrkRank ) - { - // empty - } - - void processTask( ) - { - // empty - } - - bool isFinished() - { - return true; - } - }; - */ } } From d805800731a0a9dc985071a9731dcbc7c669a2b8 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 11 Jul 2012 10:03:53 +0200 Subject: [PATCH 03/19] Added packet size to parallel parser and default argument to static assignment. --- eo/src/mpi/eoMpiAssignmentAlgorithm.h | 3 ++- eo/src/utils/eoParallel.cpp | 2 ++ eo/src/utils/eoParallel.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/eo/src/mpi/eoMpiAssignmentAlgorithm.h b/eo/src/mpi/eoMpiAssignmentAlgorithm.h index c3d24a19..0630a6f7 100644 --- a/eo/src/mpi/eoMpiAssignmentAlgorithm.h +++ b/eo/src/mpi/eoMpiAssignmentAlgorithm.h @@ -114,13 +114,14 @@ namespace eo init( workers, runs ); } - StaticAssignmentAlgorithm( int runs ) + StaticAssignmentAlgorithm( int runs = 0 ) { std::vector workers; for(int i = 1; i < Node::comm().size(); ++i) { workers.push_back( i ); } + init( workers, runs ); } diff --git a/eo/src/utils/eoParallel.cpp b/eo/src/utils/eoParallel.cpp index d9d09c38..41865198 100644 --- a/eo/src/utils/eoParallel.cpp +++ b/eo/src/utils/eoParallel.cpp @@ -37,6 +37,7 @@ eoParallel::eoParallel() : _nthreads( 0, "parallelize-nthreads", "Define the number of threads you want to use, nthreads = 0 means you want to use all threads available", '\0' ), _enableResults( false, "parallelize-enable-results", "Enable the generation of results", '\0' ), _doMeasure( false, "parallelize-do-measure", "Do some measures during execution", '\0' ), + _packetSize( 1U, "parallelize-packet-size", "Number of elements which should be sent at a time during a parallel evaluation based on message passing.", '\0'), _t_start(0) { } @@ -90,6 +91,7 @@ void eoParallel::_createParameters( eoParser& parser ) parser.processParam( _nthreads, section ); parser.processParam( _enableResults, section ); parser.processParam( _doMeasure, section ); + parser.processParam( _packetSize, section ); } void make_parallel(eoParser& parser) diff --git a/eo/src/utils/eoParallel.h b/eo/src/utils/eoParallel.h index 3f22f6c4..3aae8e43 100644 --- a/eo/src/utils/eoParallel.h +++ b/eo/src/utils/eoParallel.h @@ -54,6 +54,7 @@ public: std::string prefix() const; inline unsigned int nthreads() const { return _nthreads.value(); } + inline unsigned int packetSize() const { return _packetSize.value(); } inline bool enableResults() const { return _enableResults.value(); } inline bool doMeasure() const { return _doMeasure.value(); } @@ -68,6 +69,7 @@ private: eoValueParam _isDynamic; eoValueParam _prefix; eoValueParam _nthreads; + eoValueParam _packetSize; eoValueParam _enableResults; eoValueParam _doMeasure; double _t_start; From 108c0bcf35e9e480ef9b0bb8843ad99270c15ccf Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 11 Jul 2012 10:41:48 +0200 Subject: [PATCH 04/19] Automatic deleting functors, only if necessary. --- eo/src/mpi/eoMpi.h | 25 +++++++++++----- eo/src/mpi/eoMultiParallelApply.h | 2 -- eo/src/mpi/eoParallelApply.h | 49 ++++++++++++++++++++++++------- eo/src/mpi/eoTerminateJob.h | 4 +++ 4 files changed, 60 insertions(+), 20 deletions(-) diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index c277ddd0..fc419b13 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -40,11 +40,19 @@ namespace eo template< typename JobData, typename Wrapped > struct SharedDataFunction { - SharedDataFunction( Wrapped * w = 0 ) : _wrapped( w ) + SharedDataFunction( Wrapped * w = 0 ) : _wrapped( w ), _needDelete( false ) { // empty } + virtual ~SharedDataFunction() + { + if( _wrapped && _wrapped->needDelete() ) + { + delete _wrapped; + } + } + void wrapped( Wrapped * w ) { _wrapped = w; @@ -59,9 +67,13 @@ namespace eo } } + bool needDelete() { return _needDelete; } + void needDelete( bool b ) { _needDelete = b; } + protected: JobData* d; Wrapped* _wrapped; + bool _needDelete; }; template< typename JobData > @@ -119,7 +131,6 @@ namespace eo template< typename JobData > struct JobStore { - // TODO commentaire: ces pointeurs devraient être alloués avec new pour éviter segfault on quit. JobStore( SendTaskFunction* stf, HandleResponseFunction* hrf, @@ -138,12 +149,10 @@ namespace eo ~JobStore() { - // TODO commentaire: Composition Pattern => délégation de la destruction => destruction homogène => new - // et delete partout. - delete _stf; - delete _hrf; - delete _ptf; - delete _iff; + if( _stf->needDelete() ) delete _stf; + if( _hrf->needDelete() ) delete _hrf; + if( _ptf->needDelete() ) delete _ptf; + if( _iff->needDelete() ) delete _iff; } SendTaskFunction & sendTask() { return *_stf; } diff --git a/eo/src/mpi/eoMultiParallelApply.h b/eo/src/mpi/eoMultiParallelApply.h index 85b1689e..f4fe47b5 100644 --- a/eo/src/mpi/eoMultiParallelApply.h +++ b/eo/src/mpi/eoMultiParallelApply.h @@ -27,7 +27,6 @@ namespace eo ~ProcessTaskParallelEval() { - delete _wrapped; } }; @@ -39,7 +38,6 @@ namespace eo ParallelEvalStore( eoUF & _proc, int _masterRank, - // long _maxTime = 0, int _packetSize = 1 ) : ParallelApplyStore< EOT >( _proc, *( new std::vector ), _masterRank, _packetSize ) diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index d93b5b81..9598b952 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -21,17 +21,23 @@ namespace eo { ParallelApplyData( eoUF & _proc, - std::vector & _pop, int _masterRank, // long _maxTime = 0, - int _packetSize + int _packetSize, + std::vector * _pop = 0 ) : - _data( &_pop ), func( _proc ), index( 0 ), size( _pop.size() ), packetSize( _packetSize ), masterRank( _masterRank ), comm( Node::comm() ) + _data( _pop ), func( _proc ), index( 0 ), packetSize( _packetSize ), masterRank( _masterRank ), comm( Node::comm() ) { if ( _packetSize <= 0 ) { throw std::runtime_error("Packet size should not be negative."); } + + if( _pop ) + { + size = _pop->size(); + } + tempArray = new EOT[ _packetSize ]; } @@ -172,18 +178,36 @@ namespace eo ParallelApplyStore( eoUF & _proc, - std::vector& _pop, int _masterRank, - // long _maxTime = 0, int _packetSize = 1, // JobStore functors - SendTaskParallelApply * stpa = new SendTaskParallelApply, - HandleResponseParallelApply* hrpa = new HandleResponseParallelApply, - ProcessTaskParallelApply* ptpa = new ProcessTaskParallelApply, - IsFinishedParallelApply* ifpa = new IsFinishedParallelApply + SendTaskParallelApply * stpa = 0, + HandleResponseParallelApply* hrpa = 0, + ProcessTaskParallelApply* ptpa = 0, + IsFinishedParallelApply* ifpa = 0 ) : - _data( _proc, _pop, _masterRank, _packetSize ) + _data( _proc, _masterRank, _packetSize ) { + if( stpa == 0 ) { + stpa = new SendTaskParallelApply; + stpa->needDelete( true ); + } + + if( hrpa == 0 ) { + hrpa = new HandleResponseParallelApply; + hrpa->needDelete( true ); + } + + if( ptpa == 0 ) { + ptpa = new ProcessTaskParallelApply; + ptpa->needDelete( true ); + } + + if( ifpa == 0 ) { + ifpa = new IsFinishedParallelApply; + ifpa->needDelete( true ); + } + _stf = stpa; _hrf = hrpa; _ptf = ptpa; @@ -192,6 +216,11 @@ namespace eo ParallelApplyData* data() { return &_data; } + void data( std::vector& _pop ) + { + _data.init( _pop ); + } + virtual ~ParallelApplyStore() { } diff --git a/eo/src/mpi/eoTerminateJob.h b/eo/src/mpi/eoTerminateJob.h index 4dc9f421..ed45ca05 100644 --- a/eo/src/mpi/eoTerminateJob.h +++ b/eo/src/mpi/eoTerminateJob.h @@ -49,9 +49,13 @@ namespace eo DummyJobStore() { _stf = new DummySendTaskFunction; + _stf->needDelete( true ); _hrf = new DummyHandleResponseFunction; + _hrf->needDelete( true ); _ptf = new DummyProcessTaskFunction; + _ptf->needDelete( true ); _iff = new DummyIsFinishedFunction; + _iff->needDelete( true ); } void* data() { return 0; } From 414e011296c1c73d9d40ed3a4a3a64067a7a3e73 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 11 Jul 2012 10:42:35 +0200 Subject: [PATCH 05/19] Using ParallelApplyJob instead of MultiParallelApplyJob in eoPopEvalFunc. --- eo/src/apply.h | 2 +- eo/src/eoPopEvalFunc.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/eo/src/apply.h b/eo/src/apply.h index 7845c14e..b13c434e 100644 --- a/eo/src/apply.h +++ b/eo/src/apply.h @@ -91,7 +91,7 @@ void parallelApply( std::vector& _pop, eo::mpi::AssignmentAlgorithm& _algo, int _masterRank, - eo::mpi::ParallelEvalStore & _store ) + eo::mpi::ParallelApplyStore & _store ) { _store.data( _pop ); _algo.reinit( _pop.size() ); diff --git a/eo/src/eoPopEvalFunc.h b/eo/src/eoPopEvalFunc.h index d37eebaf..c811e3a5 100644 --- a/eo/src/eoPopEvalFunc.h +++ b/eo/src/eoPopEvalFunc.h @@ -96,14 +96,15 @@ class eoParallelPopLoopEval : public eoPopEvalFunc masterRank( _masterRank ), needToDeleteStore( true ) { - store = new eo::mpi::ParallelEvalStore( _eval, _masterRank, _packetSize ); + // FIXME memory leak because of new. + store = new eo::mpi::ParallelApplyStore( _eval, _masterRank, _packetSize ); } eoParallelPopLoopEval( // Job parameters eo::mpi::AssignmentAlgorithm& _assignAlgo, int _masterRank, - eo::mpi::ParallelEvalStore* _store + eo::mpi::ParallelApplyStore* _store ) : assignAlgo( _assignAlgo ), masterRank( _masterRank ), @@ -138,7 +139,7 @@ class eoParallelPopLoopEval : public eoPopEvalFunc eo::mpi::AssignmentAlgorithm & assignAlgo; int masterRank; - eo::mpi::ParallelEvalStore* store; + eo::mpi::ParallelApplyStore* store; bool needToDeleteStore; }; #endif From 4ab5b2fd8983eb24be122ac009e46bc9a6876cf7 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 11 Jul 2012 11:41:10 +0200 Subject: [PATCH 06/19] Taking time measures only if parallel.doMeasure() is true. --- eo/src/utils/eoTimer.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/eo/src/utils/eoTimer.h b/eo/src/utils/eoTimer.h index c97ab5d3..8aa09c51 100644 --- a/eo/src/utils/eoTimer.h +++ b/eo/src/utils/eoTimer.h @@ -7,6 +7,8 @@ # include # include +# include "utils/eoParallel.h" + # ifdef WITH_MPI # include # include @@ -119,16 +121,22 @@ class eoTimerStat void start( const std::string & key ) { - _timers[ key ].restart(); + if( eo::parallel.doMeasure() ) + { + _timers[ key ].restart(); + } } void stop( const std::string& key ) { - Stat & s = _stats[ key ]; - eoTimer & t = _timers[ key ]; - s.utime.push_back( t.usertime() ); - s.stime.push_back( t.systime() ); - s.wtime.push_back( t.wallclock() ); + if( eo::parallel.doMeasure() ) + { + Stat & s = _stats[ key ]; + eoTimer & t = _timers[ key ]; + s.utime.push_back( t.usertime() ); + s.stime.push_back( t.systime() ); + s.wtime.push_back( t.wallclock() ); + } } std::map< std::string, Stat > stats() From 12560899ae6b9546b43297bc64039baae706631a Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Wed, 11 Jul 2012 12:00:36 +0200 Subject: [PATCH 07/19] Updating MPI tests. --- eo/test/mpi/eval.cpp | 8 +++----- eo/test/mpi/multipleRoles.cpp | 9 +++++++-- eo/test/mpi/parallelApply.cpp | 6 ++++-- eo/test/mpi/wrapper.cpp | 5 ++++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/eo/test/mpi/eval.cpp b/eo/test/mpi/eval.cpp index 312c9d65..72fca755 100644 --- a/eo/test/mpi/eval.cpp +++ b/eo/test/mpi/eval.cpp @@ -11,8 +11,6 @@ #include -#include - #include #include @@ -144,10 +142,10 @@ int main(int ac, char** av) eo::log << "Size of population : " << popSize << std::endl; - eo::mpi::ParallelEvalStore< EOT > store( eval, eo::mpi::DEFAULT_MASTER ); + eo::mpi::ParallelApplyStore< EOT > store( eval, eo::mpi::DEFAULT_MASTER ); store.wrapHandleResponse( new CatBestAnswers ); - eoParallelPopLoopEval< EOT > popEval( eval, assign, &store, eo::mpi::DEFAULT_MASTER, 3 ); + eoParallelPopLoopEval< EOT > popEval( assign, eo::mpi::DEFAULT_MASTER, &store ); eo::log << eo::quiet << "Before first evaluation." << std::endl; popEval( pop, pop ); eo::log << eo::quiet << "After first evaluation." << std::endl; @@ -160,7 +158,7 @@ int main(int ac, char** av) } else { eoPop< EOT > pop( popSize, init ); - eoParallelPopLoopEval< EOT > popEval( eval, assign, eo::mpi::DEFAULT_MASTER, 3 ); + eoParallelPopLoopEval< EOT > popEval( assign, eo::mpi::DEFAULT_MASTER, eval ); popEval( pop, pop ); } diff --git a/eo/test/mpi/multipleRoles.cpp b/eo/test/mpi/multipleRoles.cpp index ce10f621..232a6110 100644 --- a/eo/test/mpi/multipleRoles.cpp +++ b/eo/test/mpi/multipleRoles.cpp @@ -1,5 +1,6 @@ # include # include +# include # include @@ -32,9 +33,11 @@ void subtask( vector& v, int rank ) DynamicAssignmentAlgorithm algo( workers ); SubWork sw; - ParallelApplyStore store( sw, v, rank ); + ParallelApplyStore store( sw, rank ); + store.data( v ); ParallelApply job( algo, rank, store ); job.run(); + EmptyJob stop( algo, rank ); } struct Work: public eoUF< vector&, void > @@ -74,11 +77,13 @@ int main(int argc, char** argv) { Work w; DynamicAssignmentAlgorithm algo( 1, 2 ); - ParallelApplyStore< vector > store( w, metaV, 0 ); + ParallelApplyStore< vector > store( w, 0 ); + store.data( metaV ); ParallelApply< vector > job( algo, 0, store ); job.run(); if( job.isMaster() ) { + EmptyJob stop( algo, 0 ); v = metaV[0]; cout << "Results : " << endl; for(int i = 0; i < v.size(); ++i) diff --git a/eo/test/mpi/parallelApply.cpp b/eo/test/mpi/parallelApply.cpp index d562ef0e..26b0b30e 100644 --- a/eo/test/mpi/parallelApply.cpp +++ b/eo/test/mpi/parallelApply.cpp @@ -1,5 +1,6 @@ # include # include +# include # include @@ -110,8 +111,8 @@ int main(int argc, char** argv) for( unsigned int i = 0; i < tests.size(); ++i ) { - ParallelApplyStore< int > store( plusOneInstance, v, eo::mpi::DEFAULT_MASTER, 3 ); - // Job< JobData > job( *(tests[i].assign), eo::mpi::DEFAULT_MASTER, store ); + ParallelApplyStore< int > store( plusOneInstance, eo::mpi::DEFAULT_MASTER, 3 ); + store.data( v ); ParallelApply< int > job( *(tests[i].assign), eo::mpi::DEFAULT_MASTER, store ); if( job.isMaster() ) @@ -126,6 +127,7 @@ int main(int argc, char** argv) if( job.isMaster() ) { + EmptyJob stop( *(tests[i].assign), eo::mpi::DEFAULT_MASTER ); ++offset; for(int i = 0; i < v.size(); ++i) { diff --git a/eo/test/mpi/wrapper.cpp b/eo/test/mpi/wrapper.cpp index d8b72c98..527c7cc6 100644 --- a/eo/test/mpi/wrapper.cpp +++ b/eo/test/mpi/wrapper.cpp @@ -1,5 +1,6 @@ # include # include +# include # include @@ -60,13 +61,15 @@ int main(int argc, char** argv) StaticAssignmentAlgorithm assign( v.size() ); - ParallelApplyStore< int > store( plusOneInstance, v, eo::mpi::DEFAULT_MASTER, 1 ); + ParallelApplyStore< int > store( plusOneInstance, eo::mpi::DEFAULT_MASTER, 1 ); + store.data( v ); store.wrapIsFinished( new ShowWrappedResult ); ParallelApply job( assign, eo::mpi::DEFAULT_MASTER, store ); // Equivalent to: // Job< ParallelApplyData > job( assign, 0, store ); job.run(); + EmptyJob stop( assign, eo::mpi::DEFAULT_MASTER ); if( job.isMaster() ) { From 60fff427fedb40afd6009a84441ce52ad9ad42a7 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 12 Jul 2012 13:56:54 +0200 Subject: [PATCH 08/19] Documentation of serialization, utils/eoTimer and eoPopEvalFunc. --- eo/src/apply.h | 21 --- eo/src/eoEasyEA.h | 70 ++++----- eo/src/eoPopEvalFunc.h | 81 ++++++++++- eo/src/serial/Array.cpp | 70 +++++---- eo/src/serial/Array.h | 267 +++++++++++++++++++---------------- eo/src/serial/Entity.h | 39 ++++- eo/src/serial/Object.cpp | 74 ++++++---- eo/src/serial/Object.h | 111 +++++++++------ eo/src/serial/Parser.cpp | 24 +++- eo/src/serial/Parser.h | 25 ++++ eo/src/serial/Serializable.h | 80 +++++++---- eo/src/serial/String.cpp | 21 +++ eo/src/serial/String.h | 140 ++++++++++-------- eo/src/serial/Utils.h | 25 +++- eo/src/serial/eoSerial.h | 21 +++ eo/src/utils/eoParallel.cpp | 4 +- eo/src/utils/eoParallel.h | 3 +- eo/src/utils/eoTimer.h | 198 ++++++++++++++++++++++---- 18 files changed, 856 insertions(+), 418 deletions(-) diff --git a/eo/src/apply.h b/eo/src/apply.h index b13c434e..939e62bd 100644 --- a/eo/src/apply.h +++ b/eo/src/apply.h @@ -33,12 +33,6 @@ #include #include -# ifdef WITH_MPI -# include -# include -# include -# endif // WITH_MPI - /** Applies a unary function to a std::vector of things. @@ -85,21 +79,6 @@ void apply(eoUF& _proc, std::vector& _pop) #endif // !_OPENMP } -#ifdef WITH_MPI -template -void parallelApply( - std::vector& _pop, - eo::mpi::AssignmentAlgorithm& _algo, - int _masterRank, - eo::mpi::ParallelApplyStore & _store ) -{ - _store.data( _pop ); - _algo.reinit( _pop.size() ); - eo::mpi::ParallelApply job( _algo, _masterRank, _store ); - job.run(); -} -#endif - /** This is a variant of apply which is called in parallel thanks to OpenMP. diff --git a/eo/src/eoEasyEA.h b/eo/src/eoEasyEA.h index 4d74932f..4d9c7b6d 100644 --- a/eo/src/eoEasyEA.h +++ b/eo/src/eoEasyEA.h @@ -37,8 +37,6 @@ #include #include - - template class eoIslandsEasyEA ; template class eoDistEvalEasyEA ; @@ -106,7 +104,7 @@ template class eoEasyEA: public eoAlgo * @brief Ctor allowing to specify which pop eval function we're going to use. * * Ctor taking a breed and merge, an overload of ctor to define an offspring size, and - * the pop eval function used. This allows to precise if we would like to use the + * the pop eval function used. This allows to precise if we would like to use the * parallel evaluation, for instance. */ eoEasyEA( @@ -247,57 +245,43 @@ template class eoEasyEA: public eoAlgo virtual void operator()(eoPop& _pop) { - eo::log << "[EasyEA] Call to operator()" << std::endl; - if (isFirstCall) - { - size_t total_capacity = _pop.capacity() + offspring.capacity(); - _pop.reserve(total_capacity); - offspring.reserve(total_capacity); - isFirstCall = false; - } - - // TODO TODOB delete all log traces - std::cout << "[EasyEA] After is first call." << std::endl; - - eoPop empty_pop; - std::cout << "[EasyEA] After empty_pop." << std::endl; - - popEval(empty_pop, _pop); // A first eval of pop. - std::cout << "[EasyEA] After pop_eval." << std::endl; - - do + if (isFirstCall) { - try + size_t total_capacity = _pop.capacity() + offspring.capacity(); + _pop.reserve(total_capacity); + offspring.reserve(total_capacity); + isFirstCall = false; + } + + eoPop empty_pop; + + do + { + try { - std::cout << "[EasyEA] Beginning try." << std::endl; - unsigned pSize = _pop.size(); - std::cout << "[EasyEA] psize determinated." << std::endl; - offspring.clear(); // new offspring - std::cout << "[EasyEA] offspring cleared." << std::endl; + unsigned pSize = _pop.size(); - breed(_pop, offspring); + offspring.clear(); // new offspring - std::cout << "[EasyEA] After breed, evaluating pop." << std::endl; - popEval(_pop, offspring); // eval of parents + offspring if necessary + breed(_pop, offspring); - std::cout << "[EasyEA] After evaluation, replacing pop." << std::endl; - replace(_pop, offspring); // after replace, the new pop. is in _pop - std::cout << "[EasyEA] After replacing, continuator." << std::endl; + popEval(_pop, offspring); // eval of parents + offspring if necessary - if (pSize > _pop.size()) - throw std::runtime_error("Population shrinking!"); - else if (pSize < _pop.size()) - throw std::runtime_error("Population growing!"); + replace(_pop, offspring); // after replace, the new pop. is in _pop + if (pSize > _pop.size()) + throw std::runtime_error("Population shrinking!"); + else if (pSize < _pop.size()) + throw std::runtime_error("Population growing!"); } - catch (std::exception& e) + catch (std::exception& e) { - std::string s = e.what(); - s.append( " in eoEasyEA"); - throw std::runtime_error( s ); + std::string s = e.what(); + s.append( " in eoEasyEA"); + throw std::runtime_error( s ); } } - while ( continuator( _pop ) ); + while ( continuator( _pop ) ); } protected : diff --git a/eo/src/eoPopEvalFunc.h b/eo/src/eoPopEvalFunc.h index c811e3a5..2cc86807 100644 --- a/eo/src/eoPopEvalFunc.h +++ b/eo/src/eoPopEvalFunc.h @@ -30,6 +30,13 @@ #include #include +# ifdef WITH_MPI +#include +#include +#include +#include +# endif // WITH_MPI + /** eoPopEvalFunc: This abstract class is for GLOBAL evaluators * of a population after variation. * It takes 2 populations (typically the parents and the offspring) @@ -78,12 +85,43 @@ private: }; #ifdef WITH_MPI -// TODO TODOB commenter +/** + * @brief Evaluator of a population of EOT which uses parallelization to evaluate individuals. + * + * This class implements an instance of eoPopEvalFunc that applies a private eoEvalFunc to + * all offspring, but in a parallel way. The original process becomes the central host from a network ("master"), and + * other machines disponible in the MPI network ("slaves") are used as evaluators. Population to evaluate is splitted in + * little packets of individuals, which are sent to the evaluators, that process the effective call to eval. Once all + * the individuals have been evaluated, they are returned to the master. The whole process is entirely invisible to the + * eyes of the user, who just has to launch a certain number of processes in MPI so as to have a result. + * + * The eoEvalFunc is no more directly given, but it is stored in the eo::mpi::ParallelApplyStore, which can be + * instanciated if no one is given at construction. + * + * The use of this class requires the user to have called the eo::mpi::Node::init function, at the beginning of its + * program. + * + * @ingroup Evaluation Parallel + * + * @author Benjamin Bouvier + */ template class eoParallelPopLoopEval : public eoPopEvalFunc { public: - /** Ctor: set value of embedded eoEvalFunc */ + /** + * @brief Constructor which creates the job store for the user. + * + * This constructor is the simplest to use, as it creates the store used by the parallel job, for the user. + * The user just precises the scheduling algorithm, the rank of the master and then gives its eval function and + * the size of a packet (how many individuals should be in a single message to evaluator). + * + * @param _assignAlgo The scheduling algorithm used to give orders to evaluators. + * @param _masterRank The MPI rank of the master. + * @param _eval The evaluation functor used to evaluate each individual in the population. + * @param _packetSize The number of individuals to send in one message to evaluator, and which are evaluated at + * a time. + */ eoParallelPopLoopEval( // Job parameters eo::mpi::AssignmentAlgorithm& _assignAlgo, @@ -94,12 +132,21 @@ class eoParallelPopLoopEval : public eoPopEvalFunc ) : assignAlgo( _assignAlgo ), masterRank( _masterRank ), - needToDeleteStore( true ) + needToDeleteStore( true ) // we used new, we'll have to use delete (RAII) { - // FIXME memory leak because of new. store = new eo::mpi::ParallelApplyStore( _eval, _masterRank, _packetSize ); } + /** + * @brief Constructor which allows the user to customize its job store. + * + * This constructor allows the user to customize the store, for instance by adding wrappers and other + * functionnalities, before using it for the parallelized evaluation. + * + * @param _assignAlgo The scheduling algorithm used to give orders to evaluators. + * @param _masterRank The MPI rank of the master. + * @param _store Pointer to a parallel eval store given by the user. + */ eoParallelPopLoopEval( // Job parameters eo::mpi::AssignmentAlgorithm& _assignAlgo, @@ -109,37 +156,57 @@ class eoParallelPopLoopEval : public eoPopEvalFunc assignAlgo( _assignAlgo ), masterRank( _masterRank ), store( _store ), - needToDeleteStore( false ) + needToDeleteStore( false ) // we haven't used new for creating store, we don't care if we have to delete it (RAII). { // empty } + /** + * @brief Default destructor. Sends a message to all evaluators indicating that the global loop (eoEasyEA, for + * instance) is over. + */ ~eoParallelPopLoopEval() { + // Only the master has to send the termination message if( eo::mpi::Node::comm().rank() == masterRank ) { eo::mpi::EmptyJob job( assignAlgo, masterRank ); job.run(); } + // RAII if( needToDeleteStore ) { delete store; } } - /** Do the job: simple loop over the offspring */ + /** + * @brief Parallel implementation of the operator(). + * + * @param _parents Population of parents (ignored). + * @param _offspring Population of children, which will be evaluated. + */ void operator()( eoPop & _parents, eoPop & _offspring ) { (void)_parents; - parallelApply(_offspring, assignAlgo, masterRank, *store); + // Reinits the store and the scheduling algorithm + store->data( _offspring ); + assignAlgo.reinit( _offspring.size() ); + // Effectively launches the job. + eo::mpi::ParallelApply job( assignAlgo, masterRank, *store ); + job.run(); } private: + // Scheduling algorithm eo::mpi::AssignmentAlgorithm & assignAlgo; + // Master MPI rank int masterRank; + // Store eo::mpi::ParallelApplyStore* store; + // Do we have to delete the store by ourselves ? bool needToDeleteStore; }; #endif diff --git a/eo/src/serial/Array.cpp b/eo/src/serial/Array.cpp index f70b00e8..180aad16 100644 --- a/eo/src/serial/Array.cpp +++ b/eo/src/serial/Array.cpp @@ -1,38 +1,58 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # include "Array.h" namespace eoserial { - -std::ostream& Array::print( std::ostream& out ) const -{ - out << "["; - bool first = true; - for (ArrayChildren::const_iterator it = begin(), - end = this->end(); - it != end; - ++it) + std::ostream& Array::print( std::ostream& out ) const { - if ( first ) + out << "["; + bool first = true; + for (ArrayChildren::const_iterator it = begin(), + end = this->end(); + it != end; + ++it) { - first = false; - } else { - out << ", "; + if ( first ) + { + first = false; + } else { + out << ", "; + } + (*it)->print( out ); } - (*it)->print( out ); + out << "]\n"; + return out; } - out << "]\n"; - return out; -} -Array::~Array() -{ - for (ArrayChildren::iterator it = begin(), - end = this->end(); - it != end; - ++it) + Array::~Array() { - delete *it; + for (ArrayChildren::iterator it = begin(), + end = this->end(); + it != end; + ++it) + { + delete *it; + } } -} } // namespace eoserial diff --git a/eo/src/serial/Array.h b/eo/src/serial/Array.h index 69231980..d453add9 100644 --- a/eo/src/serial/Array.h +++ b/eo/src/serial/Array.h @@ -1,148 +1,169 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_ARRAY_H__ # define __EOSERIAL_ARRAY_H__ # include -# include # include "Entity.h" # include "Serializable.h" - # include "Object.h" -# include "String.h" namespace eoserial { -// Forward declaration for below declarations. -class Array; - -/* - * Declarations of functions present in Utils.h - * These are put here to avoid instead of including the file Utils.h, which would - * cause a circular inclusion. - */ -template< class T > -void unpack( const Array & array, unsigned int index, T & value ); - -void unpackObject( const Array & array, unsigned int index, Persistent & value ); - -template< class Container, template class UnpackAlgorithm > -void unpackArray( const Array & array, unsigned int index, Container & container ); - -/** - * @brief Represents a JSON array. - * - * Wrapper for an array, so as to be used as a JSON object. - */ -class Array : public eoserial::Entity, public std::vector< eoserial::Entity* > -{ -protected: - typedef std::vector< eoserial::Entity* > ArrayChildren; - -public: - /** - * @brief Adds the serializable object as a JSON object. - * @param obj Object which implemnets JsonSerializable. - */ - void push_back( const eoserial::Printable* obj ) - { - ArrayChildren::push_back( obj->pack() ); - } - - /** - * @brief Proxy for vector::push_back. - */ - void push_back( eoserial::Entity* json ) - { - ArrayChildren::push_back( json ); - } - - /** - * @brief Prints the JSON array into the given stream. - * @param out The stream - */ - virtual std::ostream& print( std::ostream& out ) const; - - /** - * @brief Dtor - */ - ~Array(); + // Forward declaration for below declarations. + class Array; /* - * The following parts allows the user to automatically deserialize an eoserial::Array into a - * standard container, by giving the algorithm which will be used to deserialize contained entities. + * Declarations of functions present in Utils.h + * These are put here to avoid instead of including the file Utils.h, which would + * cause a circular inclusion. */ + template< class T > + void unpack( const Array & array, unsigned int index, T & value ); + + void unpackObject( const Array & array, unsigned int index, Persistent & value ); + + template< class Container, template class UnpackAlgorithm > + void unpackArray( const Array & array, unsigned int index, Container & container ); + /** - * @brief Functor which determines how to retrieve the real value contained in a eoserial::Entity at - * a given place. + * @brief Represents a JSON array. * - * It will be applied for each contained variable in the array. + * Wrapper for an array, so as to be used as a JSON object. + * + * @ingroup Serialization */ - template - struct BaseAlgorithm + class Array : public eoserial::Entity, public std::vector< eoserial::Entity* > { - /** - * @brief Main operator. - * - * @param array The eoserial::Array from which we're reading. - * @param i The index of the contained value. - * @param container The standard (STL) container in which we'll push back the read value. - */ - virtual void operator()( const eoserial::Array& array, unsigned int i, Container & container ) const = 0; + protected: + typedef std::vector< eoserial::Entity* > ArrayChildren; + + public: + /** + * @brief Adds the serializable object as a JSON object. + * @param obj Object which implemnets JsonSerializable. + */ + void push_back( const eoserial::Printable* obj ) + { + ArrayChildren::push_back( obj->pack() ); + } + + /** + * @brief Proxy for vector::push_back. + */ + void push_back( eoserial::Entity* json ) + { + ArrayChildren::push_back( json ); + } + + /** + * @brief Prints the JSON array into the given stream. + * @param out The stream + */ + virtual std::ostream& print( std::ostream& out ) const; + + /** + * @brief Dtor + */ + ~Array(); + + /* + * The following parts allows the user to automatically deserialize an eoserial::Array into a + * standard container, by giving the algorithm which will be used to deserialize contained entities. + */ + + /** + * @brief Functor which determines how to retrieve the real value contained in a eoserial::Entity at + * a given place. + * + * It will be applied for each contained variable in the array. + */ + template + struct BaseAlgorithm + { + /** + * @brief Main operator. + * + * @param array The eoserial::Array from which we're reading. + * @param i The index of the contained value. + * @param container The standard (STL) container in which we'll push back the read value. + */ + virtual void operator()( const eoserial::Array& array, unsigned int i, Container & container ) const = 0; + }; + + /** + * @brief BaseAlgorithm for retrieving primitive variables. + * + * This one should be used to retrieve primitive (and types which implement operator>>) variables, for instance + * int, double, std::string, etc... + */ + template + struct UnpackAlgorithm : public BaseAlgorithm + { + void operator()( const eoserial::Array& array, unsigned int i, C & container ) const + { + typename C::value_type t; + unpack( array, i, t ); + container.push_back( t ); + } + }; + + /** + * @brief BaseAlgorithm for retrieving eoserial::Persistent objects. + * + * This one should be used to retrieve objects which implement eoserial::Persistent. + */ + template + struct UnpackObjectAlgorithm : public BaseAlgorithm + { + void operator()( const eoserial::Array& array, unsigned int i, C & container ) const + { + typename C::value_type t; + unpackObject( array, i, t ); + container.push_back( t ); + } + }; + + /** + * @brief General algorithm for array deserialization. + * + * Applies the BaseAlgorithm to each contained variable in the eoserial::Array. + */ + template class UnpackAlgorithm> + inline void deserialize( Container & array ) + { + UnpackAlgorithm< Container > algo; + for( unsigned int i = 0, size = this->size(); + i < size; + ++i) + { + algo( *this, i, array ); + } + } }; - /** - * @brief BaseAlgorithm for retrieving primitive variables. - * - * This one should be used to retrieve primitive (and types which implement operator>>) variables, for instance - * int, double, std::string, etc... - */ - template - struct UnpackAlgorithm : public BaseAlgorithm - { - void operator()( const eoserial::Array& array, unsigned int i, C & container ) const - { - typename C::value_type t; - unpack( array, i, t ); - container.push_back( t ); - } - }; - - /** - * @brief BaseAlgorithm for retrieving eoserial::Persistent objects. - * - * This one should be used to retrieve objects which implement eoserial::Persistent. - */ - template - struct UnpackObjectAlgorithm : public BaseAlgorithm - { - void operator()( const eoserial::Array& array, unsigned int i, C & container ) const - { - typename C::value_type t; - unpackObject( array, i, t ); - container.push_back( t ); - } - }; - - /** - * @brief General algorithm for array deserialization. - * - * Applies the BaseAlgorithm to each contained variable in the eoserial::Array. - */ - template class UnpackAlgorithm> - inline void deserialize( Container & array ) - { - UnpackAlgorithm< Container > algo; - for( unsigned int i = 0, size = this->size(); - i < size; - ++i) - { - algo( *this, i, array ); - } - } -}; - } // namespace eoserial # endif // __EOSERIAL_ARRAY_H__ diff --git a/eo/src/serial/Entity.h b/eo/src/serial/Entity.h index df10002d..6fec3485 100644 --- a/eo/src/serial/Entity.h +++ b/eo/src/serial/Entity.h @@ -1,9 +1,42 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_ENTITY_H__ # define __EOSERIAL_ENTITY_H__ -# include -# include +# include // ostream +/** + * @brief Contains all the necessary entities to serialize eo objects into JSON objects. + * + * Allows serialization from user objects into JSON objects, if they implement the interface + * eoserial::Serializable or eoserial::Persistent. The following user objects can be serialized: + * - primitive types (int, std::string, ...), in particular every type that can be written into a + * std::stringstream. + * - objects which implement eoserial::Serializable. + * - array of serializable things (primitive or serializable objects). + * + * @ingroup Utilities + * @defgroup Serialization Serialization helpers + */ namespace eoserial { @@ -12,6 +45,8 @@ namespace eoserial * * This class represents a JSON entity, which can be JSON objects, * strings or arrays. It is the base class for the JSON hierarchy. + * + * @ingroup Serialization */ class Entity { diff --git a/eo/src/serial/Object.cpp b/eo/src/serial/Object.cpp index 3e557a12..dd859052 100644 --- a/eo/src/serial/Object.cpp +++ b/eo/src/serial/Object.cpp @@ -1,40 +1,60 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # include "Object.h" using namespace eoserial; namespace eoserial { - -std::ostream& Object::print( std::ostream& out ) const -{ - out << '{'; - bool first = true; - for(JsonValues::const_iterator it = begin(), end = this->end(); - it != end; - ++it) + std::ostream& Object::print( std::ostream& out ) const { - if ( first ) + out << '{'; + bool first = true; + for(JsonValues::const_iterator it = begin(), end = this->end(); + it != end; + ++it) { - first = false; - } else { - out << ", "; - } + if ( first ) + { + first = false; + } else { + out << ", "; + } - out << '"' << it->first << "\":"; // key - it->second->print( out ); // value + out << '"' << it->first << "\":"; // key + it->second->print( out ); // value + } + out << "}\n"; + return out; + } + + Object::~Object() + { + for(JsonValues::iterator it = begin(), end = this->end(); + it != end; + ++it) + { + delete it->second; } - out << "}\n"; - return out; -} - -Object::~Object() -{ - for(JsonValues::iterator it = begin(), end = this->end(); - it != end; - ++it) - { - delete it->second; } -} } // namespace eoserial diff --git a/eo/src/serial/Object.h b/eo/src/serial/Object.h index 36769252..20fbdbf7 100644 --- a/eo/src/serial/Object.h +++ b/eo/src/serial/Object.h @@ -1,66 +1,87 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_OBJECT_H__ # define __EOSERIAL_OBJECT_H__ # include # include -# include # include "Entity.h" # include "Serializable.h" namespace eoserial { - -/** - * @brief JSON Object - * - * This class represents a JSON object, which is basically a dictionnary - * of keys (strings) and values (JSON entities). - */ -class Object : public eoserial::Entity, public std::map< std::string, eoserial::Entity* > -{ -public: - typedef std::map JsonValues; - /** - * @brief Adds a pair into the JSON object. - * @param key The key associated with the eoserial object - * @param eoserial The JSON object as created with framework. + * @brief JSON Object + * + * This class represents a JSON object, which is basically a dictionnary + * of keys (strings) and values (JSON entities). + * + * @ingroup Serialization */ - void add( const std::string& key, eoserial::Entity* json ) + class Object : public eoserial::Entity, public std::map< std::string, eoserial::Entity* > { - (*this)[ key ] = json; - } + public: + typedef std::map JsonValues; - /** - * @brief Adds a pair into the JSON object. - * @param key The key associated with the eoserial object - * @param obj A JSON-serializable object - */ - void add( const std::string& key, const eoserial::Printable* obj ) - { - (*this)[ key ] = obj->pack(); - } + /** + * @brief Adds a pair into the JSON object. + * @param key The key associated with the eoserial object + * @param eoserial The JSON object as created with framework. + */ + void add( const std::string& key, eoserial::Entity* json ) + { + (*this)[ key ] = json; + } - /** - * @brief Deserializes a Serializable class instance from this JSON object. - * @param obj The object we want to rebuild. - */ - void deserialize( eoserial::Persistent & obj ) - { - obj.unpack( this ); - } + /** + * @brief Adds a pair into the JSON object. + * @param key The key associated with the eoserial object + * @param obj A JSON-serializable object + */ + void add( const std::string& key, const eoserial::Printable* obj ) + { + (*this)[ key ] = obj->pack(); + } - /** - * @brief Dtor - */ - ~Object(); + /** + * @brief Deserializes a Serializable class instance from this JSON object. + * @param obj The object we want to rebuild. + */ + void deserialize( eoserial::Persistent & obj ) + { + obj.unpack( this ); + } - /** - * @brief Prints the content of a JSON object into a stream. - */ - virtual std::ostream& print( std::ostream& out ) const; -}; + /** + * @brief Dtor + */ + ~Object(); + + /** + * @brief Prints the content of a JSON object into a stream. + */ + virtual std::ostream& print( std::ostream& out ) const; + }; } // namespace eoserial # endif // __EOSERIAL_OBJECT_H__ diff --git a/eo/src/serial/Parser.cpp b/eo/src/serial/Parser.cpp index c7822d29..258174c8 100644 --- a/eo/src/serial/Parser.cpp +++ b/eo/src/serial/Parser.cpp @@ -1,7 +1,25 @@ -# include +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # include -# include -# include # include "Parser.h" diff --git a/eo/src/serial/Parser.h b/eo/src/serial/Parser.h index f0a94ee2..5bcdc597 100644 --- a/eo/src/serial/Parser.h +++ b/eo/src/serial/Parser.h @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_PARSER_H__ # define __EOSERIAL_PARSER_H__ @@ -6,6 +27,8 @@ # include "Object.h" /** + * @file Parser.h + * * This file contains a tiny JSON parser used in DAE. This parser just handles * a subset of JSON grammar, with the following restrictions : * - all strings must be surrounded by double quotes. @@ -26,6 +49,8 @@ namespace eoserial * This parser does just retrieve values and does NOT check the structure of * the input. This implies that if the input is not correct, the result is undefined * and can result to a failure on execution. + * + * @ingroup Serialization */ class Parser { diff --git a/eo/src/serial/Serializable.h b/eo/src/serial/Serializable.h index 482a918a..715e9c97 100644 --- a/eo/src/serial/Serializable.h +++ b/eo/src/serial/Serializable.h @@ -1,43 +1,65 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_SERIALIZABLE_H__ # define __EOSERIAL_SERIALIZABLE_H__ -# include - namespace eoserial { + class Object; // to avoid recursive inclusion with JsonObject -class Object; // to avoid recursive inclusion with JsonObject - -/** - * @brief Interface showing that object can be written to a eoserial type - * (currently JSON). - */ -class Printable -{ -public: /** - * @brief Serializes the object to JSON format. - * @return A JSON object created with new. + * @brief Interface showing that object can be written to a eoserial type + * (currently JSON). + * + * @ingroup Serialization */ - virtual eoserial::Object* pack() const = 0; -}; + class Printable + { + public: + /** + * @brief Serializes the object to JSON format. + * @return A JSON object created with new. + */ + virtual eoserial::Object* pack() const = 0; + }; -/** - * @brief Interface showing that object can be eoserialized (written and read - * from an input). - * - * Note : Persistent objects should have a default non-arguments constructor. - */ -class Persistent : public Printable -{ - public: /** - * @brief Loads class fields from a JSON object. - * @param json A JSON object. Programmer doesn't have to delete it, it - * is automatically done. + * @brief Interface showing that object can be eoserialized (written and read + * from an input). + * + * Note : Persistent objects should have a default non-arguments constructor. + * + * @ingroup Serialization */ - virtual void unpack(const eoserial::Object* json) = 0; -}; + class Persistent : public Printable + { + public: + /** + * @brief Loads class fields from a JSON object. + * @param json A JSON object. Programmer doesn't have to delete it, it + * is automatically done. + */ + virtual void unpack(const eoserial::Object* json) = 0; + }; } // namespace eoserial diff --git a/eo/src/serial/String.cpp b/eo/src/serial/String.cpp index deba05a0..c5088278 100644 --- a/eo/src/serial/String.cpp +++ b/eo/src/serial/String.cpp @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # include "String.h" namespace eoserial diff --git a/eo/src/serial/String.h b/eo/src/serial/String.h index 6d81937f..526cab36 100644 --- a/eo/src/serial/String.h +++ b/eo/src/serial/String.h @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_STRING_H__ # define __EOSERIAL_STRING_H__ @@ -9,72 +30,73 @@ namespace eoserial { + /** + * @brief JSON String + * + * Wrapper for string, so as to be used as a JSON object. + * + * @ingroup Serialization + */ + class String : public eoserial::Entity, public std::string + { + public: -/** - * @brief JSON String - * - * Wrapper for string, so as to be used as a JSON object. - */ -class String : public eoserial::Entity, public std::string -{ - public: + /** + * @brief Default ctor. + * @param str The string we want to wrap. + */ + String( const std::string& str ) : std::string( str ) {} - /** - * @brief Default ctor. - * @param str The string we want to wrap. - */ - String( const std::string& str ) : std::string( str ) {} + /** + * @brief Ctor used only when parsing. + */ + String( ) {} - /** - * @brief Ctor used only on parsing. - */ - String( ) {} + /** + * @brief Prints out the string. + */ + virtual std::ostream& print( std::ostream& out ) const; - /** - * @brief Prints out the string. - */ - virtual std::ostream& print( std::ostream& out ) const; + /** + * @brief Deserializes the current String into a given primitive type value. + * @param value The value in which we're writing. + */ + template + inline void deserialize( T & value ); - /** - * @brief Deserializes the current String into a given primitive type value. - * @param value The value in which we're writing. - */ - template - inline void deserialize( T & value ); + protected: + // Copy and reaffectation are forbidden + explicit String( const String& _ ); + String& operator=( const String& _ ); + }; - protected: - // Copy and reaffectation are forbidden - explicit String( const String& _ ); - String& operator=( const String& _ ); -}; + /** + * @brief Casts a eoserial::String into a primitive value, or in a type which at + * least overload operator>>. + * + * @param value A reference to the variable we're writing into. + * + * It's not necessary to specify the variable type, which can be infered by compiler when + * invoking. + */ + template + inline void String::deserialize( T & value ) + { + std::stringstream ss; + ss.precision(std::numeric_limits::digits10 + 1); + ss << *this; + ss >> value; + } -/** - * @brief Casts a eoserial::String into a primitive value, or in a type which at - * least overload operator>>. - * - * @param value A reference to the variable we're writing into. - * - * It's not necessary to specify the variable type, which can be infered by compiler when - * invoking. - */ -template -inline void String::deserialize( T & value ) -{ - std::stringstream ss; - ss.precision(std::numeric_limits::digits10 + 1); - ss << *this; - ss >> value; -} - -/** - * @brief Specialization for strings, which don't need to be converted through - * a stringstream. - */ -template<> -inline void String::deserialize( std::string & value ) -{ - value = *this; -} + /** + * @brief Specialization for strings, which don't need to be converted through + * a stringstream. + */ + template<> + inline void String::deserialize( std::string & value ) + { + value = *this; + } } // namespace eoserial diff --git a/eo/src/serial/Utils.h b/eo/src/serial/Utils.h index f40c08d9..33172a74 100644 --- a/eo/src/serial/Utils.h +++ b/eo/src/serial/Utils.h @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_UTILS_H__ # define __EOSERIAL_UTILS_H__ @@ -7,7 +28,7 @@ namespace eoserial { - /***************************** + /* *************************** * DESERIALIZATION FUNCTIONS * ***************************** These functions are useful for casting eoserial::objects into simple, primitive @@ -54,7 +75,7 @@ namespace eoserial static_cast( array[ index ] )->deserialize< Container, UnpackAlgorithm >( container ); } - /******************************* + /* ***************************** *** SERIALIZATION FUNCTIONS *** ******************************* These functions are useful for casting classic objects and diff --git a/eo/src/serial/eoSerial.h b/eo/src/serial/eoSerial.h index a10f6c01..55a116f0 100644 --- a/eo/src/serial/eoSerial.h +++ b/eo/src/serial/eoSerial.h @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EOSERIAL_HEADERS__ # define __EOSERIAL_HEADERS__ diff --git a/eo/src/utils/eoParallel.cpp b/eo/src/utils/eoParallel.cpp index 41865198..fc57910c 100644 --- a/eo/src/utils/eoParallel.cpp +++ b/eo/src/utils/eoParallel.cpp @@ -21,7 +21,7 @@ Contact: http://eodev.sourceforge.net Authors: -Caner Candan + Caner Candan */ @@ -37,7 +37,7 @@ eoParallel::eoParallel() : _nthreads( 0, "parallelize-nthreads", "Define the number of threads you want to use, nthreads = 0 means you want to use all threads available", '\0' ), _enableResults( false, "parallelize-enable-results", "Enable the generation of results", '\0' ), _doMeasure( false, "parallelize-do-measure", "Do some measures during execution", '\0' ), - _packetSize( 1U, "parallelize-packet-size", "Number of elements which should be sent at a time during a parallel evaluation based on message passing.", '\0'), + _packetSize( 1U, "parallelize-packet-size", "Number of elements which should be sent in a single message during a parallel evaluation based on message passing.", '\0'), _t_start(0) { } diff --git a/eo/src/utils/eoParallel.h b/eo/src/utils/eoParallel.h index 3aae8e43..b812fecc 100644 --- a/eo/src/utils/eoParallel.h +++ b/eo/src/utils/eoParallel.h @@ -20,8 +20,7 @@ Contact: http://eodev.sourceforge.net Authors: -Caner Candan - + Caner Candan */ /** @defgroup Parallel Parallel diff --git a/eo/src/utils/eoTimer.h b/eo/src/utils/eoTimer.h index 8aa09c51..edecce62 100644 --- a/eo/src/utils/eoTimer.h +++ b/eo/src/utils/eoTimer.h @@ -1,38 +1,84 @@ -# ifndef __TIMER_H__ -# define __TIMER_H__ +/* +(c) Thales group, 2012 -# include -# include + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. -# include -# include + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -# include "utils/eoParallel.h" + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ +# ifndef __EO_TIMER_H__ +# define __EO_TIMER_H__ + +# include // time() +# include // rusage() + +# include // std::vector +# include // std::map + +# include "utils/eoParallel.h" // eo::parallel # ifdef WITH_MPI +// For serialization purposes # include # include # include # endif -// TODO TODOB commenter +/** + * @brief Timer allowing to measure time between a start point and a stop point. + * + * This timer allows the user to measure user time, system time and wallclock time + * between two points. Basically, user time is time spent in developer code ; system + * time is time spent during the IO wait and system calls ; wallclock is the difference + * of time we could observe by measuring time with a watch. + * + * @ingroup Utilities + */ class eoTimer { public: + /** + * @brief Default ctor. Begins all the timers. + */ eoTimer() - { - restart(); - } - - void restart() { uuremainder = 0; usremainder = 0; + restart(); + } + + /** + * @brief Restarts all the timers and launch the measure. + */ + void restart() + { wc_start = time(NULL); getrusage( RUSAGE_SELF, &_start ); } + /** + * @brief Measures the user time spent since the last restart(). + * + * If the number of elapsed seconds is zero, the spent milliseconds are + * added to a remainder. If the remainder exceeds one second, it is + * added to the number of elapsed seconds. + * + * @return Number of seconds elapsed in user space. + */ long int usertime() { struct rusage _now; @@ -44,12 +90,21 @@ class eoTimer if( uuremainder > 1000000) { ++result; - uuremainder = 0; + uuremainder -= 1000000; } } return result; } + /** + * @brief Measures the system time spent since the last restart(). + * + * If the number of elapsed seconds is zero, the spent milliseconds are + * added to a remainder. If the remainder exceeds one second, it is + * added to the number of elapsed seconds. + * + * @return Number of seconds elapsed in system (kernel) space. + */ long int systime() { struct rusage _now; @@ -61,28 +116,90 @@ class eoTimer if( usremainder > 1000000) { ++result; - usremainder = 0; + usremainder -= 1000000; } } return result; } + /** + * @brief Measures the wallclock time spent since the last restart(). + * + * @return Number of seconds elapsed, as a double. + */ double wallclock() { return std::difftime( std::time(NULL) , wc_start ); } protected: + // Structure used to measure user and system time. struct rusage _start; + // Remainder (in milliseconds) for user time. long int uuremainder; + // Remainder (in milliseconds) for system time. long int usremainder; + // Structure used to measure wallclock time. time_t wc_start; }; +/** + * @brief Registers a group of statistics, each statistic corresponding to user, system and wallclock times distribution. + * + * This class helps the user to measure time in different parts of an application. A name is associated to a statistic, + * on each call to start() and stop() for this name, a new number is added to the statistic, for each of the three + * measured times. + * + * The statistics are only registered if the option "--parallelized-do-measure" is set to true, which can be checked + * thanks to global object eo::parallel. + * + * This shows how the eoTimerStat can be used : + * @code + * eoTimerStat timerStat; + * timerStat.start("first_point"); + * for( int i = 0; i < 1000; ++i ) + * { + * timerStat.start("single_computation"); + * single_computation( i ); + * timerStat.stop("single_computation"); + * } + * // After this loop, timerStat contains a statistic of key "single_computation" which contains 1000 measures for + * // each type of time. + * timerStat.stop("first_point"); + * // After this line, timerStat contains another statistic of key "first_point" which counted the duration of the + * // whole loop. + * + * int singleComputationUsertimeMean = 0; + * for( int i = 0; i < 1000; ++i ) + * { + * singleComputationUsertimeMean += timerStat.stats()["single_computation"].utime[i]; + * } + * std::cout << "Mean of user time spent in single computation: " << singleComputationUsertimeMean / 1000. << std::endl; + * @endcode + * + * When using MPI, these statistics can be readily be serialized, so as to be sent over a network, for instance. + * + * Implementation details: this eoTimerStat is in fact a map of strings (key) / Stat (value). Stat is an internal + * structure directly defined in the class, which contains three vectors modeling the distributions of the different + * types of elapsed times. Another map of strings (key) / eoTimer (value) allows to effectively retrieve the different + * times. The struct Stat will be exposed to client, which will use its members ; however, + * the client doesn't have anything to do directly with the timer, that's why the two maps are splitted. + * + * @ingroup Utilities + */ class eoTimerStat { public: + /** + * @brief Statistic related to a key (name). + * + * This structure is the value in the map saved in the eoTimerStat. It contains the statistic bound to a key, + * which are the user time distribution, the system time distribution and the wallclock time distribution, as + * std::vector s. + * + * It can readily be serialized with boost when compiling with mpi. + */ struct Stat { std::vector utime; @@ -93,25 +210,27 @@ class eoTimerStat friend class boost::serialization::access; /** - * Serializes the statistique in a boost archive (useful for boost::mpi) + * Serializes the single statistic in a boost archive (useful for boost::mpi). + * Just serializes the 3 vectors. */ template - void serialize( Archive & ar, const unsigned int version ) - { - ar & utime & stime & wtime; - (void) version; // avoid compilation warning - } + void serialize( Archive & ar, const unsigned int version ) + { + ar & utime & stime & wtime; + (void) version; // avoid compilation warning + } # endif }; #ifdef WITH_MPI - // Gives access to boost serialization - friend class boost::serialization::access; + // Gives access to boost serialization + friend class boost::serialization::access; - /** - * Serializes the map of statistics in a boost archive (useful for boost::mpi) - */ - template + /** + * Serializes the timerStat object in a boost archive (useful for boost::mpi). + * Just serializes the map. + */ + template void serialize( Archive & ar, const unsigned int version ) { ar & _stats; @@ -119,6 +238,14 @@ class eoTimerStat } # endif + /** + * @brief Starts a new measure for the given key. + * + * This is only performed if parallel.doMeasure() is true, which is equivalent to the fact that + * parser found "--parallel-do-measure=1" in command line args. + * + * @param key The key of the statistic. + */ void start( const std::string & key ) { if( eo::parallel.doMeasure() ) @@ -127,6 +254,16 @@ class eoTimerStat } } + /** + * @brief Stops the mesure for the given key and saves the elapsed times. + * + * Must follow a call of start with the same key. + * + * This is only performed if parallel.doMeasure() is true, which is equivalent to the fact that + * parser found "--parallel-do-measure=1" in command line args. + * + * @param key The key of the statistic. + */ void stop( const std::string& key ) { if( eo::parallel.doMeasure() ) @@ -139,13 +276,18 @@ class eoTimerStat } } - std::map< std::string, Stat > stats() + /** + * @brief Getter for the statistics map. + */ + std::map< std::string, Stat >& stats() { return _stats; } protected: + // Statistics map: links a key (string) to a statistic. std::map< std::string, Stat > _stats; + // Timers map: links a key to its timer. std::map< std::string, eoTimer > _timers; }; From d711369f12b65969cfb8ad2478996c849922bc4a Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 12 Jul 2012 14:17:51 +0200 Subject: [PATCH 09/19] Correctly handling remainders when using eoTimer. --- eo/src/utils/eoTimer.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/eo/src/utils/eoTimer.h b/eo/src/utils/eoTimer.h index edecce62..21131377 100644 --- a/eo/src/utils/eoTimer.h +++ b/eo/src/utils/eoTimer.h @@ -83,15 +83,22 @@ class eoTimer { struct rusage _now; getrusage( RUSAGE_SELF, &_now ); + long int result = _now.ru_utime.tv_sec - _start.ru_utime.tv_sec; - if( _now.ru_utime.tv_sec == _start.ru_utime.tv_sec ) + long int remainder = _now.ru_utime.tv_usec - _start.ru_utime.tv_usec; + if( remainder >= 0 ) { - uuremainder += _now.ru_utime.tv_usec - _start.ru_utime.tv_usec; - if( uuremainder > 1000000) - { - ++result; - uuremainder -= 1000000; - } + uuremainder += remainder; + } else + { + uuremainder += ( 1000000 - remainder ); + --result; + } + + if( uuremainder >= 1000000 ) + { + uuremainder -= 1000000; + ++result; } return result; } @@ -109,15 +116,22 @@ class eoTimer { struct rusage _now; getrusage( RUSAGE_SELF, &_now ); + long int result = _now.ru_stime.tv_sec - _start.ru_stime.tv_sec; - if( _now.ru_stime.tv_sec == _start.ru_stime.tv_sec ) + long int remainder = _now.ru_stime.tv_usec - _start.ru_stime.tv_usec; + if( remainder >= 0 ) { - usremainder += _now.ru_stime.tv_usec - _start.ru_stime.tv_usec; - if( usremainder > 1000000) - { - ++result; - usremainder -= 1000000; - } + usremainder += remainder; + } else + { + usremainder += ( 1000000 - remainder ); + --result; + } + + if( usremainder >= 1000000 ) + { + usremainder -= 1000000; + ++result; } return result; } From f963a15fbe23ad8e3cb4c279ac0f557ddbe70e83 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 12 Jul 2012 19:15:07 +0200 Subject: [PATCH 10/19] Fixing assignment algorithm, who gave too many assignments by worker. Now, when using static assignment, the optimal size of packet (so as each worker has to process only one packet of data) is sent. --- eo/src/eoPopEvalFunc.h | 10 +++++++++- eo/src/mpi/eoMpiAssignmentAlgorithm.h | 3 +++ eo/src/mpi/eoParallelApply.h | 14 ++++---------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/eo/src/eoPopEvalFunc.h b/eo/src/eoPopEvalFunc.h index 2cc86807..b96e8cac 100644 --- a/eo/src/eoPopEvalFunc.h +++ b/eo/src/eoPopEvalFunc.h @@ -35,6 +35,9 @@ #include #include #include +#include + +#include // ceil # endif // WITH_MPI /** eoPopEvalFunc: This abstract class is for GLOBAL evaluators @@ -192,7 +195,12 @@ class eoParallelPopLoopEval : public eoPopEvalFunc (void)_parents; // Reinits the store and the scheduling algorithm store->data( _offspring ); - assignAlgo.reinit( _offspring.size() ); + // For static scheduling, it's mandatory to reinit attributions + int nbWorkers = assignAlgo.availableWorkers(); + assignAlgo.reinit( nbWorkers ); + if( ! eo::parallel.isDynamic() ) { + store->data()->packetSize = ceil( static_cast( _offspring.size() ) / nbWorkers ); + } // Effectively launches the job. eo::mpi::ParallelApply job( assignAlgo, masterRank, *store ); job.run(); diff --git a/eo/src/mpi/eoMpiAssignmentAlgorithm.h b/eo/src/mpi/eoMpiAssignmentAlgorithm.h index 0630a6f7..85177ea0 100644 --- a/eo/src/mpi/eoMpiAssignmentAlgorithm.h +++ b/eo/src/mpi/eoMpiAssignmentAlgorithm.h @@ -137,7 +137,10 @@ namespace eo { unsigned int nbWorkers = workers.size(); freeWorkers = nbWorkers; + + attributions.clear(); attributions.reserve( nbWorkers ); + busy.clear(); busy.resize( nbWorkers, false ); // Let be the euclidean division of runs by nbWorkers : diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index 9598b952..d0435cea 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -37,8 +37,6 @@ namespace eo { size = _pop->size(); } - - tempArray = new EOT[ _packetSize ]; } void init( std::vector& _pop ) @@ -49,11 +47,6 @@ namespace eo assignedTasks.clear(); } - ~ParallelApplyData() - { - delete [] tempArray; - } - std::vector& data() { return *_data; @@ -65,7 +58,7 @@ namespace eo int size; std::map< int /* worker rank */, ParallelApplyAssignment /* min indexes in vector */> assignedTasks; int packetSize; - EOT* tempArray; + std::vector tempArray; int masterRank; bmpi::communicator& comm; @@ -140,14 +133,15 @@ namespace eo int recvSize; d->comm.recv( d->masterRank, 1, recvSize ); - d->comm.recv( d->masterRank, 1, d->tempArray, recvSize ); + d->tempArray.resize( recvSize ); + d->comm.recv( d->masterRank, 1, & d->tempArray[0] , recvSize ); timerStat.start("worker_processes"); for( int i = 0; i < recvSize ; ++i ) { d->func( d->tempArray[ i ] ); } timerStat.stop("worker_processes"); - d->comm.send( d->masterRank, 1, d->tempArray, recvSize ); + d->comm.send( d->masterRank, 1, & d->tempArray[0], recvSize ); } }; From e7a48b8afb7b617c1f074e14492e98abca1c0745 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 12 Jul 2012 19:15:52 +0200 Subject: [PATCH 11/19] Using dynamic scheduling by default --- eo/src/utils/eoParallel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eo/src/utils/eoParallel.cpp b/eo/src/utils/eoParallel.cpp index fc57910c..b47d3f82 100644 --- a/eo/src/utils/eoParallel.cpp +++ b/eo/src/utils/eoParallel.cpp @@ -32,7 +32,7 @@ Authors: eoParallel::eoParallel() : _isEnabled( false, "parallelize-loop", "Enable memory shared parallelization into evaluation's loops", '\0' ), - _isDynamic( false, "parallelize-dynamic", "Enable dynamic memory shared parallelization", '\0' ), + _isDynamic( true, "parallelize-dynamic", "Enable dynamic memory shared parallelization", '\0' ), _prefix( "results", "parallelize-prefix", "Here's the prefix filename where the results are going to be stored", '\0' ), _nthreads( 0, "parallelize-nthreads", "Define the number of threads you want to use, nthreads = 0 means you want to use all threads available", '\0' ), _enableResults( false, "parallelize-enable-results", "Enable the generation of results", '\0' ), From 1eac497c4d4233e992d4dfb603e9845d5b7f8310 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 13 Jul 2012 11:54:21 +0200 Subject: [PATCH 12/19] Documentating eoMpi.h --- eo/src/mpi/eoMpi.h | 390 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 371 insertions(+), 19 deletions(-) diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index fc419b13..d4e0349e 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -1,10 +1,28 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EO_MPI_H__ # define __EO_MPI_H__ -# include -# include -# include -# include +# include // std::vector # include # include @@ -14,37 +32,196 @@ # include "eoMpiNode.h" # include "eoMpiAssignmentAlgorithm.h" -// TODO TODOB comment! - namespace eo { + /** + * @ingroup Parallel + * @defgroup MPI Message Passing Interface parallelization + */ + + /** + * @brief MPI parallelization helpers for EO. + * + * This namespace contains parallelization functions which help to parallelize computations in EO. It is based on a + * generic algorithm, which is then customized with functors, corresponding to the algorithm main steps. These + * computations are centralized, i.e there is one central host whose role is to handle the steps of the algorithm ; + * we call it the "master". The other hosts just have to perform a "dummy" computation, which may be any kind of + * processing ; we call them, the "slaves", or less pejoratively, the "workers". Workers can communicate to each + * other, but they receive their orders from the Master and send him back some results. A worker can also be the + * master of a different parallelization process, as soon as it is a part of its work. Machines of the network, also + * called hosts, are identified by an unique number: their rank. At any time during the execution of the program, + * all the hosts know the total number of hosts. + * + * A parallelized Job is a set of tasks which are independant (i.e can be executed in random order without + * modifiying the result) and take a data input and compute a data output to be sent to the Master. The data can be + * of any type, however they have to be serialized to be sent over a network. It is sufficient that they can be + * serialized through boost. + * + * @todo For serialization purposes, don't depend upon boost. It would be easy to use only eoserial and send strings + * via mpi. + * + * The main steps of the algorithm are the following: + * - For the master: + * - Have we done with the treatment we are doing ? + * - If this is the case, we can quit. + * - Otherwise, send an input data to some available worker. + * - If there's no available worker, wait for a worker to be free. + * - When receiving the response, handle it (eventually compute something on the output data, store it...). + * - Go back to the first step. + * - For the worker, it is even easier: + * - Wait for an order. + * - If there's nothing to do, just quit. + * - Otherwise, eventually retrieve data and do the work. + * - Go back to the first step. + * + * There is of course some network adjustements to do and precisions to give there, but the main ideas are present. As the + * job is fully centralized, this is the master who tells the workers when to quit and when to work. + * + * The idea behind these MPI helpers is to be the most generic possible. If we look back at the steps of the + * algorithm, we found that the steps can be splitted into 2 parts: the first consists in the steps of any + * parallelization algorithm and the other consists in the specific parts of the algorithm. Ideally, the user should + * just have to implement the specific parts of the algorithm. We identified these parts to be: + * - For the master: + * - What does mean to have terminated ? There are only two alternatives, in our binary world: either it is + * terminated, or it is not. Hence we only need a function returning a boolean to know if we're done with the + * computation : we'll call it IsFinished. + * - What do we have to do when we send a task ? We don't have any a priori on the form of the sent data, or + * the number of sent data. Moreover, as the tasks are all independant, we don't care of who will do the + * computation, as soon as it's done. Knowing the rank of the worker will be sufficient to send him data. We + * have identified another function, taking a single argument which is the rank of the worker: we'll call it + * SendTask. + * - What do we have to do when we receive a response from a worker ? One more time, we don't know which form + * or structure can have the receive data, only the user can know. Also we let the user the charge to retrieve + * the data ; he just has to know from who the master will retrieve the data. Here is another function, taking + * a rank (the sender's one) as a function argument : this will be HandleResponse. + * - For the worker: + * - What is the processing ? It can have any nature. We just need to be sure that a data is sent back to the + * master, but it seems difficult to check that: it will be the role of the user to assert that data is sent by + * the worker at the end of an execution. We've got identified our last function: ProcessTask. + * + * In term of implementation, it would be annoying to have only abstract classes with these 4 methods to implement. It + * would mean that if you want to alter just one of these 4 functions, you have to implement a new sub class, with a + * new constructor which could have the same signature. Besides, this fashion doesn't allow you to add dynamic + * functionalities, using the design pattern Decorator for instance, without implement a class for each type of + * decoration you want to add. For these reasons, we decided to transform function into functors ; the user can then + * wrap the existing, basic comportments into more sophisticated computations, whenever he wants, and without the + * notion of order. We retrieve here the power of extension given by the design pattern Decorator. + * + * Our 4 functors could have a big amount of data in common (see eoParallelApply to have an idea). + * So as to make it easy for the user to implement these 4 functors, we consider that these functors + * have to share a common data structure. This data structure is referenced (as a pointer) in the 4 functors, so the + * user doesn't need to pass a lot of parameters to each functor constructor. + * + * There are two kinds of jobs: + * - The job which are launched a fixed and well known amount of times, i.e both master and workers know how many + * times they will be launched. They are "one shot jobs". + * - The job which are launched an unknown amount of times, for instance embedded in a while loop for which we don't + * know the amount of repetitions (typically, eoEasyEA loop is a good example, as we don't know the continuator + * condition). They are called "multi job". + * As the master tells the workers to quit, we have to differentiate these two kinds of jobs. When the job is of the + * kind "multi job", the workers would have to perform a while(true) loop so as to receive the orders ; but even if + * the master tells them to quit, they would begin another job and wait for another order, while the master would + * have quit: this would cause a deadlock and workers processes would be blocked, waiting for an order. + */ namespace mpi { + /** + * @brief A timer which allows user to generate statistics about computation times. + */ extern eoTimerStat timerStat; + /** + * @brief Tags used in MPI messages for framework communication + * + * These tags are used for framework communication and fits "channels", so as to differentiate when we're + * sending an order to a worker (Commands) or data (Messages). They are not reserved by the framework and can be + * used by the user, but he is not bound to. + */ namespace Channel { const int Commands = 0; const int Messages = 1; } + /** + * @brief Simple orders used by the framework. + * + * These orders are sent by the master to the workers, to indicate to them if they should receive another task + * to do (Continue), if an one shot job is done (Finish) or if a multi job is done (Kill). + */ namespace Message { const int Continue = 0; - const int Finish = 1; // TODO commentaire : différence entre finir une tâche et arrêter le worker à expliciter. + const int Finish = 1; const int Kill = 2; } + /** + * @brief If the job only has one master, the user can use this constant, so as not to worry with integer ids. + */ const int DEFAULT_MASTER = 0; + /** + * @brief Base class for the 4 algorithm functors. + * + * This class can embed a data (JobData) and a wrapper, so as to make all the 4 functors wrappable. + * We can add a wrapper at initialization or at any time when executing the program. + * + * According to RAII, the boolean needDelete helps to know if we have to use the operator delete on the wrapper + * or not. Hence, if any functor is wrapped, user has just to put this boolean to true, to indicate to wrapper + * that it should call delete. This allows to mix wrapper initialized in the heap (with new) or in the stack. + * + * @param JobData a Data type, which can have any form. It can a struct, a single int, anything. + * + * @param Wrapped the type of the functor, which will be stored as a pointer under the name _wrapped. + * This allows to wrap directly the functor in functors of the same type + * here, instead of dealing with SharedDataFunction* that we would have to cast all the time. + * Doing also allows to handle the wrapped functor as the functor we're writing, when coding the wrappers, + * instead of doing some static_cast. For instance, if there are 2 functors subclasses, fA and fB, fA + * implementing doFa() and fB implementing doFb(), we could have the following code: + * @code + * struct fA_wrapper + * { + * // some code + * void doFa() + * { + * _wrapped->doFa(); + * std::cout << "I'm a fA wrapper!" << std::endl; + * // if we didn't have the second template parameter, but a SharedDataFunction, we would have to do this: + * static_cast(_wrapped)->doFa(); + * // do other things (it's a wrapper) + * } + * }; + * + * struct fB_wrapper + * { + * // some code + * void doFb() + * { + * _wrapped->doFb(); // and not: static_cast(_wrapped)->doFb(); + * } + * }; + * @endcode + * This makes the code easier to write for the user. + */ template< typename JobData, typename Wrapped > struct SharedDataFunction { + /** + * @brief Default constructor. + * + * The user is not bound to give a wrapped functor. + */ SharedDataFunction( Wrapped * w = 0 ) : _wrapped( w ), _needDelete( false ) { // empty } + /** + * @brief Destructor. + * + * Calls delete on the wrapped function, only if necessary. + */ virtual ~SharedDataFunction() { if( _wrapped && _wrapped->needDelete() ) @@ -53,11 +230,21 @@ namespace eo } } + /** + * @brief Setter for the wrapped function. + * + * It doesn't do anything on the current wrapped function, like deleting it. + */ void wrapped( Wrapped * w ) { _wrapped = w; } + /** + * @brief Setter for the data present in the functor. + * + * Calls the setter on the functor and on the wrapped functors, in a Composite pattern fashion. + */ void data( JobData* _d ) { d = _d; @@ -67,15 +254,29 @@ namespace eo } } + /** + * @brief Returns true if we need to use operator delete on this wrapper, false otherwise. + **/ bool needDelete() { return _needDelete; } void needDelete( bool b ) { _needDelete = b; } protected: JobData* d; - Wrapped* _wrapped; + Wrapped* _wrapped; // Pointer and not a reference so as to be set at any time and to avoid affectation bool _needDelete; }; + /** + * @brief Functor (master side) used to send a task to the worker. + * + * The user doesn't have to know which worker will receive a task, so we just indicate to master the rank of the + * worker. The data used for computation have to be explicitly sent by the master to the worker, with indicated + * rank. Once this functor has been called, the worker is considered busy until it sends a return message to the + * master. + * + * This is a functor implementing void operator()(int), and also a shared data function, containing wrapper on its + * own type. + */ template< typename JobData > struct SendTaskFunction : public eoUF, public SharedDataFunction< JobData, SendTaskFunction > { @@ -86,9 +287,19 @@ namespace eo // empty } - virtual ~SendTaskFunction() {} + virtual ~SendTaskFunction() {} // for inherited classes }; + /** + * @brief Functor (master side) used to indicate what to do when receiving a response. + * + * The master calls this function as soon as it receives some data, in some channel. Thanks to MPI, we retrieve + * the rank of the data's sender. This functor is then called with this rank. There is no memoization of a link + * between sent data and rank, so the user has to implement it, if he needs it. + * + * This is a functor implementing void operator()(int), and also a shared data function, containing wrapper on + * its own type. + */ template< typename JobData > struct HandleResponseFunction : public eoUF, public SharedDataFunction< JobData, HandleResponseFunction > { @@ -99,9 +310,21 @@ namespace eo // empty } - virtual ~HandleResponseFunction() {} + virtual ~HandleResponseFunction() {} // for inherited classes }; + /** + * @brief Functor (worker side) implementing the processing to do. + * + * This is where the real computation happen. + * Whenever the master sends the command "Continue" to workers, which indicates the worker will receive a task, + * the worker calls this functor. The user has to explicitly retrieve the data, handle it and transmit it, + * processed, back to the master. If the worker does not send any data back to the master, the latter will + * consider the worker isn't done and a deadlock could occur. + * + * This is a functor implementing void operator()(), and also a shared data function, containing wrapper on its + * own type. + */ template< typename JobData > struct ProcessTaskFunction : public eoF, public SharedDataFunction< JobData, ProcessTaskFunction > { @@ -112,9 +335,19 @@ namespace eo // empty } - virtual ~ProcessTaskFunction() {} + virtual ~ProcessTaskFunction() {} // for inherited classes }; + /** + * @brief Functor (master side) indicating whether the job is done or not. + * + * The master loops on this functor to know when to stop. When this functor returns true, the master will wait + * for the last responses and properly stops the job. Whenever this functor returns false, the master will send + * tasks, until this functor returns true. + * + * This is a functor implementing bool operator()(), and also a shared function, containing wrapper on its own + * type. + */ template< typename JobData > struct IsFinishedFunction : public eoF, public SharedDataFunction< JobData, IsFinishedFunction > { @@ -125,12 +358,31 @@ namespace eo // empty } - virtual ~IsFinishedFunction() {} + virtual ~IsFinishedFunction() {} // for inherited classes }; + /** + * @brief Contains all the required data and the functors to launch a job. + * + * Splitting the functors and data from the job in itself allows to use the same functors and data for multiples + * instances of the same job. You define your store once and can use it a lot of times during your program. If + * the store was included in the job, you'd have to give again all the functors and all the datas to each + * invokation of the job. + * + * Job store contains the 4 functors (pointers, so as to be able to wrap them ; references couldn't have + * permitted that) described above and the JobData used by all these functors. It contains + * also helpers to easily wrap the functors, getters and setters on all of them. + * + * The user has to implement data(), which is the getter for retrieving JobData. We don't have any idea of who + * owns the data, moreover it is impossible to initialize it in this generic JobStore, as we don't know its + * form. As a matter of fact, the user has to define this in the JobStore subclasses. + */ template< typename JobData > struct JobStore { + /** + * @brief Default ctor with the 4 functors. + */ JobStore( SendTaskFunction* stf, HandleResponseFunction* hrf, @@ -142,11 +394,22 @@ namespace eo // empty } + /** + * @brief Empty ctor, useful for not forcing users to call the other constructor. + * + * When using this constructor, the user have to care about the 4 functors pointers, otherwise null pointer + * segfaults have to be expected. + */ JobStore() { // empty } + /** + * @brief Default destructor. + * + * JobStore is the highest layer which calls needDelete on its functors. + */ ~JobStore() { if( _stf->needDelete() ) delete _stf; @@ -155,16 +418,21 @@ namespace eo if( _iff->needDelete() ) delete _iff; } + // Getters SendTaskFunction & sendTask() { return *_stf; } HandleResponseFunction & handleResponse() { return *_hrf; } ProcessTaskFunction & processTask() { return *_ptf; } IsFinishedFunction & isFinished() { return *_iff; } + // Setters void sendTask( SendTaskFunction* stf ) { _stf = stf; } void handleResponse( HandleResponseFunction* hrf ) { _hrf = hrf; } void processTask( ProcessTaskFunction* ptf ) { _ptf = ptf; } void isFinished( IsFinishedFunction* iff ) { _iff = iff; } + /** + * @brief Helpers for wrapping send task functor. + */ void wrapSendTask( SendTaskFunction* stf ) { if( stf ) @@ -174,6 +442,9 @@ namespace eo } } + /** + * @brief Helpers for wrapping handle response functor. + */ void wrapHandleResponse( HandleResponseFunction* hrf ) { if( hrf ) @@ -183,6 +454,9 @@ namespace eo } } + /** + * @brief Helpers for wrapping process task functor. + */ void wrapProcessTask( ProcessTaskFunction* ptf ) { if( ptf ) @@ -192,6 +466,9 @@ namespace eo } } + /** + * @brief Helpers for wrapping is finished functor. + */ void wrapIsFinished( IsFinishedFunction* iff ) { if( iff ) @@ -201,22 +478,48 @@ namespace eo } } - // TODO commenter : laissé à la couche d'en dessous car impossible d'initialiser une donnée membre d'une classe mère depuis une classe fille. virtual JobData* data() = 0; protected: - // TODO commenter : Utiliser des pointeurs pour éviter d'écraser les fonctions wrappées SendTaskFunction< JobData >* _stf; HandleResponseFunction< JobData >* _hrf; ProcessTaskFunction< JobData >* _ptf; IsFinishedFunction< JobData >* _iff; }; + /** + * @brief Class implementing the centralized job algorithm. + * + * This class handles all the job algorithm. With its store and its assignment (scheduling) algorithm, it + * executes the general algorithm described above, adding some networking, so as to make the global process + * work. It initializes all the functors with the data, then launches the main loop, indicating to workers when + * they will have to work and when they will finish, by sending them a termination message (integer that can be + * customized). As the algorithm is centralized, it is also mandatory to indicate what is the MPI rank of the + * master process, hence the workers will know from who they should receive their commands. + * + * Any of the 3 master functors can launch exception, it will be catched and rethrown as a std::runtime_exception + * to the higher layers. + */ template< class JobData > class Job { public: + /** + * @brief Main constructor for Job. + * + * @param _algo The used assignment (scheduling) algorithm. It has to be initialized, with its maximum + * possible number of workers (some workers referenced in this algorithm shouldn't be busy). See + * AssignmentAlgorithm for more details. + * + * @param _masterRank The MPI rank of the master. + * @param _workerStopCondition Number of the message which will cause the workers to terminate. It could + * be one of the constants defined in eo::mpi::Commands, or any other integer. The user has to be sure + * that a message containing this integer will be sent to each worker on the Commands channel, otherwise + * deadlock will happen. Master sends Finish messages at the end of a simple job, but as a job can + * happen multiples times (multi job), workers don't have to really finish on these messages but on + * another message. This is here where you can configurate it. See also OneShotJob and MultiJob. + */ Job( AssignmentAlgorithm& _algo, int _masterRank, int _workerStopCondition, @@ -242,6 +545,16 @@ namespace eo protected: + /** + * @brief Finally block of the main algorithm + * + * Herb Sutter's trick for having a finally block, in a try/catch section: invoke a class at the + * beginning of the try, its destructor will be called in every cases. + * + * This implements the end of the master algorithm: + * - sends to all available workers that they are free, + * - waits for last responses, handles them and sends termination messages to last workers. + */ struct FinallyBlock { FinallyBlock( @@ -301,6 +614,15 @@ namespace eo bmpi::communicator & comm; }; + /** + * @brief Master part of the job. + * + * Launches the parallelized job algorithm : while there is something to do (! IsFinished ), get a + * worker who will be the assignee ; if no worker is available, wait for a response, handle it and reask + * for an assignee. Then send the command and the task. + * Once there is no more to do (IsFinished), indicate to all available workers that they're free, wait + * for all the responses and send termination messages (see also FinallyBlock). + */ void master( ) { int totalWorkers = assignmentAlgo.availableWorkers(); @@ -346,6 +668,12 @@ namespace eo } } + /** + * @brief Worker part of the algorithm. + * + * The algorithm is more much simpler: wait for an order; if it's termination message, leave. Otherwise, + * prepare to work. + */ void worker( ) { int order; @@ -383,11 +711,18 @@ namespace eo public: + /** + * @brief Launches the job algorithm, according to the role of the host (roles are deduced from the + * master rank indicated in the constructor). + */ void run( ) { ( _isMaster ) ? master( ) : worker( ); } + /** + * @brief Returns true if the current host is the master, false otherwise. + */ bool isMaster( ) { return _isMaster; @@ -397,7 +732,7 @@ namespace eo AssignmentAlgorithm& assignmentAlgo; int masterRank; - const int workerStopCondition; // TODO commentaire: message signifiant l'arrêt pour un worker + const int workerStopCondition; bmpi::communicator& comm; SendTaskFunction & sendTask; @@ -406,13 +741,20 @@ namespace eo IsFinishedFunction & isFinished; bool _isMaster; - }; + /** + * @brief Job that will be launched only once. + * + * As explained in eo::mpi documentation, jobs can happen either a well known amount of times or an unknown + * amount of times. This class implements the general case when the job is launched a well known amount of + * times. The job will be terminated on both sides (master and worker) once the master would have said it. + * + * It uses the message Message::Finish as the termination message. + */ template< class JobData > class OneShotJob : public Job< JobData > { - // TODO commentaire : ce job s'arrête sur message terminate public: OneShotJob( AssignmentAlgorithm& algo, int masterRank, @@ -423,11 +765,22 @@ namespace eo } }; + /** + * @brief Job that will be launched an unknown amount of times, in worker side. + * + * As explained in eo::mpi documentation, jobs can happen either a well known amount of times or an unknown + * amount of times. This class implements the general case when the job is launched an unknown amount of times, for + * instance in a while loop. The master will run many jobs (or the same job many times), but the workers will + * launch it only once. + * + * It uses the message Message::Kill as the termination message. This message can be launched with an EmptyJob, + * launched only by the master. If no Message::Kill is sent on the Channels::Commands, the worker will wait + * forever, which will cause a deadlock. + */ template< class JobData > class MultiJob : public Job< JobData > { public: - // TODO commentaire : ce job s'arrête sur message kill MultiJob ( AssignmentAlgorithm& algo, int masterRank, JobStore & store ) @@ -438,6 +791,5 @@ namespace eo }; } } - # endif // __EO_MPI_H__ From 5e76ba30b076b805bcde5f03c941478d2e5b88a1 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 13 Jul 2012 14:31:29 +0200 Subject: [PATCH 13/19] mDocumentation of eo::mpi classes. --- eo/src/mpi/eoMpi.cpp | 1 - eo/src/mpi/eoMpi.h | 5 + eo/src/mpi/eoMpiAssignmentAlgorithm.h | 172 +++++++++++++++++++++++++- eo/src/mpi/eoMpiNode.h | 40 ++++++ eo/src/mpi/eoParallelApply.h | 149 ++++++++++++++++++++-- eo/src/mpi/eoTerminateJob.h | 56 ++++++++- 6 files changed, 409 insertions(+), 14 deletions(-) diff --git a/eo/src/mpi/eoMpi.cpp b/eo/src/mpi/eoMpi.cpp index 9ffc84bd..ab754328 100644 --- a/eo/src/mpi/eoMpi.cpp +++ b/eo/src/mpi/eoMpi.cpp @@ -1,6 +1,5 @@ # include "eoMpi.h" -// MpiNode* MpiNodeStore::singleton; namespace eo { namespace mpi diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index d4e0349e..23039904 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -37,6 +37,7 @@ namespace eo /** * @ingroup Parallel * @defgroup MPI Message Passing Interface parallelization + * @{ */ /** @@ -790,6 +791,10 @@ namespace eo } }; } + + /** + * @} + */ } # endif // __EO_MPI_H__ diff --git a/eo/src/mpi/eoMpiAssignmentAlgorithm.h b/eo/src/mpi/eoMpiAssignmentAlgorithm.h index 85177ea0..d42ec723 100644 --- a/eo/src/mpi/eoMpiAssignmentAlgorithm.h +++ b/eo/src/mpi/eoMpiAssignmentAlgorithm.h @@ -1,28 +1,118 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __MPI_ASSIGNMENT_ALGORITHM_H__ # define __MPI_ASSIGNMENT_ALGORITHM_H__ -# include +# include // std::vector # include "eoMpiNode.h" namespace eo { namespace mpi { + /** + * @brief Constant indicating to use all the resting available workers, in assignment algorithms constructor + * using an interval. + */ const int REST_OF_THE_WORLD = -1; + /** + * @brief Contains informations on the available workers and allows to find assignees for jobs. + * + * Available workers are workers who aren't processing anything. When they've received an order, workers switch + * from the state "available" to the state "busy", and the master has to wait for their response for considering + * them available again. + * + * @ingroup Parallel + */ struct AssignmentAlgorithm { + /** + * @brief Gets the rank of an available worker, so as to send it a task. + * + * @return The MPI rank of an available worker, or -1 if there is no available worker. + */ virtual int get( ) = 0; + + /** + * @brief Gets the number of total available workers. + * + * Before the first call, it is equal to the total number of present workers, as specified in the + * specific assignment algorithm constructor. It allows the Job class to know when all the responses have + * been received, by comparing this number to the total number of workers. + * + * @return Integer indicating how many workers are available. + */ virtual int availableWorkers( ) = 0; + + /** + * @brief Reinject the worker of indicated rank in the available state. + * + * @param wrkRank The MPI rank of the worker who has finished its job. + */ virtual void confirm( int wrkRank ) = 0; + + /** + * @brief Indicates who are the workers which do nothing. + * + * At the end of the algorithm, the master has to warn all the workers that it's done. All the workers mean, + * the workers which are currently processing data, and the other ones who could be waiting : the idles. + * This function indicates to the master which worker aren't doing anything. + * + * @param A std::vector containing all the MPI ranks of the idles workers. + */ virtual std::vector idles( ) = 0; + + /** + * @brief Reinitializes the assignment algorithm with the right number of runs. + * + * In fact, this is only useful for static assignment algorithm, which has to be reinitialized every time + * it's used, in the case of a Multi Job. It's the user's responsability to call this function. + * + * @todo Not really clean. Find a better way to do it. + */ virtual void reinit( int runs ) = 0; }; + /** + * @brief Assignment (scheduling) algorithm which handles workers in a queue. + * + * With this assignment algorithm, workers are put in a queue and may be called an unlimited number of times. + * Whenever a worker returns, it is added to the queue, and it becomes available for the next call to get(). + * The available workers are all located in the queue at any time, so the number of available workers is + * directly equal to the size of the queue. + * + * This kind of assignment is adapted for tasks whose execution time is stochastic or unknown, but without any + * warranty to be faster than other assignments. + * + * @ingroup Parallel + */ struct DynamicAssignmentAlgorithm : public AssignmentAlgorithm { public: + /** + * @brief Uses all the hosts whose rank is higher to 1, inclusive, as workers. + */ DynamicAssignmentAlgorithm( ) { for(int i = 1; i < Node::comm().size(); ++i) @@ -31,16 +121,33 @@ namespace eo } } + /** + * @brief Uses the unique host with given rank as a worker. + * + * @param unique MPI rank of the unique worker. + */ DynamicAssignmentAlgorithm( int unique ) { availableWrk.push_back( unique ); } + /** + * @brief Uses the workers whose ranks are present in the argument as workers. + * + * @param workers std::vector containing MPI ranks of workers. + */ DynamicAssignmentAlgorithm( const std::vector & workers ) { availableWrk = workers; } + /** + * @brief Uses a range of ranks as workers. + * + * @param first The first worker to be included (inclusive) + * @param last The last worker to be included (inclusive). If last == eo::mpi::REST_OF_THE_WORLD, all + * hosts whose rank is higher than first are taken. + */ DynamicAssignmentAlgorithm( int first, int last ) { if( last == REST_OF_THE_WORLD ) @@ -90,14 +197,43 @@ namespace eo std::vector< int > availableWrk; }; + /** + * @brief Assignment algorithm which gives to each worker a precise number of tasks to do, in a round robin + * fashion. + * + * This scheduling algorithm attributes, at initialization or when calling reinit(), a fixed amount of runs to + * distribute to the workers. The amount of runs is then equally distributed between all workers ; if total + * number of runs is not a direct multiple of workers number, then remainding unaffected runs are distributed to + * workers from the first to the last, in a round-robin fashion. + * + * This scheduling should be used when the amount of runs can be computed or is fixed and when we guess that the + * duration of processing task will be the same for each run. There is no warranty that this algorithm is more + * or less efficient that another one. When having a doubt, use DynamicAssignmentAlgorithm. + * + * @ingroup Parallel + */ struct StaticAssignmentAlgorithm : public AssignmentAlgorithm { public: + /** + * @brief Uses a given precise set of workers. + * + * @param workers std::vector of MPI ranks of workers which will be used. + * @param runs Fixed amount of runs, strictly positive. + */ StaticAssignmentAlgorithm( std::vector& workers, int runs ) { init( workers, runs ); } + /** + * @brief Uses a range of workers. + * + * @param first The first MPI rank of worker to use + * @param last The last MPI rank of worker to use. If it's equal to REST_OF_THE_WORLD, then all the + * workers from the first one are taken as workers. + * @param runs Fixed amount of runs, strictly positive. + */ StaticAssignmentAlgorithm( int first, int last, int runs ) { std::vector workers; @@ -114,6 +250,12 @@ namespace eo init( workers, runs ); } + /** + * @brief Uses all the hosts whose rank is higher than 1 (inclusive) as workers. + * + * @param runs Fixed amount of runs, strictly positive. If it's not set, you'll have to call reinit() + * later. + */ StaticAssignmentAlgorithm( int runs = 0 ) { std::vector workers; @@ -125,6 +267,12 @@ namespace eo init( workers, runs ); } + /** + * @brief Uses an unique host as worker. + * + * @param unique The MPI rank of the host which will be the worker. + * @param runs Fixed amount of runs, strictly positive. + */ StaticAssignmentAlgorithm( int unique, int runs ) { std::vector workers; @@ -133,15 +281,31 @@ namespace eo } private: + /** + * @brief Initializes the static scheduling. + * + * Gives to each worker an equal attribution number, equal to runs / workers.size(), eventually plus one + * if number of workers is not a divisor of runs. + * + * @param workers Vector of hosts' ranks + * @param runs Fixed amount of runs, strictly positive. + */ void init( std::vector & workers, int runs ) { unsigned int nbWorkers = workers.size(); freeWorkers = nbWorkers; - attributions.clear(); - attributions.reserve( nbWorkers ); busy.clear(); busy.resize( nbWorkers, false ); + realRank = workers; + + if( runs <= 0 ) + { + return; + } + + attributions.clear(); + attributions.reserve( nbWorkers ); // Let be the euclidean division of runs by nbWorkers : // runs == q * nbWorkers + r, 0 <= r < nbWorkers @@ -151,8 +315,6 @@ namespace eo // r requests to workers, in ascending order unsigned int diff = runs - (runs / nbWorkers) * nbWorkers; for (unsigned int i = 0; i < diff; ++attributions[i++]); - - realRank = workers; } public: diff --git a/eo/src/mpi/eoMpiNode.h b/eo/src/mpi/eoMpiNode.h index 9f1ea7b5..e836637f 100644 --- a/eo/src/mpi/eoMpiNode.h +++ b/eo/src/mpi/eoMpiNode.h @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __MPI_NODE_H__ # define __MPI_NODE_H__ @@ -8,15 +29,34 @@ namespace eo { namespace mpi { + /** + * @brief Global object used to reach boost::mpi::communicator everywhere. + * + * boost::mpi::communicator is the main object used to send and receive messages between the different hosts of + * a MPI algorithm. + * + * @ingroup Parallel + */ class Node { public: + /** + * @brief Initializes the MPI environment with argc and argv. + * + * Should be called at the beginning of every parallel program. + * + * @param argc Main's argc + * @param argv Main's argv + */ static void init( int argc, char** argv ) { static bmpi::environment env( argc, argv ); } + /** + * @brief Returns the global boost::mpi::communicator + */ static bmpi::communicator& comm() { return _comm; diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index d0435cea..6f5120f8 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -1,28 +1,100 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EO_PARALLEL_APPLY_H__ # define __EO_PARALLEL_APPLY_H__ # include "eoMpi.h" -# include -# include +# include // eoUF +# include // std::vector population + +/** + * @file eoParallelApply.h + * + * @brief Applies a functor with single parameter to elements of a table, in a parallel fashion. + * + * This file contains all the required classes to do a parallel apply of a table, in a parallel fashion. This can be + * very useful when applying the function can be made without any dependances within the data. In EO, it occurs in + * particular during the evaluation: the number of individuals to evaluate can be really high, and the evaluation of one + * individual is independant from the evaluation of other individuals. + * + * Elements in the table are directly replaced, as the table is given by reference. No new table is made during the + * process. + * + * User can tune this job, indicating how many elements of the table should be sent and evaluated by a worker, at a + * time: this is called the "packet size", as individuals are groupped into a packet of individuals which are sent to + * the worker before evaluation. The problem of choosing the optimal packet size is beyond the purposes of this documentation + * and deserves a theoritical study. + * + * This job is the parallel equivalent to the function apply, defined in apply.h. It just applies the function to + * every element of a table. In Python or Javascript, it's the equivalent of the function Map. + */ namespace eo { namespace mpi { + /** + * @brief Structure used to save assignment to a worker, i.e which slice of the table it has to process. + * + * This slice is defined by the index of the first evaluated argument and the number of processed elements. + */ struct ParallelApplyAssignment { int index; int size; }; + /** + * @brief Data useful for a parallel apply (map). + * + * A parallel apply needs at least the functor to apply to every element of the table, and the table itself, + * whereas it can be set later with the function init(). Master rank is also needed, to send it informations and + * receive informations from it, inside the functors (the job knows these values, but the functors don't). The + * size of a packet can be tuned here. + * + * Internal attributes contain: + * - (useful for master) the index of the next element to be evaluated. + * - (useful for master) a map containing links between MPI ranks and slices of the table which the worker with + * this rank has evaluated. Without this map, when receiving results from a worker, the master couldn't be + * able to replace the right elements in the table. + * + * @ingroup Parallel + */ template struct ParallelApplyData { + /** + * @brief Ctor for Parallel apply (map) data. + * + * @param _proc The functor to apply on each element in the table + * @param _masterRank The MPI rank of the master + * @param _packetSize The number of elements on which the function will be applied by the worker, at a time. + * @param _pop The table to apply. If this value is NULL, user will have to call init() before launching the + * job. + */ ParallelApplyData( eoUF & _proc, int _masterRank, - // long _maxTime = 0, int _packetSize, std::vector * _pop = 0 ) : @@ -39,6 +111,9 @@ namespace eo } } + /** + * @brief Reinitializes the data for a new table to evaluate. + */ void init( std::vector& _pop ) { index = 0; @@ -52,11 +127,12 @@ namespace eo return *_data; } + // All elements are public since functors will often use them. std::vector * _data; eoUF & func; int index; int size; - std::map< int /* worker rank */, ParallelApplyAssignment /* min indexes in vector */> assignedTasks; + std::map< int /* worker rank */, ParallelApplyAssignment /* last assignment */> assignedTasks; int packetSize; std::vector tempArray; @@ -64,6 +140,15 @@ namespace eo bmpi::communicator& comm; }; + /** + * @brief Send task functor implementation for the parallel apply (map) job. + * + * Master side: Sends a slice of the table to evaluate to the worker. + * + * Implementation details: + * Finds the next slice of data to send to the worker, sends first the size and then the data, and memorizes + * that this slice has been distributed to the worker, then updates the next position of element to evaluate. + */ template< class EOT > class SendTaskParallelApply : public SendTaskFunction< ParallelApplyData > { @@ -100,6 +185,11 @@ namespace eo } }; + /** + * @brief Handle response functor implementation for the parallel apply (map) job. + * + * Master side: Replaces the slice of data attributed to the worker in the table. + */ template< class EOT > class HandleResponseParallelApply : public HandleResponseFunction< ParallelApplyData > { @@ -117,6 +207,14 @@ namespace eo } }; + /** + * @brief Process task functor implementation for the parallel apply (map) job. + * + * Worker side: apply the function to the given slice of data. + * + * Implementation details: retrieves the number of elements to evaluate, retrieves them, applies the function + * and then returns the results. + */ template< class EOT > class ProcessTaskParallelApply : public ProcessTaskFunction< ParallelApplyData > { @@ -145,6 +243,12 @@ namespace eo } }; + /** + * @brief Is finished functor implementation for the parallel apply (map) job. + * + * Master side: returns true if and only if the whole table has been evaluated. The job is also terminated only + * when the whole table has been evaluated. + */ template< class EOT > class IsFinishedParallelApply : public IsFinishedFunction< ParallelApplyData > { @@ -162,6 +266,14 @@ namespace eo } }; + /** + * @brief Store containing all the datas and the functors for the parallel apply (map) job. + * + * User can tune functors when constructing the object. For each functor which is not given, a default one is + * generated. + * + * @ingroup Parallel + */ template< class EOT > struct ParallelApplyStore : public JobStore< ParallelApplyData > { @@ -170,6 +282,17 @@ namespace eo using JobStore< ParallelApplyData >::_ptf; using JobStore< ParallelApplyData >::_iff; + /** + * @brief Main constructor for the parallel apply (map) job. + * + * @param _proc The procedure to apply to each element of the table. + * @param _masterRank The rank of the master process. + * @param _packetSize The number of elements of the table to be evaluated at a time, by the worker. + * @param stpa Pointer to Send Task parallel apply functor descendant. If null, a default one is used. + * @param hrpa Pointer to Handle Response parallel apply functor descendant. If null, a default one is used. + * @param ptpa Pointer to Process Task parallel apply functor descendant. If null, a default one is used. + * @param ifpa Pointer to Is Finished parallel apply functor descendant. If null, a default one is used. + */ ParallelApplyStore( eoUF & _proc, int _masterRank, @@ -210,12 +333,17 @@ namespace eo ParallelApplyData* data() { return &_data; } + /** + * @brief Reinits the store with a new table to evaluate. + * + * @param _pop The table of elements to be evaluated. + */ void data( std::vector& _pop ) { _data.init( _pop ); } - virtual ~ParallelApplyStore() + virtual ~ParallelApplyStore() // for inheritance purposes only { } @@ -223,8 +351,15 @@ namespace eo ParallelApplyData _data; }; - // TODO commentaire : impossible de faire un typedef sur un template sans passer - // par un traits => complique la tâche de l'utilisateur pour rien. + /** + * @brief Parallel apply job. Present for convenience only. + * + * A typedef wouldn't have been working, as typedef on templates don't work in C++. Traits would be a + * disgraceful overload for the user. + * + * @ingroup Parallel + * @see eoParallelApply.h + */ template< typename EOT > class ParallelApply : public MultiJob< ParallelApplyData > { diff --git a/eo/src/mpi/eoTerminateJob.h b/eo/src/mpi/eoTerminateJob.h index ed45ca05..ccbd3521 100644 --- a/eo/src/mpi/eoTerminateJob.h +++ b/eo/src/mpi/eoTerminateJob.h @@ -1,3 +1,24 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ # ifndef __EO_TERMINATE_H__ # define __EO_TERMINATE_H__ @@ -7,6 +28,14 @@ namespace eo { namespace mpi { + /** + * @ingroup Parallel + * @{ + */ + + /** + * @brief Send task functor which does nothing. + */ struct DummySendTaskFunction : public SendTaskFunction { void operator()( int _ ) @@ -15,6 +44,9 @@ namespace eo } }; + /** + * @brief Handle response functor which does nothing. + */ struct DummyHandleResponseFunction : public HandleResponseFunction { void operator()( int _ ) @@ -23,6 +55,9 @@ namespace eo } }; + /** + * @brief Process task functor which does nothing. + */ struct DummyProcessTaskFunction : public ProcessTaskFunction { void operator()() @@ -31,6 +66,9 @@ namespace eo } }; + /** + * @brief Is finished functor which returns true everytime. + */ struct DummyIsFinishedFunction : public IsFinishedFunction { bool operator()() @@ -39,6 +77,9 @@ namespace eo } }; + /** + * @brief Job store containing all dummy functors and containing no data. + */ struct DummyJobStore : public JobStore { using JobStore::_stf; @@ -61,11 +102,20 @@ namespace eo void* data() { return 0; } }; + /** + * @brief Job to run after a Multi Job, so as to indicate that every workers should terminate. + */ struct EmptyJob : public OneShotJob { + /** + * @brief Main EmptyJob ctor + * + * @param algo Assignment (scheduling) algorithm used. + * @param masterRank The rank of the master process. + */ EmptyJob( AssignmentAlgorithm& algo, int masterRank ) : OneShotJob( algo, masterRank, *(new DummyJobStore) ) - // FIXME memory leak => will be corrected by using const correctness + // FIXME memory leak, meaningless but present { // empty } @@ -79,6 +129,10 @@ namespace eo } } }; + + /** + * @} + */ } } From f9a0084f959033ab896fe2ad19ca42a7adc7079a Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 13 Jul 2012 14:45:56 +0200 Subject: [PATCH 14/19] Taking in account Doxygen remarks, fixed some documentation errors. --- eo/src/mpi/eoMpi.h | 3 +++ eo/src/mpi/eoMpiAssignmentAlgorithm.h | 2 +- eo/src/serial/Object.h | 2 +- eo/src/serial/Parser.h | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index 23039904..a040e362 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -514,12 +514,15 @@ namespace eo * AssignmentAlgorithm for more details. * * @param _masterRank The MPI rank of the master. + * * @param _workerStopCondition Number of the message which will cause the workers to terminate. It could * be one of the constants defined in eo::mpi::Commands, or any other integer. The user has to be sure * that a message containing this integer will be sent to each worker on the Commands channel, otherwise * deadlock will happen. Master sends Finish messages at the end of a simple job, but as a job can * happen multiples times (multi job), workers don't have to really finish on these messages but on * another message. This is here where you can configurate it. See also OneShotJob and MultiJob. + * + * @param store The JobStore containing functors and data for this job. */ Job( AssignmentAlgorithm& _algo, int _masterRank, diff --git a/eo/src/mpi/eoMpiAssignmentAlgorithm.h b/eo/src/mpi/eoMpiAssignmentAlgorithm.h index d42ec723..e052d8ab 100644 --- a/eo/src/mpi/eoMpiAssignmentAlgorithm.h +++ b/eo/src/mpi/eoMpiAssignmentAlgorithm.h @@ -78,7 +78,7 @@ namespace eo * the workers which are currently processing data, and the other ones who could be waiting : the idles. * This function indicates to the master which worker aren't doing anything. * - * @param A std::vector containing all the MPI ranks of the idles workers. + * @return A std::vector containing all the MPI ranks of the idles workers. */ virtual std::vector idles( ) = 0; diff --git a/eo/src/serial/Object.h b/eo/src/serial/Object.h index 20fbdbf7..9424f1c4 100644 --- a/eo/src/serial/Object.h +++ b/eo/src/serial/Object.h @@ -46,7 +46,7 @@ namespace eoserial /** * @brief Adds a pair into the JSON object. * @param key The key associated with the eoserial object - * @param eoserial The JSON object as created with framework. + * @param json The JSON object as created with framework. */ void add( const std::string& key, eoserial::Entity* json ) { diff --git a/eo/src/serial/Parser.h b/eo/src/serial/Parser.h index 5bcdc597..20f6a1bb 100644 --- a/eo/src/serial/Parser.h +++ b/eo/src/serial/Parser.h @@ -84,7 +84,7 @@ class Parser * * @param str The string we're parsing. * @param pos The index of the current position in the string. - * @param eoserial The current JSON object for which we're adding a key-value pair. + * @param json The current JSON object for which we're adding a key-value pair. */ static void parseLeft(const std::string & str, size_t & pos, eoserial::Object* json); From d54e88dd165feaef3858585b03740f4ba0249c73 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 13 Jul 2012 15:07:42 +0200 Subject: [PATCH 15/19] eoMultiParallelApply is no more used, replaced with eo::mpi::MultiJob instead. --- eo/src/mpi/eoMultiParallelApply.h | 57 ------------------------------- 1 file changed, 57 deletions(-) delete mode 100644 eo/src/mpi/eoMultiParallelApply.h diff --git a/eo/src/mpi/eoMultiParallelApply.h b/eo/src/mpi/eoMultiParallelApply.h deleted file mode 100644 index f4fe47b5..00000000 --- a/eo/src/mpi/eoMultiParallelApply.h +++ /dev/null @@ -1,57 +0,0 @@ -# ifndef __EO_MULTI_PARALLEL_APPLY_H__ -# define __EO_MULTI_PARALLEL_APPLY_H__ - -# include "eoParallelApply.h" - -namespace eo -{ - namespace mpi - { - template< class EOT > - class ProcessTaskParallelEval : public ProcessTaskParallelApply - { - public: - - using ProcessTaskParallelApply::_wrapped; - using ProcessTaskParallelApply::d; - - void operator()() - { - int order = Message::Continue; - while( order != Message::Finish ) - { - _wrapped->operator()(); - d->comm.recv( d->masterRank, Channel::Commands, order ); - } - } - - ~ProcessTaskParallelEval() - { - } - }; - - template< class EOT > - struct ParallelEvalStore : public ParallelApplyStore< EOT > - { - using ParallelApplyStore::wrapProcessTask; - - ParallelEvalStore( - eoUF & _proc, - int _masterRank, - int _packetSize = 1 - ) : - ParallelApplyStore< EOT >( _proc, *( new std::vector ), _masterRank, _packetSize ) - // FIXME memory leak because of vector ==> use const correctness - { - wrapProcessTask( new ProcessTaskParallelEval ); - } - - void data( std::vector& _pop ) - { - ParallelApplyStore::_data.init( _pop ); - } - }; - } -} -# endif // __EO_PARALLEL_APPLY_H__ - From ad74755d8e1e81676acf43a7f764eab0efce68b2 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 13 Jul 2012 15:50:51 +0200 Subject: [PATCH 16/19] Precised group MPI instead of group Parallel for Mpi classes in documentation. --- eo/src/mpi/eoMpi.h | 24 ++++++++++++++++++++++++ eo/src/mpi/eoMpiAssignmentAlgorithm.h | 8 +++++--- eo/src/mpi/eoMpiNode.h | 2 +- eo/src/mpi/eoParallelApply.h | 6 +++--- eo/src/mpi/eoTerminateJob.h | 2 +- eo/src/serial/Entity.h | 3 ++- 6 files changed, 36 insertions(+), 9 deletions(-) diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index a040e362..0293c43d 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -137,6 +137,8 @@ namespace eo * These tags are used for framework communication and fits "channels", so as to differentiate when we're * sending an order to a worker (Commands) or data (Messages). They are not reserved by the framework and can be * used by the user, but he is not bound to. + * + * @ingroup MPI */ namespace Channel { @@ -149,6 +151,8 @@ namespace eo * * These orders are sent by the master to the workers, to indicate to them if they should receive another task * to do (Continue), if an one shot job is done (Finish) or if a multi job is done (Kill). + * + * @ingroup MPI */ namespace Message { @@ -159,6 +163,8 @@ namespace eo /** * @brief If the job only has one master, the user can use this constant, so as not to worry with integer ids. + * + * @ingroup MPI */ const int DEFAULT_MASTER = 0; @@ -204,6 +210,8 @@ namespace eo * }; * @endcode * This makes the code easier to write for the user. + * + * @ingroup MPI */ template< typename JobData, typename Wrapped > struct SharedDataFunction @@ -277,6 +285,8 @@ namespace eo * * This is a functor implementing void operator()(int), and also a shared data function, containing wrapper on its * own type. + * + * @ingroup MPI */ template< typename JobData > struct SendTaskFunction : public eoUF, public SharedDataFunction< JobData, SendTaskFunction > @@ -300,6 +310,8 @@ namespace eo * * This is a functor implementing void operator()(int), and also a shared data function, containing wrapper on * its own type. + * + * @ingroup MPI */ template< typename JobData > struct HandleResponseFunction : public eoUF, public SharedDataFunction< JobData, HandleResponseFunction > @@ -325,6 +337,8 @@ namespace eo * * This is a functor implementing void operator()(), and also a shared data function, containing wrapper on its * own type. + * + * @ingroup MPI */ template< typename JobData > struct ProcessTaskFunction : public eoF, public SharedDataFunction< JobData, ProcessTaskFunction > @@ -348,6 +362,8 @@ namespace eo * * This is a functor implementing bool operator()(), and also a shared function, containing wrapper on its own * type. + * + * @ingroup MPI */ template< typename JobData > struct IsFinishedFunction : public eoF, public SharedDataFunction< JobData, IsFinishedFunction > @@ -377,6 +393,8 @@ namespace eo * The user has to implement data(), which is the getter for retrieving JobData. We don't have any idea of who * owns the data, moreover it is impossible to initialize it in this generic JobStore, as we don't know its * form. As a matter of fact, the user has to define this in the JobStore subclasses. + * + * @ingroup MPI */ template< typename JobData > struct JobStore @@ -501,6 +519,8 @@ namespace eo * * Any of the 3 master functors can launch exception, it will be catched and rethrown as a std::runtime_exception * to the higher layers. + * + * @ingroup MPI */ template< class JobData > class Job @@ -755,6 +775,8 @@ namespace eo * times. The job will be terminated on both sides (master and worker) once the master would have said it. * * It uses the message Message::Finish as the termination message. + * + * @ingroup MPI */ template< class JobData > class OneShotJob : public Job< JobData > @@ -780,6 +802,8 @@ namespace eo * It uses the message Message::Kill as the termination message. This message can be launched with an EmptyJob, * launched only by the master. If no Message::Kill is sent on the Channels::Commands, the worker will wait * forever, which will cause a deadlock. + * + * @ingroup MPI */ template< class JobData > class MultiJob : public Job< JobData > diff --git a/eo/src/mpi/eoMpiAssignmentAlgorithm.h b/eo/src/mpi/eoMpiAssignmentAlgorithm.h index e052d8ab..07f67415 100644 --- a/eo/src/mpi/eoMpiAssignmentAlgorithm.h +++ b/eo/src/mpi/eoMpiAssignmentAlgorithm.h @@ -32,6 +32,8 @@ namespace eo /** * @brief Constant indicating to use all the resting available workers, in assignment algorithms constructor * using an interval. + * + * @ingroup MPI */ const int REST_OF_THE_WORLD = -1; @@ -42,7 +44,7 @@ namespace eo * from the state "available" to the state "busy", and the master has to wait for their response for considering * them available again. * - * @ingroup Parallel + * @ingroup MPI */ struct AssignmentAlgorithm { @@ -104,7 +106,7 @@ namespace eo * This kind of assignment is adapted for tasks whose execution time is stochastic or unknown, but without any * warranty to be faster than other assignments. * - * @ingroup Parallel + * @ingroup MPI */ struct DynamicAssignmentAlgorithm : public AssignmentAlgorithm { @@ -210,7 +212,7 @@ namespace eo * duration of processing task will be the same for each run. There is no warranty that this algorithm is more * or less efficient that another one. When having a doubt, use DynamicAssignmentAlgorithm. * - * @ingroup Parallel + * @ingroup MPI */ struct StaticAssignmentAlgorithm : public AssignmentAlgorithm { diff --git a/eo/src/mpi/eoMpiNode.h b/eo/src/mpi/eoMpiNode.h index e836637f..27c03312 100644 --- a/eo/src/mpi/eoMpiNode.h +++ b/eo/src/mpi/eoMpiNode.h @@ -35,7 +35,7 @@ namespace eo * boost::mpi::communicator is the main object used to send and receive messages between the different hosts of * a MPI algorithm. * - * @ingroup Parallel + * @ingroup MPI */ class Node { diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index 6f5120f8..98f113f4 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -78,7 +78,7 @@ namespace eo * this rank has evaluated. Without this map, when receiving results from a worker, the master couldn't be * able to replace the right elements in the table. * - * @ingroup Parallel + * @ingroup MPI */ template struct ParallelApplyData @@ -272,7 +272,7 @@ namespace eo * User can tune functors when constructing the object. For each functor which is not given, a default one is * generated. * - * @ingroup Parallel + * @ingroup MPI */ template< class EOT > struct ParallelApplyStore : public JobStore< ParallelApplyData > @@ -357,7 +357,7 @@ namespace eo * A typedef wouldn't have been working, as typedef on templates don't work in C++. Traits would be a * disgraceful overload for the user. * - * @ingroup Parallel + * @ingroup MPI * @see eoParallelApply.h */ template< typename EOT > diff --git a/eo/src/mpi/eoTerminateJob.h b/eo/src/mpi/eoTerminateJob.h index ccbd3521..e0b4a5bd 100644 --- a/eo/src/mpi/eoTerminateJob.h +++ b/eo/src/mpi/eoTerminateJob.h @@ -29,7 +29,7 @@ namespace eo namespace mpi { /** - * @ingroup Parallel + * @ingroup MPI * @{ */ diff --git a/eo/src/serial/Entity.h b/eo/src/serial/Entity.h index 6fec3485..50155e13 100644 --- a/eo/src/serial/Entity.h +++ b/eo/src/serial/Entity.h @@ -24,6 +24,7 @@ Authors: # include // ostream + /** * @brief Contains all the necessary entities to serialize eo objects into JSON objects. * @@ -36,7 +37,7 @@ Authors: * * @ingroup Utilities * @defgroup Serialization Serialization helpers - */ +**/ namespace eoserial { From d2816d530a58f731aab6192db958df46a6d09d8a Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Fri, 13 Jul 2012 16:47:28 +0200 Subject: [PATCH 17/19] Included MPI tests into compilation, if WITH_MPI is defined. --- eo/CMakeLists.txt | 5 ++ eo/test/CMakeLists.txt | 15 ++++- eo/test/mpi/CMakeLists.txt | 55 +++++++++++++++++++ eo/test/mpi/{eval.cpp => t-mpi-eval.cpp} | 0 ...tipleRoles.cpp => t-mpi-multipleRoles.cpp} | 0 ...allelApply.cpp => t-mpi-parallelApply.cpp} | 0 .../mpi/{wrapper.cpp => t-mpi-wrapper.cpp} | 0 7 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 eo/test/mpi/CMakeLists.txt rename eo/test/mpi/{eval.cpp => t-mpi-eval.cpp} (100%) rename eo/test/mpi/{multipleRoles.cpp => t-mpi-multipleRoles.cpp} (100%) rename eo/test/mpi/{parallelApply.cpp => t-mpi-parallelApply.cpp} (100%) rename eo/test/mpi/{wrapper.cpp => t-mpi-wrapper.cpp} (100%) diff --git a/eo/CMakeLists.txt b/eo/CMakeLists.txt index 78cab0a5..df128f13 100644 --- a/eo/CMakeLists.txt +++ b/eo/CMakeLists.txt @@ -120,6 +120,11 @@ ENDIF (ENABLE_CMAKE_TESTING) ADD_SUBDIRECTORY(doc) ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(test) + +IF(WITH_MPI) + ADD_SUBDIRECTORY(test/mpi) +ENDIF() + ADD_SUBDIRECTORY(tutorial) ###################################################################################### diff --git a/eo/test/CMakeLists.txt b/eo/test/CMakeLists.txt index 4e68b02a..8bf8b500 100644 --- a/eo/test/CMakeLists.txt +++ b/eo/test/CMakeLists.txt @@ -14,12 +14,23 @@ INCLUDE_DIRECTORIES(${EO_SOURCE_DIR}/contrib) INCLUDE_DIRECTORIES(${EO_SOURCE_DIR}/contrib/MGE) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +IF(WITH_MPI) + INCLUDE_DIRECTORIES(${BOOST_DIR}/include) + INCLUDE_DIRECTORIES(${MPI_DIR}/include) +ENDIF() + ###################################################################################### ### 2) Specify where CMake can find the libraries ###################################################################################### LINK_DIRECTORIES(${EO_BINARY_DIR}/lib) +IF(WITH_MPI) + LINK_DIRECTORIES(${BOOST_DIR}/lib) + LINK_DIRECTORIES(${MPI_DIR}/lib) + SET(CMAKE_CXX_COMPILER "${MPI_DIR}/bin/mpicxx") +ENDIF() + ###################################################################################### ### 3) Define your targets and link the librairies ###################################################################################### @@ -65,8 +76,8 @@ SET (TEST_LIST t-eoExtendedVelocity t-eoLogger t-eoIQRStat - t-eoParallel - t-openmp + #t-eoParallel + #t-openmp #t-eoDualFitness t-eoParser ) diff --git a/eo/test/mpi/CMakeLists.txt b/eo/test/mpi/CMakeLists.txt new file mode 100644 index 00000000..37b1227f --- /dev/null +++ b/eo/test/mpi/CMakeLists.txt @@ -0,0 +1,55 @@ +############################################################################### +## +## CMakeLists file for eo/test/mpi +## +############################################################################### + +###################################################################################### +### 1) Include the sources +###################################################################################### + +MESSAGE("EO SOURCE DIR: ${EO_SOURCE_DIR}") +MESSAGE("OMPI: ${MPI_DIR}") +MESSAGE("BOOST: ${BOOST_DIR}") + +INCLUDE_DIRECTORIES(${MPI_DIR}/include) +INCLUDE_DIRECTORIES(${BOOST_DIR}/include) +INCLUDE_DIRECTORIES(${EO_SOURCE_DIR}/src) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +###################################################################################### +### 2) Specify where CMake can find the libraries +###################################################################################### + +LINK_DIRECTORIES(${EO_BINARY_DIR}/lib) +LINK_DIRECTORIES(${MPI_DIR}/lib) +LINK_DIRECTORIES(${BOOST_DIR}/lib) + +###################################################################################### +### 3) Define your targets and link the librairies +###################################################################################### + +SET (TEST_LIST + t-mpi-parallelApply + t-mpi-wrapper + t-mpi-multipleRoles + t-mpi-eval + ) + +FOREACH (test ${TEST_LIST}) + SET ("T_${test}_SOURCES" "${test}.cpp") +ENDFOREACH (test) + +SET(CMAKE_CXX_COMPILER "${MPI_DIR}/bin/mpicxx") +ADD_DEFINITIONS(-DWITH_MPI) + +IF(ENABLE_CMAKE_TESTING) + FOREACH (test ${TEST_LIST}) + ADD_EXECUTABLE(${test} ${T_${test}_SOURCES}) + ADD_TEST(${test} ${test}) + TARGET_LINK_LIBRARIES(${test} boost_mpi boost_serialization eoutils eompi eoserial eo) + INSTALL(TARGETS ${test} RUNTIME DESTINATION share/eo/test COMPONENT test) + ENDFOREACH (test) +ENDIF() + +###################################################################################### diff --git a/eo/test/mpi/eval.cpp b/eo/test/mpi/t-mpi-eval.cpp similarity index 100% rename from eo/test/mpi/eval.cpp rename to eo/test/mpi/t-mpi-eval.cpp diff --git a/eo/test/mpi/multipleRoles.cpp b/eo/test/mpi/t-mpi-multipleRoles.cpp similarity index 100% rename from eo/test/mpi/multipleRoles.cpp rename to eo/test/mpi/t-mpi-multipleRoles.cpp diff --git a/eo/test/mpi/parallelApply.cpp b/eo/test/mpi/t-mpi-parallelApply.cpp similarity index 100% rename from eo/test/mpi/parallelApply.cpp rename to eo/test/mpi/t-mpi-parallelApply.cpp diff --git a/eo/test/mpi/wrapper.cpp b/eo/test/mpi/t-mpi-wrapper.cpp similarity index 100% rename from eo/test/mpi/wrapper.cpp rename to eo/test/mpi/t-mpi-wrapper.cpp From b92f17fce55b7c602b796b7d796f0eccc4494648 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Mon, 16 Jul 2012 15:07:48 +0200 Subject: [PATCH 18/19] Documentation of MPI examples. --- eo/test/mpi/t-mpi-eval.cpp | 51 ++++++++++++++++--- eo/test/mpi/t-mpi-multipleRoles.cpp | 66 ++++++++++++++++++++++-- eo/test/mpi/t-mpi-parallelApply.cpp | 78 ++++++++++++++++++++++++++--- eo/test/mpi/t-mpi-wrapper.cpp | 45 ++++++++++++++++- 4 files changed, 219 insertions(+), 21 deletions(-) diff --git a/eo/test/mpi/t-mpi-eval.cpp b/eo/test/mpi/t-mpi-eval.cpp index 72fca755..e5678104 100644 --- a/eo/test/mpi/t-mpi-eval.cpp +++ b/eo/test/mpi/t-mpi-eval.cpp @@ -1,12 +1,35 @@ -//----------------------------------------------------------------------------- -// t-eoMpiParallel.cpp +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ + +/* + * This file shows an example of parallel evaluation of a population, when using an eoEasyEA algorithm. + * Moreover, we add a basic wrapper on the parallel evaluation, so as to show how to retrieve the best solutions. + */ //----------------------------------------------------------------------------- #include #include #include -// #include #include "../real_value.h" #include @@ -80,6 +103,16 @@ class eoRealSerializable : public eoReal< eoMinimizingFitness >, public eoserial typedef eoRealSerializable EOT; +/* + * Wrapper for HandleResponse: shows the best answer, as it is found. + * + * Finding the best solution is an associative operation (as it is based on a "min" function, which is associative too) + * and that's why we can perform it here. Indeed, the min element of 5 elements is the min element of the 3 first + * elements and the min element of the 2 last elements: + * min(1, 2, 3, 4, 5) = min( min(1, 2, 3), min(4, 5) ) + * + * This is a reduction. See MapReduce example to have another examples of reduction. + */ struct CatBestAnswers : public eo::mpi::HandleResponseParallelApply { CatBestAnswers() @@ -87,14 +120,15 @@ struct CatBestAnswers : public eo::mpi::HandleResponseParallelApply best.fitness( 1000000000. ); } - using eo::mpi::HandleResponseParallelApply::_wrapped; - using eo::mpi::HandleResponseParallelApply::d; + // if EOT were a template, we would have to do: (thank you C++ :) + // using eo::mpi::HandleResponseParallelApply::_wrapped; + // using eo::mpi::HandleResponseParallelApply::d; void operator()(int wrkRank) { int index = d->assignedTasks[wrkRank].index; int size = d->assignedTasks[wrkRank].size; - (*_wrapped)( wrkRank ); + (*_wrapped)( wrkRank ); // call to the wrapped function HERE for(int i = index; i < index+size; ++i) { if( best.fitness() < d->data()[ i ].fitness() ) @@ -134,6 +168,9 @@ int main(int ac, char** av) eoEvalFuncPtr< EOT, double, const std::vector< double >& > mainEval( real_value ); eoEvalFuncCounter< EOT > eval( mainEval ); + // until this point, everything (but eo::mpi::Node::init) is exactly as in an sequential version. + // We then instanciate the parallel algorithm. The store is directly used by the eoParallelPopLoopEval, which + // internally uses parallel apply. int rank = eo::mpi::Node::comm().rank(); eo::mpi::DynamicAssignmentAlgorithm assign; if( rank == eo::mpi::DEFAULT_MASTER ) @@ -157,7 +194,7 @@ int main(int ac, char** av) eo::log << eo::quiet << "DONE!" << std::endl; } else { - eoPop< EOT > pop( popSize, init ); + eoPop< EOT > pop; // the population doesn't have to be initialized, as it is not used by workers. eoParallelPopLoopEval< EOT > popEval( assign, eo::mpi::DEFAULT_MASTER, eval ); popEval( pop, pop ); } diff --git a/eo/test/mpi/t-mpi-multipleRoles.cpp b/eo/test/mpi/t-mpi-multipleRoles.cpp index 232a6110..36d588b8 100644 --- a/eo/test/mpi/t-mpi-multipleRoles.cpp +++ b/eo/test/mpi/t-mpi-multipleRoles.cpp @@ -1,3 +1,42 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ + +/* + * This file shows an example of how to make a hierarchy between nodes, when using a parallel apply. In this basic + * test, the master delegates the charge of finding workers to 2 "sub" masters, which then send part of the table to + * their workers. + * + * It's convenient to establish a role map, so as to clearly identify every role: + * - The node 0 is the general master, that delegates the job. It sends the table to the 2 submasters, and waits for the + * results. + * - Nodes 1 and 2 are the worker of the first job: the delegates. They receive the elements of the table and + * retransmit them to the subworkers. They play the roles of worker in the delegating job, and master in the plus one + * job. + * - Following nodes (3 to 6) are workers of the plus one job. They do the real job. Nodes 3 and 5 are attached to + * submaster 1, 4 and 6 to submaster 2. + * + * This test requires exactly 7 hosts. If the size is bigger, an exception will be thrown at the beginning. + **/ + # include # include # include @@ -11,11 +50,7 @@ using namespace std; using namespace eo::mpi; -// Role map -// 0 : general master -// 1, 2 : worker of general job, master of subjob -// 3 to 7 : workers of subjob - +// The real job to execute, for the subworkers: add one to each element of a table. struct SubWork: public eoUF< int&, void > { void operator() ( int & x ) @@ -25,14 +60,20 @@ struct SubWork: public eoUF< int&, void > } }; +// Function called by both subworkers and delegates. +// v is the vector to process, rank is the MPI rank of the sub master void subtask( vector& v, int rank ) { + // Attach workers according to nodes. + // Submaster with rank 1 will have ranks 3 and 5 as subworkers. + // Submaster with rank 2 will have ranks 4 and 6 as subworkers. vector workers; workers.push_back( rank + 2 ); workers.push_back( rank + 4 ); DynamicAssignmentAlgorithm algo( workers ); SubWork sw; + // Launch the job! ParallelApplyStore store( sw, rank ); store.data( v ); ParallelApply job( algo, rank, store ); @@ -40,6 +81,10 @@ void subtask( vector& v, int rank ) EmptyJob stop( algo, rank ); } +// Functor applied by submasters. Wait for the subworkers responses and then add some random processing (here, multiply +// each result by two). +// Note that this work receives a vector of integers as an entry, while subworkers task's operator receives a simple +// integer. struct Work: public eoUF< vector&, void > { void operator() ( vector& v ) @@ -57,6 +102,10 @@ int main(int argc, char** argv) { // eo::log << eo::setlevel( eo::debug ); Node::init( argc, argv ); + if( Node::comm().size() != 7 ) { + throw std::runtime_error("World size should be 7."); + } + vector v; v.push_back(1); @@ -65,12 +114,18 @@ int main(int argc, char** argv) v.push_back(7); v.push_back(42); + // As submasters' operator receives a vector as an input, and ParallelApply takes a vector of + // operator's input as an input, we have to deal with a vector of vector of integers for the master task. vector< vector > metaV; + // Here, we send twice the same vector. We could also have splitted the first vector into two vectors, one + // containing the beginning and another one containing the end. metaV.push_back( v ); metaV.push_back( v ); + // Assigning roles is done by comparing MPI ranks. switch( Node::comm().rank() ) { + // Nodes from 0 to 2 are implicated into the delegating task. case 0: case 1: case 2: @@ -95,6 +150,7 @@ int main(int argc, char** argv) } break; + // Other nodes are implicated into the subwork task. default: { // all the other nodes are sub workers diff --git a/eo/test/mpi/t-mpi-parallelApply.cpp b/eo/test/mpi/t-mpi-parallelApply.cpp index 26b0b30e..7cefa203 100644 --- a/eo/test/mpi/t-mpi-parallelApply.cpp +++ b/eo/test/mpi/t-mpi-parallelApply.cpp @@ -1,3 +1,41 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ + +/* + * This file shows an example of use of parallel apply, in the following context: each element of a table is + * incremented... in a parallel fashion. While this operation is very easy to perform even on a single host, it's just + * an example for parallel apply use. + * + * Besides, this is also a test for assignment (scheduling) algorithms, in different cases. The test succeeds if and + * only if the program terminates without any segfault ; otherwise, there could be a deadlock which prevents the end or + * a segfault at any time. + * + * One important thing is to instanciate an EmptyJob after having launched a ParallelApplyJob, so as the workers to be + * aware that the job is done (as it's a MultiJob). + * + * This test needs at least 3 processes to be launched. Under this size, it will directly throw an exception, at the + * beginning; + */ + # include # include # include @@ -9,6 +47,9 @@ using namespace std; using namespace eo::mpi; +/* + * The function to be called on each element of the table: just increment the value. + */ struct plusOne : public eoUF< int&, void > { void operator() ( int & x ) @@ -17,22 +58,26 @@ struct plusOne : public eoUF< int&, void > } }; +/* + * Internal structure representating a test. + */ struct Test { - AssignmentAlgorithm * assign; - string description; - int requiredNodesNumber; // nb : chosen nodes ranks must be sequential + AssignmentAlgorithm * assign; // used assignment algorithm for this test. + string description; // textual description of the test + int requiredNodesNumber; // number of required nodes. NB : chosen nodes ranks must be sequential }; -// These tests require at least 3 processes to be launched. int main(int argc, char** argv) { - // eo::log << eo::setlevel( eo::debug ); + // eo::log << eo::setlevel( eo::debug ); // if you like tty full of rainbows, decomment this line and comment the following one. eo::log << eo::setlevel( eo::quiet ); + bool launchOnlyOne = false ; // Set this to true if you wanna launch only the first test. Node::init( argc, argv ); + // Initializes a vector with random values. srand( time(0) ); vector v; for( int i = 0; i < 1000; ++i ) @@ -40,18 +85,27 @@ int main(int argc, char** argv) v.push_back( rand() ); } + // We need to be sure the values are correctly incremented between each test. So as to check this, we save the + // original vector into a variable originalV, and put an offset variable to 0. After each test, the offset is + // incremented and we can compare the returned value of each element to the value of each element in originalV + + // offset. If the two values are different, there has been a problem. int offset = 0; vector originalV = v; + // Instanciates the functor to apply on each element plusOne plusOneInstance; vector< Test > tests; const int ALL = Node::comm().size(); + if( ALL < 3 ) { + throw std::runtime_error("Needs at least 3 processes to be launched!"); + } + // Tests are auto described thanks to member "description" Test tIntervalStatic; tIntervalStatic.assign = new StaticAssignmentAlgorithm( 1, REST_OF_THE_WORLD, v.size() ); - tIntervalStatic.description = "Correct static assignment with interval."; + tIntervalStatic.description = "Correct static assignment with interval."; // workers have ranks from 1 to size - 1 tIntervalStatic.requiredNodesNumber = ALL; tests.push_back( tIntervalStatic ); @@ -111,23 +165,32 @@ int main(int argc, char** argv) for( unsigned int i = 0; i < tests.size(); ++i ) { + // Instanciates a store with the functor, the master rank and size of packet (see ParallelApplyStore doc). ParallelApplyStore< int > store( plusOneInstance, eo::mpi::DEFAULT_MASTER, 3 ); + // Updates the contained data store.data( v ); + // Creates the job with the assignment algorithm, the master rank and the store ParallelApply< int > job( *(tests[i].assign), eo::mpi::DEFAULT_MASTER, store ); + // Only master writes information if( job.isMaster() ) { cout << "Test : " << tests[i].description << endl; } + // Workers whose rank is inferior to required nodes number have to run the test, the other haven't anything to + // do. if( Node::comm().rank() < tests[i].requiredNodesNumber ) { job.run(); } + // After the job run, the master checks the result with offset and originalV if( job.isMaster() ) { - EmptyJob stop( *(tests[i].assign), eo::mpi::DEFAULT_MASTER ); + // This job has to be instanciated, not launched, so as to tell the workers they're done with the parallel + // job. + EmptyJob stop( *(tests[i].assign), eo::mpi::DEFAULT_MASTER ); ++offset; for(int i = 0; i < v.size(); ++i) { @@ -141,6 +204,7 @@ int main(int argc, char** argv) cout << endl; } + // MPI synchronization (all the processes wait to be here). Node::comm().barrier(); delete tests[i].assign; diff --git a/eo/test/mpi/t-mpi-wrapper.cpp b/eo/test/mpi/t-mpi-wrapper.cpp index 527c7cc6..dbe70261 100644 --- a/eo/test/mpi/t-mpi-wrapper.cpp +++ b/eo/test/mpi/t-mpi-wrapper.cpp @@ -1,3 +1,34 @@ +/* +(c) Thales group, 2012 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; + version 2 of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Contact: http://eodev.sourceforge.net + +Authors: + Benjamin Bouvier +*/ + +/* + * This file shows an example of how to wrap a handler of a job store. Here, the wrapped handler is the "IsFinished" + * one. The only function that has been added is that the wrapper prints a message on standard output, indicating what + * the wrapped function returns as a result. + * + * This test is performed on a parallel apply job, the same as in parallelApply. The main difference is when + * instanciating the store. + */ + # include # include # include @@ -9,6 +40,7 @@ using namespace std; using namespace eo::mpi; +// Job functor. struct plusOne : public eoUF< int&, void > { void operator() ( int & x ) @@ -17,6 +49,10 @@ struct plusOne : public eoUF< int&, void > } }; +/* + * Shows the wrapped result of IsFinished, prints a message and returns the wrapped value. + * times is an integer counting how many time the wrapper (hence the wrapped too) has been called. + */ template< class EOT > struct ShowWrappedResult : public IsFinishedParallelApply { @@ -39,7 +75,6 @@ struct ShowWrappedResult : public IsFinishedParallelApply int times; }; -// These tests require at least 3 processes to be launched. int main(int argc, char** argv) { // eo::log << eo::setlevel( eo::debug ); @@ -63,7 +98,11 @@ int main(int argc, char** argv) ParallelApplyStore< int > store( plusOneInstance, eo::mpi::DEFAULT_MASTER, 1 ); store.data( v ); - store.wrapIsFinished( new ShowWrappedResult ); + // This is the only thing which changes: we wrap the IsFinished function. + // According to RAII, we'll delete the invokated wrapper at the end of the main ; the store won't delete it + // automatically. + IsFinishedParallelApply* wrapper = new ShowWrappedResult; + store.wrapIsFinished( wrapper ); ParallelApply job( assign, eo::mpi::DEFAULT_MASTER, store ); // Equivalent to: @@ -86,6 +125,8 @@ int main(int argc, char** argv) cout << endl; } + delete wrapper; + return 0; } From fb75279012e0a7591cb40bfb5d990fe54278f83e Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Mon, 16 Jul 2012 15:08:53 +0200 Subject: [PATCH 19/19] Added doxygen links to examples on MPI classes --- eo/src/eoPopEvalFunc.h | 4 ++++ eo/src/mpi/eoMpi.h | 7 ++++++- eo/src/mpi/eoParallelApply.h | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/eo/src/eoPopEvalFunc.h b/eo/src/eoPopEvalFunc.h index b96e8cac..7fcad146 100644 --- a/eo/src/eoPopEvalFunc.h +++ b/eo/src/eoPopEvalFunc.h @@ -217,6 +217,10 @@ class eoParallelPopLoopEval : public eoPopEvalFunc // Do we have to delete the store by ourselves ? bool needToDeleteStore; }; + +/** + * @example t-mpi-eval.cpp + */ #endif ///////////////////////////////////////////////////////////// diff --git a/eo/src/mpi/eoMpi.h b/eo/src/mpi/eoMpi.h index 0293c43d..cf5cbeb5 100644 --- a/eo/src/mpi/eoMpi.h +++ b/eo/src/mpi/eoMpi.h @@ -36,7 +36,8 @@ namespace eo { /** * @ingroup Parallel - * @defgroup MPI Message Passing Interface parallelization + * @defgroup MPI Message Passing Interface + * @brief See namespace eo::mpi to have all explanations about this module. * @{ */ @@ -507,6 +508,10 @@ namespace eo IsFinishedFunction< JobData >* _iff; }; + /** + * @example t-mpi-wrapper.cpp + */ + /** * @brief Class implementing the centralized job algorithm. * diff --git a/eo/src/mpi/eoParallelApply.h b/eo/src/mpi/eoParallelApply.h index 98f113f4..3cfd729c 100644 --- a/eo/src/mpi/eoParallelApply.h +++ b/eo/src/mpi/eoParallelApply.h @@ -375,6 +375,11 @@ namespace eo // empty } }; + + /** + * @example t-mpi-parallelApply.cpp + * @example t-mpi-multipleRoles.cpp + */ } } # endif // __EO_PARALLEL_APPLY_H__