# ifndef __EO_MPI_H__ # define __EO_MPI_H__ # include # include # include # include # include # include # include # include # include "eoMpiNode.h" # include "eoMpiAssignmentAlgorithm.h" // TODO TODOB comment! namespace eo { namespace mpi { extern eoTimerStat timerStat; namespace Channel { const int Commands = 0; const int Messages = 1; } namespace Message { const int Continue = 0; const int Finish = 1; // TODO commentaire : différence entre finir une tâche et arrêter le worker à expliciter. const int Kill = 2; } const int DEFAULT_MASTER = 0; template< typename JobData, typename Wrapped > struct SharedDataFunction { SharedDataFunction( Wrapped * w = 0 ) : _wrapped( w ) { // empty } void wrapped( Wrapped * w ) { _wrapped = w; } void data( JobData* _d ) { d = _d; if( _wrapped ) { _wrapped->data( _d ); } } protected: JobData* d; Wrapped* _wrapped; }; template< typename JobData > struct SendTaskFunction : public eoUF, public SharedDataFunction< JobData, SendTaskFunction > { public: SendTaskFunction( SendTaskFunction* w = 0 ) : SharedDataFunction >( w ) { // empty } virtual ~SendTaskFunction() {} }; template< typename JobData > struct HandleResponseFunction : public eoUF, public SharedDataFunction< JobData, HandleResponseFunction > { public: HandleResponseFunction( HandleResponseFunction* w = 0 ) : SharedDataFunction >( w ) { // empty } virtual ~HandleResponseFunction() {} }; template< typename JobData > struct ProcessTaskFunction : public eoF, public SharedDataFunction< JobData, ProcessTaskFunction > { public: ProcessTaskFunction( ProcessTaskFunction* w = 0 ) : SharedDataFunction >( w ) { // empty } virtual ~ProcessTaskFunction() {} }; template< typename JobData > struct IsFinishedFunction : public eoF, public SharedDataFunction< JobData, IsFinishedFunction > { public: IsFinishedFunction( IsFinishedFunction* w = 0 ) : SharedDataFunction >( w ) { // empty } virtual ~IsFinishedFunction() {} }; template< typename JobData > struct JobStore { JobStore( SendTaskFunction* stf, HandleResponseFunction* hrf, ProcessTaskFunction* ptf, IsFinishedFunction* iff ) : _stf( stf ), _hrf( hrf ), _ptf( ptf ), _iff( iff ) { // empty } JobStore() { // empty } SendTaskFunction & sendTask() { return *_stf; } HandleResponseFunction & handleResponse() { return *_hrf; } ProcessTaskFunction & processTask() { return *_ptf; } IsFinishedFunction & isFinished() { return *_iff; } void sendTask( SendTaskFunction* stf ) { _stf = stf; } void handleResponse( HandleResponseFunction* hrf ) { _hrf = hrf; } void processTask( ProcessTaskFunction* ptf ) { _ptf = ptf; } void isFinished( IsFinishedFunction* iff ) { _iff = iff; } void wrapSendTask( SendTaskFunction* stf ) { if( stf ) { stf->wrapped( _stf ); _stf = stf; } } void wrapHandleResponse( HandleResponseFunction* hrf ) { if( hrf ) { hrf->wrapped( _hrf ); _hrf = hrf; } } void wrapProcessTask( ProcessTaskFunction* ptf ) { if( ptf ) { ptf->wrapped( _ptf ); _ptf = ptf; } } void wrapIsFinished( IsFinishedFunction* iff ) { if( iff ) { iff->wrapped( _iff ); _iff = iff; } } // TODO commenter : laissé à la couche d'en dessous car impossible d'initialiser une donnée membre d'une classe mère depuis une classe fille. virtual JobData* data() = 0; protected: // TODO commenter : Utiliser des pointeurs pour éviter d'écraser les fonctions wrappées SendTaskFunction< JobData >* _stf; HandleResponseFunction< JobData >* _hrf; ProcessTaskFunction< JobData >* _ptf; IsFinishedFunction< JobData >* _iff; }; template< class JobData > class Job { public: Job( AssignmentAlgorithm& _algo, int _masterRank, JobStore & store ) : assignmentAlgo( _algo ), comm( Node::comm() ), masterRank( _masterRank ), // Functors sendTask( store.sendTask() ), handleResponse( store.handleResponse() ), processTask( store.processTask() ), isFinished( store.isFinished() ) { _isMaster = Node::comm().rank() == _masterRank; sendTask.data( store.data() ); handleResponse.data( store.data() ); processTask.data( store.data() ); isFinished.data( store.data() ); } protected: SendTaskFunction & sendTask; HandleResponseFunction & handleResponse; ProcessTaskFunction & processTask; IsFinishedFunction & isFinished; struct FinallyBlock { FinallyBlock( int _totalWorkers, AssignmentAlgorithm& _algo, Job< JobData > & _that ) : totalWorkers( _totalWorkers ), assignmentAlgo( _algo ), comm( Node::comm() ), that( _that ) { // empty } ~FinallyBlock() { # ifndef NDEBUG eo::log << eo::debug; eo::log << "[M" << comm.rank() << "] Frees all the idle." << std::endl; # endif // frees all the idle workers timerStat.start("master_wait_for_idles"); std::vector idles = assignmentAlgo.idles(); for(unsigned int i = 0; i < idles.size(); ++i) { comm.send( idles[i], Channel::Commands, Message::Finish ); } timerStat.stop("master_wait_for_idles"); # ifndef NDEBUG eo::log << "[M" << comm.rank() << "] Waits for all responses." << std::endl; # endif // wait for all responses timerStat.start("master_wait_for_all_responses"); while( assignmentAlgo.availableWorkers() != totalWorkers ) { bmpi::status status = comm.probe( bmpi::any_source, bmpi::any_tag ); int wrkRank = status.source(); that.handleResponse( wrkRank ); comm.send( wrkRank, Channel::Commands, Message::Finish ); assignmentAlgo.confirm( wrkRank ); } timerStat.stop("master_wait_for_all_responses"); # ifndef NDEBUG eo::log << "[M" << comm.rank() << "] Leaving master task." << std::endl; # endif } protected: int totalWorkers; bmpi::communicator & comm; Job< JobData > & that; AssignmentAlgorithm& assignmentAlgo; }; void master( ) { int totalWorkers = assignmentAlgo.availableWorkers(); # ifndef NDEBUG eo::log << eo::debug; eo::log << "[M" << comm.rank() << "] Have " << totalWorkers << " workers." << std::endl; # endif try { FinallyBlock finally( totalWorkers, assignmentAlgo, *this ); while( ! isFinished() ) { timerStat.start("master_wait_for_assignee"); int assignee = assignmentAlgo.get( ); while( assignee <= 0 ) { # ifndef NDEBUG eo::log << "[M" << comm.rank() << "] Waitin' for node..." << std::endl; # endif bmpi::status status = comm.probe( bmpi::any_source, bmpi::any_tag ); int wrkRank = status.source(); # ifndef NDEBUG eo::log << "[M" << comm.rank() << "] Node " << wrkRank << " just terminated." << std::endl; # endif handleResponse( wrkRank ); assignmentAlgo.confirm( wrkRank ); assignee = assignmentAlgo.get( ); } timerStat.stop("master_wait_for_assignee"); # ifndef NDEBUG eo::log << "[M" << comm.rank() << "] Assignee : " << assignee << std::endl; # endif timerStat.start("master_wait_for_send"); comm.send( assignee, Channel::Commands, Message::Continue ); sendTask( assignee ); timerStat.stop("master_wait_for_send"); } } catch( const std::exception & e ) { std::string s = e.what(); s.append( " in eoMpi loop"); throw std::runtime_error( s ); } } void worker( ) { int order; # ifndef NDEBUG eo::log << eo::debug; # endif timerStat.start("worker_wait_for_order"); comm.recv( masterRank, Channel::Commands, order ); timerStat.stop("worker_wait_for_order"); while( true ) { # ifndef NDEBUG eo::log << "[W" << comm.rank() << "] Waiting for an order..." << std::endl; # endif if ( order == Message::Kill ) { # ifndef NDEBUG eo::log << "[W" << comm.rank() << "] Leaving worker task." << std::endl; # endif return; } else if( order == Message::Continue ) { # ifndef NDEBUG eo::log << "[W" << comm.rank() << "] Processing task..." << std::endl; # endif processTask( ); } timerStat.start("worker_wait_for_order"); comm.recv( masterRank, Channel::Commands, order ); timerStat.stop("worker_wait_for_order"); } } public: void run( ) { ( _isMaster ) ? master( ) : worker( ); } bool isMaster( ) { return _isMaster; } protected: AssignmentAlgorithm& assignmentAlgo; bmpi::communicator& comm; int masterRank; bool _isMaster; struct rusage _usage; long _current; }; } } # endif // __EO_MPI_H__