paradiseo/smp/src/MPI_IslandModel.cpp
Eremey Valetov 2bfd27007f feat(smp): add MPI-distributed island model
Add MPI_IslandModel: a distributed island model where each MPI rank
runs one island with asynchronous migration via MPI send/receive.

Key features:
- Single-message MPI protocol: MPI_Isend (data) + MPI_Get_count/
  MPI_Recv on receive side. Avoids the two-message (size + data)
  pattern that can deadlock under oversubscription.
- MPI_Iprobe for non-blocking message detection (upstream's
  communicator lacks iprobe())
- Non-blocking sends with PendingSend tracking and MPI_Test cleanup
- Receive-before-send ordering to prevent message accumulation
- Post-loop drain with safe shutdown (cancel pending sends)
- Configurable poll interval (default 1s)
- Thread-safe update method for asynchronous migration
- Debug-level migration logging via eo::log

Extended Island with:
- IslandType enum (HOMOGENEOUS/HETEROGENEOUS) for conditional
  immigrant re-evaluation
- IslandModelKind enum (None/Shared/MPI) replacing string dispatch
- algoEOT template parameter for algorithm/individual type mismatch
- finalize() call after population integration

Extended islandModelWrapper with MPI_IslandModel overloads,
initial_values parameter, and algoEOT support.
2026-02-28 19:55:21 -05:00

272 lines
9.2 KiB
C++

/*
<MPI_IslandModel.cpp>
Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2012
Alexandre Quemy, Thibault Lasnier - INSA Rouen
Eremey Valetov
This software is governed by the CeCILL license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/ or redistribute the software under the terms of the CeCILL
license as circulated by CEA, CNRS and INRIA at the following URL
"http://www.cecill.info".
ParadisEO WebSite : http://paradiseo.gforge.inria.fr
Contact: paradiseo-help@lists.gforge.inria.fr
*/
#include <algorithm>
#include <chrono>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <sstream>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <mpi.h>

#include <SerializableBase.h>
#include <serial/Parser.h>
#include <utils/eoLogger.h>
template<class EOT>
MPI_IslandModel<EOT>::MPI_IslandModel(AbstractTopology& _topo, int _pollIntervalMs)
: topo(_topo), pollIntervalMs(_pollIntervalMs), running(false)
{
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
MPI_Comm_size(MPI_COMM_WORLD, &num_mpi_ranks);
}
template<class EOT>
void paradiseo::smp::MPI_IslandModel<EOT>::add(AIsland<EOT>& _island)
{
islands.push_back(std::pair<AIsland<EOT>*, bool>(&_island, false));
islands.back().first->setModel(this);
}
template<class EOT>
void MPI_IslandModel<EOT>::operator()() {
running = true;
initModel();
std::thread islandThread;
if (mpi_rank < static_cast<int>(islands.size())) {
auto& it = islands[mpi_rank];
it.first->setRunning();
islandThread = std::thread(&AIsland<EOT>::operator(), it.first);
}
int localIslandRunning = 1;
int globalIslandRunning = 0;
do {
localIslandRunning = (mpi_rank < static_cast<int>(islands.size()) &&
!islands[mpi_rank].first->isStopped()) ? 1 : 0;
send();
MPI_Allreduce(&localIslandRunning, &globalIslandRunning, 1,
MPI_INT, MPI_SUM, MPI_COMM_WORLD);
if (globalIslandRunning > 0)
std::this_thread::sleep_for(std::chrono::milliseconds(pollIntervalMs));
} while (globalIslandRunning > 0);
if (islandThread.joinable())
islandThread.join();
for (auto& message : sentMessages)
message.wait();
sentMessages.clear();
// Discard remaining outgoing emigrants — all islands have stopped,
// so migrated individuals would never be processed by recipients.
// Using send() here risks MPI deadlock when multiple ranks each
// have pending emigrants to send but no matching receives posted.
{
std::lock_guard<std::mutex> lock(m);
while (!listEmigrants.empty()) listEmigrants.pop();
}
std::thread lastIntegrationThread;
if (mpi_rank < static_cast<int>(islands.size())) {
lastIntegrationThread = std::thread([&]() {
islands[mpi_rank].first->receive();
});
}
if (lastIntegrationThread.joinable())
lastIntegrationThread.join();
// Wait for any async island.update() tasks launched during the drain phase.
// Without this, islands may be deleted while tasks still reference them.
for (auto& message : sentMessages)
message.wait();
sentMessages.clear();
// Cancel remaining non-blocking sends. After the polling loop and drain,
// any unsent data targets stopped islands — safe to discard.
for (auto& ps : pendingSends) {
MPI_Cancel(&ps.request);
MPI_Wait(&ps.request, MPI_STATUS_IGNORE);
}
pendingSends.clear();
running = false;
}
template<class EOT>
bool paradiseo::smp::MPI_IslandModel<EOT>::update(eoPop<EOT> _data, AIsland<EOT>* _island)
{
std::lock_guard<std::mutex> lock(m);
listEmigrants.push(std::pair<eoPop<EOT>,AIsland<EOT>*>(_data, _island));
return true;
}
template<class EOT>
void paradiseo::smp::MPI_IslandModel<EOT>::setTopology(AbstractTopology& _topo)
{
std::lock_guard<std::mutex> lock(m);
topo = _topo;
if (running) {
topo.construct(islands.size());
for (auto it : islands)
if (!it.second)
topo.isolateNode(table.getLeft()[it.first]);
}
}
template<class EOT>
void paradiseo::smp::MPI_IslandModel<EOT>::send(void) {
// Receive first, then send — prevents accumulation of unprocessed messages.
if (!islands.empty() && mpi_rank < static_cast<int>(islands.size())) {
unsigned myId = mpi_rank;
std::vector<unsigned> neighbors = topo.getIdNeighbors(myId);
for (unsigned idFromNeighbor : neighbors) {
int tag = idFromNeighbor * 1000 + mpi_rank;
int flag = 0;
MPI_Status mpiStatus;
MPI_Iprobe(idFromNeighbor, tag, MPI_COMM_WORLD, &flag, &mpiStatus);
if (flag) {
// Single-message receive: MPI_Iprobe confirmed the complete
// message is available, so MPI_Recv returns immediately.
// This avoids the two-message protocol (size + data) in
// comm.recv(), which can deadlock when the size message
// arrives before the data message is progressed by MPI.
int count = 0;
MPI_Get_count(&mpiStatus, MPI_CHAR, &count);
std::string serialized(count, '\0');
MPI_Recv(&serialized[0], count, MPI_CHAR, idFromNeighbor, tag,
MPI_COMM_WORLD, MPI_STATUS_IGNORE);
eoserial::Object* obj = eoserial::Parser::parse(serialized);
SerializableBase<eoPop<EOT>> receivedSerializablePop;
receivedSerializablePop.unpack(obj);
delete obj;
eoPop<EOT> receivedPop = receivedSerializablePop;
eo::log << eo::debug << "MPI_IslandModel: rank " << mpi_rank
<< " received " << receivedPop.size() << " migrant(s) from island "
<< idFromNeighbor << " (tag=" << tag << ")" << std::endl;
sentMessages.push_back(std::async(std::launch::async,
&AIsland<EOT>::update,
table.getRight()[myId],
std::move(receivedPop)));
}
}
}
// Clean up completed async tasks
sentMessages.erase(std::remove_if(sentMessages.begin(), sentMessages.end(),
[](std::shared_future<bool>& i) -> bool
{ return i.wait_for(std::chrono::nanoseconds(0)) == std::future_status::ready; }
),
sentMessages.end());
// Then send outgoing emigrants
eoPop<EOT> migPop;
unsigned idFrom = 0;
bool hasMigrant = false;
{
std::lock_guard<std::mutex> lock(m);
if (!listEmigrants.empty()) {
idFrom = table.getLeft()[listEmigrants.front().second];
migPop = std::move(listEmigrants.front().first);
listEmigrants.pop();
hasMigrant = true;
}
}
if (hasMigrant) {
std::vector<unsigned> neighbors = topo.getIdNeighbors(idFrom);
// Serialize once for all neighbors (same protocol as comm.send for Persistent)
SerializableBase<eoPop<EOT>> serializablePop(migPop);
eoserial::Object* obj = serializablePop.pack();
std::stringstream ss;
obj->print(ss);
delete obj;
std::string serialized = ss.str();
int size = static_cast<int>(serialized.size()) + 1;
for (unsigned idTo : neighbors) {
int tag = idFrom * 1000 + idTo;
eo::log << eo::debug << "MPI_IslandModel: rank " << mpi_rank
<< " sending " << migPop.size() << " migrant(s) from island "
<< idFrom << " to island " << idTo << " (tag=" << tag << ")" << std::endl;
// Single non-blocking send. The receiver uses MPI_Get_count
// from MPI_Iprobe to determine the size, eliminating the
// separate blocking size message that could deadlock.
pendingSends.emplace_back();
PendingSend& ps = pendingSends.back();
ps.buffer = serialized;
MPI_Isend(ps.buffer.data(), size, MPI_CHAR, idTo, tag,
MPI_COMM_WORLD, &ps.request);
}
}
completePendingSends();
}
template<class EOT>
void paradiseo::smp::MPI_IslandModel<EOT>::completePendingSends() {
auto it = pendingSends.begin();
while (it != pendingSends.end()) {
int completed = 0;
MPI_Test(&it->request, &completed, MPI_STATUS_IGNORE);
if (completed)
it = pendingSends.erase(it);
else
++it;
}
}
template<class EOT>
bool paradiseo::smp::MPI_IslandModel<EOT>::isRunning() const
{
return (bool)running;
}
template<class EOT>
void paradiseo::smp::MPI_IslandModel<EOT>::initModel(void)
{
if (num_mpi_ranks > static_cast<int>(islands.size()) + 1) {
eo::log << eo::errors << "MPI_IslandModel: number of MPI ranks ("
<< num_mpi_ranks << ") exceeds number of islands + 1 ("
<< islands.size() + 1 << ")" << std::endl;
MPI_Abort(MPI_COMM_WORLD, 1);
}
for (auto& it : islands)
it.second = true;
topo.construct(islands.size());
table = createTable();
}
template<class EOT>
Bimap<unsigned, AIsland<EOT>*> paradiseo::smp::MPI_IslandModel<EOT>::createTable()
{
Bimap<unsigned, AIsland<EOT>*> table;
unsigned islandId = 0;
for (auto it : islands) {
table.add(islandId, it.first);
islandId++;
}
return table;
}