From 95787e3a24bf31a5ddab434f1ecebcd02232de2a Mon Sep 17 00:00:00 2001 From: boufaras Date: Mon, 9 Jan 2012 10:29:30 +0000 Subject: [PATCH] independent ParadisEO-GPU package git-svn-id: svn://scm.gforge.inria.fr/svnroot/paradiseo@2589 331e1502-861f-0410-8da2-ba01fb791d7f --- .../paradiseo-gpu/src/eval/moGPUEvalByCpy.h | 200 +++++++++++++++++ .../paradiseo-gpu/src/eval/moGPUEvalByModif.h | 199 +++++++++++++++++ .../paradiseo-gpu/src/eval/moGPUEvalFunc.h | 84 ++++++++ .../src/eval/moGPUKernelEvalByCpy.h | 71 ++++++ .../src/eval/moGPUKernelEvalByModif.h | 68 ++++++ .../src/eval/moGPUMappingEvalByCpy.h | 202 ++++++++++++++++++ .../src/eval/moGPUMappingEvalByModif.h | 200 +++++++++++++++++ .../src/eval/moGPUMappingKernelEvalByCpy.h | 76 +++++++ .../src/eval/moGPUMappingKernelEvalByModif.h | 73 +++++++ 9 files changed, 1173 insertions(+) create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByCpy.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByModif.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalFunc.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUKernelEvalByCpy.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUKernelEvalByModif.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByCpy.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByModif.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingKernelEvalByCpy.h create mode 100644 branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingKernelEvalByModif.h diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByCpy.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByCpy.h new file mode 100644 index 000000000..b15e32f44 --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByCpy.h @@ -0,0 +1,200 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUEvalByCpy_H +#define __moGPUEvalByCpy_H + +#include +#include +#include + +/** + * class for the parallel evaluation of neighborhood + */ + +template +class moGPUEvalByCpy: public moGPUEval { + +public: + + /** + * Define type of a solution corresponding to Neighbor + **/ + typedef typename Neighbor::EOT EOT; + /** + * Define vector type of vector corresponding to Solution + **/ + typedef typename EOT::ElemType T; + /** + * Define type of a fitness corresponding to Solution + **/ + typedef typename EOT::Fitness Fitness; + + using moGPUEval::neighborhoodSize; + using moGPUEval::host_FitnessArray; + using moGPUEval::device_FitnessArray; + using moGPUEval::device_solution; + using moGPUEval::NEW_kernel_Dim; + using moGPUEval::NEW_BLOCK_SIZE; + using moGPUEval::mutex; + + /** + * Constructor + * @param _neighborhoodSize the size of the neighborhood + * @param _eval how to evaluate a neighbor + */ + + moGPUEvalByCpy(unsigned int _neighborhoodSize, Eval & _eval) : + moGPUEval (_neighborhoodSize), eval(_eval) { + + } + + /** + * Compute fitness for all solution neighbors in device + * @param _sol the solution that generate the neighborhood to evaluate parallely + * @param _cpySolution Launch kernel with local copy option of solution in each thread + * @param _withCalibration an automatic configuration of kernel to launch( NB_THREAD BY BLOCK & NB_BLOCK BY KERNEL ),default (1) + */ + + void neighborhoodEval(EOT & _sol, bool _cpySolution, bool _withCalibration) { + + if (_cpySolution) { + unsigned size=_sol.size(); + if (!mutex) { + //Allocate the space for solution in the global memory of device + cudaMalloc((void**) &device_solution.vect, size * sizeof(T)); + if (_withCalibration) + calibration(_sol); + mutex = true; + } + + // Get Current solution fitness + Fitness fitness = _sol.fitness(); + //Copy the solution vector from the host to device + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + + //Launch the Kernel to compute all neighbors fitness with local copy of solution + moGPUKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + + //Copy the result from device to host + cudaMemcpy(host_FitnessArray, device_FitnessArray, neighborhoodSize + * sizeof(Fitness), cudaMemcpyDeviceToHost); + + } else + cout << "It's evaluation by copy set cpySolution to true" << endl; + } + + /** + * Compute the best combination of number of block by grid and number of thread within block + * @param _sol the solution that generate the neighborhood to evaluate parallely + */ + + virtual void calibration(EOT & _sol) { + + unsigned size = _sol.size(); + Fitness fitness = _sol.fitness(); + unsigned NB_THREAD[6] = { 16, 32, 64, 128, 256, 512 }; + double mean_time[7] = { 0, 0, 0, 0, 0, 0 }; + unsigned i = 0; + double best_time = RAND_MAX; + unsigned tmp_kernel_Dim; + +#ifndef BLOCK_SIZE + + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + } while (i < 6); + +#else + + tmp_kernel_Dim =NEW_kernel_Dim; + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + timer.stop(); + mean_time[6] += (timer.getTime()); + } + if (best_time >= (mean_time[6] / 5)) + best_time = mean_time[6] / 5; + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + }while (i < 6); + +#endif + + } + +protected: + + Eval & eval; + +}; + +#endif diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByModif.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByModif.h new file mode 100644 index 000000000..935faa90a --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalByModif.h @@ -0,0 +1,199 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUEvalByModif_H +#define __moGPUEvalByModif_H + +#include +#include +#include + +/** + * class for the parallel evaluation of neighborhood + */ + +template +class moGPUEvalByModif: public moGPUEval { + +public: + + /** + * Define type of a solution corresponding to Neighbor + **/ + typedef typename Neighbor::EOT EOT; + /** + * Define vector type of vector corresponding to Solution + **/ + typedef typename EOT::ElemType T; + /** + * Define type of a fitness corresponding to Solution + **/ + typedef typename EOT::Fitness Fitness; + + using moGPUEval::neighborhoodSize; + using moGPUEval::host_FitnessArray; + using moGPUEval::device_FitnessArray; + using moGPUEval::device_solution; + using moGPUEval::NEW_kernel_Dim; + using moGPUEval::NEW_BLOCK_SIZE; + using moGPUEval::mutex; + + /** + * Constructor + * @param _neighborhoodSize the size of the neighborhood + * @param _eval the incremental evaluation + */ + + moGPUEvalByModif(unsigned int _neighborhoodSize, Eval & _eval) : + moGPUEval (_neighborhoodSize), eval(_eval) { + + } + + /** + * Compute fitness for all solution neighbors in device + * @param _sol the solution that generate the neighborhood to evaluate parallely + * @param _cpySolution Launch kernel with local copy option of solution in each thread + * @param _withCalibration an automatic configuration of kernel to launch( NB_THREAD BY BLOCK & NB_BLOCK BY KERNEL ),default (1) + */ + + void neighborhoodEval(EOT & _sol, bool _cpySolution, bool _withCalibration) { + if (!_cpySolution) { + unsigned size = _sol.size(); + if (!mutex) { + //Allocate the space for solution in the global memory of device + cudaMalloc((void**) &device_solution.vect, size * sizeof(T)); + if (_withCalibration) + calibration(_sol); + mutex = true; + } + + // Get Current solution fitness + Fitness fitness = _sol.fitness(); + + //Copy the solution vector from the host to device + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + + //Launch the Kernel to compute all neighbors fitness + moGPUKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + + //Copy the result from device to host + cudaMemcpy(host_FitnessArray, device_FitnessArray, neighborhoodSize + * sizeof(Fitness), cudaMemcpyDeviceToHost); + } else + cout << "It's evaluation by Modif set cpySolution to false" + << endl; + } + + /** + * Compute the best combination of number of block by grid and number of thread within block + * @param _sol the solution that generate the neighborhood to evaluate parallely + */ + + virtual void calibration(EOT & _sol) { + + unsigned size = _sol.size(); + Fitness fitness = _sol.fitness(); + unsigned NB_THREAD[6] = { 16, 32, 64, 128, 256, 512 }; + double mean_time[7] = { 0, 0, 0, 0, 0, 0 }; + unsigned i = 0; + double best_time = RAND_MAX; + unsigned tmp_kernel_Dim; +#ifndef BLOCK_SIZE + + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + } while (i < 6); + +#else + + tmp_kernel_Dim =NEW_kernel_Dim; + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + timer.stop(); + mean_time[6] += (timer.getTime()); + } + if (best_time >= (mean_time[6] / 5)) + best_time = mean_time[6] / 5; + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + }while (i < 6); + +#endif + + } + +protected: + + Eval & eval; + +}; + +#endif diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalFunc.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalFunc.h new file mode 100644 index 000000000..25eb907fa --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUEvalFunc.h @@ -0,0 +1,84 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUEvalFunc_H +#define __moGPUEvalFunc_H + +/** + * Abstract class for GPU evaluation of neighbor + */ + +template +class moGPUEvalFunc { + +public: + + /** + * Define type of a solution corresponding to Neighbor + */ + + typedef typename Neighbor::EOT EOT; + typedef typename EOT::ElemType T; + typedef typename EOT::Fitness Fitness; + + /** + * Constructor + */ + + moGPUEvalFunc() { + } + + /** + * Destructor + */ + + virtual ~moGPUEvalFunc() { + } + + /** + *Virtual functor to compute fitness of a solution neighbor + *@param _solution the solution which generate the neighborhood + *@param _fitness the current solution fitness + *@param _index an array that contains a set of indexes corresponding to the current thread identifier neighbor + * the last element of this array contains neighborhood size + */ + +virtual inline __host__ __device__ Fitness operator() (T * _solution,Fitness _fitness, unsigned int * _index) { + + return _fitness; + +} + +}; +#endif diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUKernelEvalByCpy.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUKernelEvalByCpy.h new file mode 100644 index 000000000..858b132ac --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUKernelEvalByCpy.h @@ -0,0 +1,71 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUKernelEvalByCpy_H +#define __moGPUKernelEvalByCpy_H +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * The kernel function called from the host and executed in device to compute all neighbors fitness at one time + * without mapping, each thread id compute one fitness by copying localy the solution + * @param _eval how to evaluate each neighbor + * @param _solution the representation of solution( vector of int,float....) + * @param _allFitness the array of Fitness to save all neighbors fitness + * @param _fitness the current solution fitness + * @param _neighborhoodsize the size of the neighborhood + */ + +template + +__global__ void moGPUKernelEvalByCpy(Eval _eval, T * _solution, Fitness* _allFitness, + Fitness _fitness, unsigned _neighborhoodsize) { + + // The thread identifier within a grid block's + int id = blockIdx.x * blockDim.x + threadIdx.x; + // array to save index to be changed + unsigned index[2]; + T sol_tmp[SIZE]; + // In this representation each id identify one and only one neighbor in neighborhood + if (id < _neighborhoodsize) { + for(unsigned i=0;i + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUKernelEvalByModif_H +#define __moGPUKernelEvalByModif_H +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * The kernel function called from the host and executed in device to compute all neighbors fitness at one time + * without mapping, each thread id compute one fitness by modif of solution + * @param _eval how to evaluate each neighbor + * @param _solution the representation of solution( vector of int,float....) + * @param _allFitness the array of Fitness to save all neighbors fitness + * @param _fitness the current solution fitness + * @param _neighborhoodsize the size of the neighborhood + */ + +template + +__global__ void moGPUKernelEvalByModif(Eval _eval, T * _solution, Fitness* _allFitness, + Fitness _fitness, unsigned _neighborhoodsize) { + + // The thread identifier within a grid block's + int id = blockIdx.x * blockDim.x + threadIdx.x; + // array to save index to be changed + unsigned index[2]; + // In this representation each id identify one and only one neighbor in neighborhood + if (id < _neighborhoodsize) { + //Change the id'th element of solution + index[0]=id; + index[1]=_neighborhoodsize; + //Compute fitness for id'th neighbor + _allFitness[id] = _eval(_solution,_fitness,index); + } +} + +#endif diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByCpy.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByCpy.h new file mode 100644 index 000000000..46a883f25 --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByCpy.h @@ -0,0 +1,202 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUMappingEvalByCpy_H +#define __moGPUMappingEvalByCpy_H +#include +#include +#include + +/** + * class for the Mapping neighborhood evaluation + */ + +template +class moGPUMappingEvalByCpy: public moGPUEval { + +public: + + /** + * Define type of a solution corresponding to Neighbor + */ + typedef typename Neighbor::EOT EOT; + /** + * Define type of a vector corresponding to Solution + */ + typedef typename EOT::ElemType T; + /** + * Define type of a fitness corresponding to Solution + */ + typedef typename EOT::Fitness Fitness; + + using moGPUEval::neighborhoodSize; + using moGPUEval::host_FitnessArray; + using moGPUEval::device_FitnessArray; + using moGPUEval::device_solution; + using moGPUEval::NEW_BLOCK_SIZE; + using moGPUEval::NEW_kernel_Dim; + using moGPUEval::mutex; + + /** + * Constructor + * @param _neighborhoodSize the size of the neighborhood + * @param _eval how to evaluate a neighbor + */ + + moGPUMappingEvalByCpy(unsigned int _neighborhoodSize, Eval & _eval) : + moGPUEval (_neighborhoodSize), eval(_eval) { + } + + /** + * Destructor + */ + ~moGPUMappingEvalByCpy() { + } + + /** + * Compute fitness for all solution neighbors in device with associated mapping + * @param _sol the solution that generate the neighborhood to evaluate parallely + * @param _mapping the array of mapping indexes that associate a neighbor identifier to X-position + * @param _cpySolution Launch kernel with local copy option of solution in each thread if it's set to true + * @param _withCalibration an automatic kernel configuration, fix nbr of thread by block and nbr of grid by kernel + */ + + void neighborhoodEval(EOT & _sol, unsigned int * _mapping, + bool _cpySolution, bool _withCalibration) { + if (_cpySolution) { + unsigned size = _sol.size(); + // Get Current solution fitness + Fitness fitness = _sol.fitness(); + if (!mutex) { + //Allocate the space for solution in the device global memory + cudaMalloc((void**) &device_solution.vect, size * sizeof(T)); + if (_withCalibration) + calibration(_sol, _mapping); + mutex = true; + } + //Copy the solution vector from the host to device + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + //Launch the Kernel to compute all neighbors fitness,using a given mapping + moGPUMappingKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + cudaMemcpy(host_FitnessArray, device_FitnessArray, neighborhoodSize + * sizeof(Fitness), cudaMemcpyDeviceToHost); + + } else + cout << "It's evaluation by copy set cpySolution to true" << endl; + } + + /** + * Compute the best combination of number of block by grid and number of thread within block + * @param _sol the solution that generate the neighborhood to evaluate parallely + * @param _mapping the array of mapping indexes that associate a neighbor identifier to X-position + */ + + virtual void calibration(EOT & _sol, unsigned int * _mapping) { + + unsigned size = _sol.size(); + Fitness fitness = _sol.fitness(); + unsigned NB_THREAD[6] = { 16, 32, 64, 128, 256, 512 }; + double mean_time[7] = { 0, 0, 0, 0, 0, 0 }; + unsigned i = 0; + double best_time = 0; + unsigned tmp_kernel_Dim; + best_time = RAND_MAX; +#ifndef BLOCK_SIZE + + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUMappingKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + } while (i < 6); + +#else + + tmp_kernel_Dim =NEW_kernel_Dim; + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUMappingKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + timer.stop(); + mean_time[6] += (timer.getTime()); + } + if (best_time >= (mean_time[6] / 5)) + best_time = mean_time[6] / 5; + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUMappingKernelEvalByCpy<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + }while (i < 6); + +#endif + + } + +protected: + + Eval & eval; + +}; + +#endif diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByModif.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByModif.h new file mode 100644 index 000000000..eba3a5fa4 --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingEvalByModif.h @@ -0,0 +1,200 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUMappingEvalByModif_H +#define __moGPUMappingEvalByModif_H +#include +#include +#include + +/** + * class for the Mapping neighborhood evaluation + */ + +template +class moGPUMappingEvalByModif: public moGPUEval { + +public: + + /** + * Define type of a solution corresponding to Neighbor + */ + typedef typename Neighbor::EOT EOT; + /** + * Define type of a vector corresponding to Solution + */ + typedef typename EOT::ElemType T; + /** + * Define type of a fitness corresponding to Solution + */ + typedef typename EOT::Fitness Fitness; + + using moGPUEval::neighborhoodSize; + using moGPUEval::host_FitnessArray; + using moGPUEval::device_FitnessArray; + using moGPUEval::device_solution; + using moGPUEval::NEW_BLOCK_SIZE; + using moGPUEval::NEW_kernel_Dim; + using moGPUEval::mutex; + + /** + * Constructor + * @param _neighborhoodSize the size of the neighborhood + * @param _eval the incremental evaluation + */ + + moGPUMappingEvalByModif(unsigned int _neighborhoodSize, Eval & _eval) : + moGPUEval (_neighborhoodSize), eval(_eval) { + } + + /** + * Destructor + */ + ~moGPUMappingEvalByModif() { + } + + /** + * Compute fitness for all solution neighbors in device with associated mapping + * @param _sol the solution that generate the neighborhood to evaluate parallely + * @param _mapping the array of mapping indexes that associate a neighbor identifier to X-position + * @param _cpySolution Launch kernel with local copy option of solution in each thread if it's set to true + * @param _withCalibration an automatic kernel configuration, fix nbr of thread by block and nbr of grid by kernel + */ + + void neighborhoodEval(EOT & _sol, unsigned int * _mapping, + bool _cpySolution, bool _withCalibration) { + if (!_cpySolution) { + unsigned size = _sol.size(); + // Get Current solution fitness + Fitness fitness = _sol.fitness(); + if (!mutex) { + //Allocate the space for solution in the device global memory + cudaMalloc((void**) &device_solution.vect, size * sizeof(T)); + if (_withCalibration) + calibration(_sol, _mapping); + mutex = true; + } + //Copy the solution vector from the host to device + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + //Launch the Kernel to compute all neighbors fitness,using a given mapping + moGPUMappingKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + cudaMemcpy(host_FitnessArray, device_FitnessArray, neighborhoodSize + * sizeof(Fitness), cudaMemcpyDeviceToHost); + } else + cout << "It's evaluation by Modif set cpySolution to false" << endl; + } + + /** + * Compute the best combination of number of block by grid and number of thread within block + * @param _sol the solution that generate the neighborhood to evaluate parallely + * @param _mapping the array of mapping indexes that associate a neighbor identifier to X-position + */ + + + virtual void calibration(EOT & _sol, unsigned int * _mapping) { + + unsigned size = _sol.size(); + Fitness fitness = _sol.fitness(); + unsigned NB_THREAD[6] = { 16, 32, 64, 128, 256, 512 }; + double mean_time[7] = { 0, 0, 0, 0, 0, 0 }; + unsigned i = 0; + double best_time = 0; + unsigned tmp_kernel_Dim; + best_time = RAND_MAX; +#ifndef BLOCK_SIZE + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUMappingKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + } while (i < 6); +#else + + tmp_kernel_Dim =NEW_kernel_Dim; + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUMappingKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + timer.stop(); + mean_time[6] += (timer.getTime()); + } + if (best_time >= (mean_time[6] / 5)) + best_time = mean_time[6] / 5; + do { + tmp_kernel_Dim = neighborhoodSize / NB_THREAD[i] + + ((neighborhoodSize % NB_THREAD[i] == 0) ? 0 : 1); + for (unsigned k = 0; k < 5; k++) { + cudaMemcpy(device_solution.vect, _sol.vect, size * sizeof(T), + cudaMemcpyHostToDevice); + moGPUTimer timer; + timer.start(); + moGPUMappingKernelEvalByModif<<>>(eval,device_solution.vect,device_FitnessArray,fitness,_mapping,neighborhoodSize); + timer.stop(); + mean_time[i] += (timer.getTime()); + } + if (best_time >= (mean_time[i] / 5)) { + best_time = mean_time[i] / 5; + NEW_BLOCK_SIZE = NB_THREAD[i]; + NEW_kernel_Dim = tmp_kernel_Dim; + } + i++; + }while (i < 6); + +#endif + + } + +protected: + + Eval & eval; + +}; + +#endif diff --git a/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingKernelEvalByCpy.h b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingKernelEvalByCpy.h new file mode 100644 index 000000000..7d204c337 --- /dev/null +++ b/branches/ParadisEO-GPU/paradiseo-gpu/src/eval/moGPUMappingKernelEvalByCpy.h @@ -0,0 +1,76 @@ +/* + + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUMappingKernelEvalByCpy_H +#define __moGPUMappingKernelEvalByCpy_H +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * The kernel function called from the host and executed in device to compute all neighbors fitness at one time + * without mapping, each thread id compute one fitness by modif of solution + * @param _eval how to evaluate each neighbor + * @param _solution the representation of solution( vector of int,float....) + * @param _allFitness the array of Fitness to save all neighbors fitness + * @param _fitness the current solution fitness + * @param _mapping associate to each threadID a set of correspondent indexes + * @param _neighborhoodsize the size of the neighborhood + */ + +template + +__global__ void moGPUMappingKernelEvalByCpy(Eval _eval, T * _solution, Fitness* _allFitness, + Fitness _fitness,unsigned * _mapping,unsigned _neighborhoodsize) { + + // The thread identifier within a grid block's + int id = blockIdx.x * blockDim.x + threadIdx.x; + //counter of number of x-change + unsigned i; + // array to save set a set of indexes corresponding to the current thread identifier + unsigned index[NB_POS+2]; + T sol_tmp[SIZE]; + // In this representation each id identify one and only one neighbor in neighborhood + if (id < _neighborhoodsize) { + for(i=0;i + Copyright (C) DOLPHIN Project-Team, INRIA Lille - Nord Europe, 2006-2010 + + Karima Boufaras, Thé Van LUONG + + This software is governed by the CeCILL license under French law and + abiding by the rules of distribution of free software. You can use, + modify and/ or redistribute the software under the terms of the CeCILL + license as circulated by CEA, CNRS and INRIA at the following URL + "http://www.cecill.info". + + As a counterpart to the access to the source code and rights to copy, + modify and redistribute granted by the license, users are provided only + with a limited warranty and the software's author, the holder of the + economic rights, and the successive licensors have only limited liability. + + In this respect, the user's attention is drawn to the risks associated + with loading, using, modifying and/or developing or reproducing the + software by the user in light of its specific status of free software, + that may mean that it is complicated to manipulate, and that also + therefore means that it is reserved for developers and experienced + professionals having in-depth computer knowledge. Users are therefore + encouraged to load and test the software's suitability as regards their + requirements in conditions enabling the security of their systems and/or + data to be ensured and, more generally, to use and operate it in the + same conditions as regards security. + The fact that you are presently reading this means that you have had + knowledge of the CeCILL license and that you accept its terms. + + ParadisEO WebSite : http://paradiseo.gforge.inria.fr + Contact: paradiseo-help@lists.gforge.inria.fr + */ + +#ifndef __moGPUMappingKernelEvalByModif_H +#define __moGPUMappingKernelEvalByModif_H +/////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * The kernel function called from the host and executed in device to compute all neighbors fitness at one time + * without mapping, each thread id compute one fitness by modif of solution + * @param _eval how to evaluate each neighbor + * @param _solution the representation of solution( vector of int,float....) + * @param _allFitness the array of Fitness to save all neighbors fitness + * @param _fitness the current solution fitness + * @param _mapping associate to each threadID a set of correspondent indexes + * @param _neighborhoodsize the size of the neighborhood + */ + +template + +__global__ void moGPUMappingKernelEvalByModif(Eval _eval, T * _solution, Fitness* _allFitness, + Fitness _fitness,unsigned int * _mapping,unsigned _neighborhoodsize) { + + // The thread identifier within a grid block's + int id = blockIdx.x * blockDim.x + threadIdx.x; + //counter of number of x-change + unsigned i; + // array to save set a set of indexes corresponding to the current thread identifier + unsigned index[NB_POS+2]; + // In this representation each id identify one and only one neighbor in neighborhood + if (id < _neighborhoodsize) { + for(i=0;i