Commit aa12183c authored by breuera's avatar breuera

Added the possibility to set CUDA device ids.

Added a printing routine for CUDA device information.
parent 3b60cab1
......@@ -25,11 +25,18 @@
* TODO
*/
#include <math.h>
#include <cassert>
#include <cmath>
#include "tools/help.hh"
#include "SWE_BlockCUDA.hh"
#include "SWE_BlockCUDA_kernels.hh"
#ifndef STATICLOGGER
#define STATICLOGGER
#include "tools/Logger.hpp"
static tools::Logger s_sweLogger;
#endif
//const int TILE_SIZE=16;
//const int TILE_SIZE=8;
......@@ -73,10 +80,27 @@ void tryCUDA(cudaError_t err, const char *msg)
* cells with index (i,j) and (i+1,j) or (i,j+1)
*
* bathymetry source terms are defined for cells with indices [1,..,nx]*[1,..,ny]
*
*
* @param _offsetX offset in x-direction.
* @param _offsetY offset in y-direction.
* @param i_cudaDevice ID of the CUDA device that should be used.
*/
SWE_BlockCUDA::SWE_BlockCUDA(float _offsetX, float _offsetY)
SWE_BlockCUDA::SWE_BlockCUDA(float _offsetX, float _offsetY, const int i_cudaDevice )
: SWE_Block(_offsetX,_offsetY)
{
s_sweLogger.setProcessRank(i_cudaDevice);
cudaSetDevice(i_cudaDevice);
// check for a valid CUDA device id
#ifndef NDEBUG
int l_deviceCount;
cudaGetDeviceCount(&l_deviceCount);
assert( (i_cudaDevice >= 0) && (i_cudaDevice < l_deviceCount) );
#endif
printDeviceInformation();
if (nx % TILE_SIZE != 0) {
cout << "WARNING: nx not a multiple of TILE_SIZE -> will lead to crashes!"
......@@ -433,6 +457,41 @@ SWE_Block1D* SWE_BlockCUDA::grabGhostLayer(BoundaryEdge edge){
return NULL;
}
/**
* Print some available information about the CUDA devices.
*/
void SWE_BlockCUDA::printDeviceInformation() const {
s_sweLogger.printString("Printing device information");
//! id of the CUDA device.
int l_deviceId;
cudaGetDevice(&l_deviceId);
//! total number of CUDA devices on this host.
int l_deviceCount;
cudaGetDeviceCount(&l_deviceCount);
//! drive and runtime version
int l_driverVersion, l_runtimeVersion;
cudaDriverGetVersion(&l_driverVersion);
cudaRuntimeGetVersion(&l_runtimeVersion);
//! device properties
cudaDeviceProp l_deviceProperty;
cudaGetDeviceProperties(&l_deviceProperty, l_deviceId);
// print information about the current device
s_sweLogger.cout() << "Current CUDA device (relative to host): " << l_deviceId
<< " ( " << l_deviceCount << " in total)" << std::endl;
s_sweLogger.cout() << "CUDA device properties: "
<< l_deviceProperty.name << " (name), "
<< l_driverVersion << "/" << l_runtimeVersion << " (driver/runtime version), "
<< l_deviceProperty.major << "." << l_deviceProperty.minor << " (compute capability)"
<< std::endl;
}
//==================================================================
......
......@@ -53,7 +53,7 @@ class SWE_BlockCUDA : public SWE_Block {
public:
// Constructor and Destructor
SWE_BlockCUDA(float _offsetX = 0, float _offsetY = 0);
SWE_BlockCUDA(float _offsetX = 0, float _offsetY = 0, const int i_cudaDevice = 0);
virtual ~SWE_BlockCUDA();
// object methods
......@@ -62,7 +62,7 @@ class SWE_BlockCUDA : public SWE_Block {
// // determine maximum possible time step
// virtual float getMaxTimestep();
// deliver a pointer to proxy class that represents
// deliver a pointer to proxy class that represents
// the layer that is copied to an external ghost layer
virtual SWE_Block1D* registerCopyLayer(BoundaryEdge edge);
// "grab" the ghost layer in order to set these values externally
......@@ -78,6 +78,9 @@ class SWE_BlockCUDA : public SWE_Block {
*/
const float* getCUDA_bathymetry() { return bd; };
// print information about the CUDA device
void printDeviceInformation() const;
protected:
// synchronisation Methods
......
......@@ -50,8 +50,8 @@
*
* bathymetry source terms are defined for cells with indices [1,..,nx]*[1,..,ny]
*/
SWE_RusanovBlockCUDA::SWE_RusanovBlockCUDA(float _offsetX, float _offsetY)
: SWE_BlockCUDA(_offsetX,_offsetY)
SWE_RusanovBlockCUDA::SWE_RusanovBlockCUDA(float _offsetX, float _offsetY, const int i_cudaDevice)
: SWE_BlockCUDA(_offsetX,_offsetY, i_cudaDevice)
#ifdef DBG
, Fh(nx+1,ny+1), Fhu(nx+1,ny+1), Fhv(nx+1,ny+1),
Gh(nx+1,ny+1), Ghu(nx+1,ny+1), Ghv(nx+1,ny+1)
......
......@@ -48,7 +48,7 @@ class SWE_RusanovBlockCUDA : public SWE_BlockCUDA {
public:
// Constructor and Destructor
SWE_RusanovBlockCUDA(float _offsetX = 0, float _offsetY = 0);
SWE_RusanovBlockCUDA(float _offsetX = 0, float _offsetY = 0, const int i_cudaDevice = 0);
virtual ~SWE_RusanovBlockCUDA();
// object methods
......
......@@ -92,9 +92,11 @@ static tools::Logger s_sweLogger;
*
* @param i_offsetX spatial offset of the block in x-direction.
* @param i_offsetY spatial offset of the block in y-direction.
* @param i_cudaDevice ID of the CUDA device that should be used.
*/
SWE_WavePropagationBlockCuda::SWE_WavePropagationBlockCuda( const float i_offsetX,
const float i_offsetY ): SWE_BlockCUDA(i_offsetX,i_offsetY) {
const float i_offsetY,
const int i_cudaDevice ): SWE_BlockCUDA(i_offsetX, i_offsetY, i_cudaDevice) {
// compute the size of one 1D net-update array.
int sizeOfNetUpdates = (nx+1)*(ny+1)*sizeof(float);
......@@ -334,4 +336,9 @@ void SWE_WavePropagationBlockCuda::updateUnknowns(const float i_deltaT) {
hd, hud, hvd,
l_updateWidthX, l_updateWidthY,
nx, ny);
// synchronize the copy layer for MPI communication
#ifdef USEMPI
synchCopyLayerBeforeRead();
#endif
}
......@@ -65,7 +65,8 @@ class SWE_WavePropagationBlockCuda: public SWE_BlockCUDA {
public:
// constructor of SWE_WavePropagationBlockCuda
SWE_WavePropagationBlockCuda( const float i_offsetX = 0,
const float i_offsetY = 0 );
const float i_offsetY = 0,
const int i_cudaDevice = 0 );
// destructor of SWE_WavePropagationBlockCuda
~SWE_WavePropagationBlockCuda();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment