Added the possibility to set CUDA device ids.

Added a printing routine for CUDA device information.

Added the possibility to set CUDA device ids.
Added a printing routine for CUDA device information.
aa12183c · breuera · 3b60cab1 · aa12183c · aa12183c · aa12183c
Commit aa12183c authored Jul 04, 2012 by breuera
6 changed files
--- a/src/SWE_BlockCUDA.cu
+++ b/src/SWE_BlockCUDA.cu
@@ -25,11 +25,18 @@
 * TODO
 */

-#include <math.h>
+#include <cassert>
+#include <cmath>
 #include "tools/help.hh"
 #include "SWE_BlockCUDA.hh"
 #include "SWE_BlockCUDA_kernels.hh"

+#ifndef STATICLOGGER
+#define STATICLOGGER
+#include "tools/Logger.hpp"
+static tools::Logger s_sweLogger;
+#endif
+
 //const int TILE_SIZE=16;
 //const int TILE_SIZE=8;

@@ -73,10 +80,27 @@ void tryCUDA(cudaError_t err, const char *msg)
 * cells with index (i,j) and (i+1,j) or (i,j+1)
 *
 * bathymetry source terms are defined for cells with indices [1,..,nx]*[1,..,ny]
+ *
+ *
+ * @param _offsetX offset in x-direction.
+ * @param _offsetY offset in y-direction.
+ * @param i_cudaDevice ID of the CUDA-device, which should be used.
 */
-SWE_BlockCUDA::SWE_BlockCUDA(float _offsetX, float _offsetY)
+SWE_BlockCUDA::SWE_BlockCUDA(float _offsetX, float _offsetY, const int i_cudaDevice )
 : SWE_Block(_offsetX,_offsetY)
 {
+  s_sweLogger.setProcessRank(i_cudaDevice);
+
+  cudaSetDevice(i_cudaDevice);
+
+  // check for a valid CUDA device id
+  #ifndef NDEBUG
+  int l_deviceCount;
+  cudaGetDeviceCount(&l_deviceCount);
+  assert( (i_cudaDevice >= 0) && (i_cudaDevice < l_deviceCount) );
+  #endif
+
+  printDeviceInformation();

  if (nx % TILE_SIZE != 0) {
    cout << "WARNING: nx not a multiple of TILE_SIZE  -> will lead to crashes!" 
@@ -433,6 +457,41 @@ SWE_Block1D* SWE_BlockCUDA::grabGhostLayer(BoundaryEdge edge){
  return NULL;
 }

+/**
+ * Print the active CUDA device, the device count, the driver/runtime versions
+ * and the device name/compute capability; each query is checked via tryCUDA.
+ */
+void SWE_BlockCUDA::printDeviceInformation() const {
+  s_sweLogger.printString("Printing device information");
+
+  //! id of the CUDA device (zero-initialized so a failed query never prints garbage).
+  int l_deviceId = 0;
+  tryCUDA(cudaGetDevice(&l_deviceId), "printDeviceInformation: cudaGetDevice failed");
+
+  //! total number of CUDA devices on this host.
+  int l_deviceCount = 0;
+  tryCUDA(cudaGetDeviceCount(&l_deviceCount), "printDeviceInformation: cudaGetDeviceCount failed");
+
+  //! driver and runtime version
+  int l_driverVersion = 0, l_runtimeVersion = 0;
+  tryCUDA(cudaDriverGetVersion(&l_driverVersion), "printDeviceInformation: cudaDriverGetVersion failed");
+  tryCUDA(cudaRuntimeGetVersion(&l_runtimeVersion), "printDeviceInformation: cudaRuntimeGetVersion failed");
+
+  //! device properties (value-initialized: all fields zero, name is an empty string)
+  cudaDeviceProp l_deviceProperty = cudaDeviceProp();
+  tryCUDA(cudaGetDeviceProperties(&l_deviceProperty, l_deviceId), "printDeviceInformation: cudaGetDeviceProperties failed");
+
+  // print information about the current device
+  s_sweLogger.cout() << "Current CUDA device (relative to host): " << l_deviceId
+                     << " ( " << l_deviceCount << " in total)" << std::endl;
+
+  s_sweLogger.cout() << "CUDA device properties: "
+                     << l_deviceProperty.name << " (name), "
+                     << l_driverVersion << "/" << l_runtimeVersion << " (driver/runtime version), "
+                     << l_deviceProperty.major << "." << l_deviceProperty.minor << " (compute capability)"
+                     << std::endl;
+}
+


 //==================================================================

--- a/src/SWE_BlockCUDA.hh
+++ b/src/SWE_BlockCUDA.hh
@@ -53,7 +53,7 @@ class SWE_BlockCUDA : public SWE_Block {

  public:
    // Constructor und Destructor
-    SWE_BlockCUDA(float _offsetX = 0, float _offsetY = 0);
+    SWE_BlockCUDA(float _offsetX = 0, float _offsetY = 0, const int i_cudaDevice = 0);
    virtual ~SWE_BlockCUDA();
    
  // object methods
@@ -62,7 +62,7 @@ class SWE_BlockCUDA : public SWE_Block {
 //     // determine maximum possible time step
 //     virtual float getMaxTimestep();

-    // deliver a pointer to proxy class that represents 
+    // deliver a pointer to proxy class that represents
    // the layer that is copied to an external ghost layer 
    virtual SWE_Block1D* registerCopyLayer(BoundaryEdge edge);
    // "grab" the ghost layer in order to set these values externally
@@ -78,6 +78,9 @@ class SWE_BlockCUDA : public SWE_Block {
     */
    const float* getCUDA_bathymetry() { return bd; };

+    // print information about the CUDA device
+    void printDeviceInformation() const;
+
  protected:
     
    // synchronisation Methods

--- a/src/SWE_RusanovBlockCUDA.cu
+++ b/src/SWE_RusanovBlockCUDA.cu
@@ -50,8 +50,8 @@
 *
 * bathymetry source terms are defined for cells with indices [1,..,nx]*[1,..,ny]
 */
-SWE_RusanovBlockCUDA::SWE_RusanovBlockCUDA(float _offsetX, float _offsetY)
- : SWE_BlockCUDA(_offsetX,_offsetY)
+SWE_RusanovBlockCUDA::SWE_RusanovBlockCUDA(float _offsetX, float _offsetY, const int i_cudaDevice)
+ : SWE_BlockCUDA(_offsetX,_offsetY, i_cudaDevice)
 #ifdef DBG
 , Fh(nx+1,ny+1), Fhu(nx+1,ny+1), Fhv(nx+1,ny+1),
   Gh(nx+1,ny+1), Ghu(nx+1,ny+1), Ghv(nx+1,ny+1)

--- a/src/SWE_RusanovBlockCUDA.hh
+++ b/src/SWE_RusanovBlockCUDA.hh
@@ -48,7 +48,7 @@ class SWE_RusanovBlockCUDA : public SWE_BlockCUDA {

  public:
    // Constructor und Destructor
-    SWE_RusanovBlockCUDA(float _offsetX = 0, float _offsetY = 0);
+    SWE_RusanovBlockCUDA(float _offsetX = 0, float _offsetY = 0, const int i_cudaDevice = 0);
    virtual ~SWE_RusanovBlockCUDA();
    
  // object methods

--- a/src/SWE_WavePropagationBlockCuda.cu
+++ b/src/SWE_WavePropagationBlockCuda.cu
@@ -92,9 +92,11 @@ static tools::Logger s_sweLogger;
 *
 * @param i_offsetX spatial offset of the block in x-direction.
 * @param i_offsetY spatial offset of the offset in y-direction.
+ * @param i_cudaDevice ID of the CUDA-device, which should be used.
 */
 SWE_WavePropagationBlockCuda::SWE_WavePropagationBlockCuda( const float i_offsetX,
-                                                            const float i_offsetY ): SWE_BlockCUDA(i_offsetX,i_offsetY) {
+                                                            const float i_offsetY,
+                                                            const int i_cudaDevice ): SWE_BlockCUDA(i_offsetX, i_offsetY, i_cudaDevice) {
  // compute the size of one 1D net-update array.
  int sizeOfNetUpdates = (nx+1)*(ny+1)*sizeof(float);

@@ -334,4 +336,9 @@ void SWE_WavePropagationBlockCuda::updateUnknowns(const float i_deltaT) {
                                              hd, hud, hvd,
                                              l_updateWidthX, l_updateWidthY,
                                              nx, ny);
+
+  // synchronize the copy layer for MPI communication
+  #ifdef USEMPI
+  synchCopyLayerBeforeRead();
+  #endif
 }
--- a/src/SWE_WavePropagationBlockCuda.hh
+++ b/src/SWE_WavePropagationBlockCuda.hh
@@ -65,7 +65,8 @@ class SWE_WavePropagationBlockCuda: public SWE_BlockCUDA {
  public:
    // constructor of SWE_WavePropagationBlockCuda
    SWE_WavePropagationBlockCuda( const float i_offsetX = 0,
-                                  const float i_offsetY = 0 );
+                                  const float i_offsetY = 0,
+                                  const int i_cudaDevice = 0 );

    // destructor of SWE_WavePropagationBlockCuda
    ~SWE_WavePropagationBlockCuda();