Commit 92ed477e authored by breuera's avatar breuera

Added the CUDA-files. Changed a few routines accordingly.

parent f935a3b2
This diff is collapsed.
/**
* @file
* This file is part of SWE.
*
* @author Michael Bader, Kaveh Rahnema, Tobias Schnabel
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* TODO
*/
#ifndef __SWE_BLOCKCUDA_HH
#define __SWE_BLOCKCUDA_HH
#include <iostream>
#include <stdio.h>
#include <fstream>
#include <cuda_runtime.h>
#include "tools/help.hh"
#include "SWE_Block.hh"
// NOTE(review): a using-directive in a header leaks namespace std into every
// file that includes it. It is relied upon below (unqualified `ostream`).
using namespace std;
// CUDA error-handling helpers. Only declared here; their definitions are not
// part of this header.
void checkCUDAError(const char *msg);
void tryCUDA(cudaError_t err, const char *msg);
// Tile edge length: the kernels map (blockIdx, threadIdx) to grid cells as
// TILE_SIZE*blockIdx + threadIdx and size their shared-memory tiles from it.
const int TILE_SIZE=16;
//const int TILE_SIZE=8;
/**
* SWE_BlockCUDA extends the base class SWE_Block towards
* a base class for a CUDA implementation of the shallow water equations.
* It adds the respective variables in GPU memory, and provides
* methods for data transfer between main and GPU memory.
*/
class SWE_BlockCUDA : public SWE_Block {
  public:
    // Constructor and destructor
    SWE_BlockCUDA(float _offsetX = 0, float _offsetY = 0);
    virtual ~SWE_BlockCUDA();

    // object methods
    // ---> COULD BE IMPLEMENTED TO PROVIDE A DEFAULT IMPLEMENTATION
    // // determine maximum possible time step
    // virtual float getMaxTimestep();

    // deliver a pointer to proxy class that represents
    // the layer that is copied to an external ghost layer
    virtual SWE_Block1D* registerCopyLayer(BoundaryEdge edge);
    // "grab" the ghost layer in order to set these values externally
    virtual SWE_Block1D* grabGhostLayer(BoundaryEdge edge);

    // access to CUDA variables
    // (const-qualified so that they can also be used through a
    //  const SWE_BlockCUDA&, e.g. inside operator<<)
    /**
     * @return pointer to the array #hd (water height) in device memory
     */
    const float* getCUDA_waterHeight() const { return hd; }
    /**
     * @return pointer to the array #bd (bathymetry) in device memory
     */
    const float* getCUDA_bathymetry() const { return bd; }

  protected:
    // synchronisation methods
    virtual void synchAfterWrite();
    virtual void synchWaterHeightAfterWrite();
    virtual void synchDischargeAfterWrite();
    virtual void synchBathymetryAfterWrite();
    virtual void synchGhostLayerAfterWrite();

    virtual void synchBeforeRead();
    virtual void synchWaterHeightBeforeRead();
    virtual void synchDischargeBeforeRead();
    virtual void synchBathymetryBeforeRead();
    virtual void synchCopyLayerBeforeRead();

    // set boundary conditions in ghost layers
    virtual void setBoundaryConditions();

    // define arrays for main unknowns in CUDA global memory:
    // hd, hud, hvd, and bd are CUDA arrays corresp. to h, hu, hv, and b
    float* hd;
    float* hud;
    float* hvd;
    float* bd;

  private:
    // separate memory to hold bottom and top ghost and copy layer
    // in main memory allowing non-strided access
    float* bottomLayer;
    float* topLayer;
    SWE_Block1D* bottomGhostLayer;
    SWE_Block1D* bottomCopyLayer;
    SWE_Block1D* topGhostLayer;
    SWE_Block1D* topCopyLayer;
    // and resp. memory on the CUDA device:
    float* bottomLayerDevice;
    float* topLayerDevice;

    // helper arrays: store maximum height and velocities to determine time step
    float* maxhd;
    float* maxvd;

    // overload operator<< such that data can be written via cout <<
    // -> needs to be declared as friend to be allowed to access private data
    friend ostream& operator<< (ostream& os, const SWE_BlockCUDA& swe);
};

// stream output of a SWE_BlockCUDA (declared friend of the class above)
ostream& operator<< (ostream& os, const SWE_BlockCUDA& swe);
/**
 * Return the index of cell (x,y) in a linearised cell array such as hd.
 *
 * Consecutive x-indices are ny+2 entries apart, i.e. every x-column holds
 * the ny inner cells plus two additional entries.
 * Callable from both host and device code.
 *
 * @param x x-index of the grid cell
 * @param y y-index of the grid cell
 * @param ny grid size in y-direction (without ghost layers)
 * @return linear index x*(ny+2) + y
 */
inline __host__ __device__
int getCellCoord(int x, int y, int ny) {
    return x*(ny+2) + y;
}
/**
 * Return the index of edge data (e.g. Fhd or Ghd) for position (x,y) in a
 * linearised edge array.
 *
 * Consecutive x-indices are ny+1 entries apart.
 * Callable from both host and device code.
 *
 * @param x x-index of the edge
 * @param y y-index of the edge
 * @param ny grid size in y-direction (without ghost layers)
 * @return linear index x*(ny+1) + y
 */
inline __host__ __device__
int getEdgeCoord(int x, int y, int ny) {
    return x*(ny+1) + y;
}
/**
 * Return the index of element (x,y) in the linearised arrays of bathymetry
 * source terms.
 *
 * Consecutive x-indices are ny entries apart (no extra entries per column).
 * Callable from both host and device code.
 *
 * @param x x-index of the element
 * @param y y-index of the element
 * @param ny grid size in y-direction (without ghost layers)
 * @return linear index x*ny + y
 */
inline __host__ __device__
int getBathyCoord(int x, int y, int ny) {
    return x*ny + y;
}
#endif
/**
* @file
* This file is part of SWE.
*
* @author Michael Bader (bader AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Univ.-Prof._Dr._Michael_Bader)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* TODO
*/
#include "SWE_BlockCUDA.hh"
#include "SWE_BlockCUDA_kernels.hh"
/**
 * Fills the four corner cells of hd with the value of their diagonal
 * inner neighbour (only needed for visualization).
 *
 * Corresponding C code:
 *   h[0][0]       = h[1][1];
 *   h[0][ny+1]    = h[1][ny];
 *   h[nx+1][0]    = h[nx][1];
 *   h[nx+1][ny+1] = h[nx][ny];
 *
 * The kernel does not use blockIdx/threadIdx: every thread it is launched
 * with performs the same four assignments.
 *
 * @param hd h-values on device
 * @param nx grid size in x-direction (without ghost layers)
 * @param ny grid size in y-direction (without ghost layers)
 */
__global__
void kernelHdBufferEdges(float* hd, int nx, int ny)
{
    // corner cell indices and the inner cells they are filled from
    const int cornerX[2] = { 0, nx + 1 };
    const int sourceX[2] = { 1, nx };
    const int cornerY[2] = { 0, ny + 1 };
    const int sourceY[2] = { 1, ny };

    for (int a = 0; a < 2; ++a) {
        for (int b = 0; b < 2; ++b) {
            hd[getCellCoord(cornerX[a], cornerY[b], ny)] =
                hd[getCellCoord(sourceX[a], sourceY[b], ny)];
        }
    }
}
//******************************************************************
// kernels to implement boundary conditions
//******************************************************************
/**
 * CUDA kernel to set the left boundary layer (cells with x-index 0) for
 * conditions WALL & OUTFLOW.
 *
 * blockIdx.y and threadIdx.y loop over the boundary elements: a thread
 * handles row j = 1 + TILE_SIZE*blockIdx.y + threadIdx.y. Threads whose row
 * lies beyond ny return without touching memory, so a launch that overshoots
 * ny (e.g. ny not a multiple of TILE_SIZE) cannot write out of bounds.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param nx  grid size in x-direction (not used by this kernel)
 * @param ny  grid size in y-direction (without ghost layers)
 * @param bound boundary type; WALL mirrors hud (sign flip), every other
 *              value copies hud unchanged
 */
__global__
void kernelLeftBoundary(float* hd, float* hud, float* hvd,
                        int nx, int ny, BoundaryType bound)
{
    int j = 1 + TILE_SIZE*blockIdx.y + threadIdx.y;
    // skip rows outside the inner range 1..ny
    if (j > ny) return;

    int ghost = getCellCoord(0,j,ny);
    int inner = getCellCoord(1,j,ny);

    // consider only WALL & OUTFLOW boundary conditions
    hd[ghost] = hd[inner];
    hud[ghost] = (bound==WALL) ? -hud[inner] : hud[inner];
    hvd[ghost] = hvd[inner];
}
/**
 * CUDA kernel to set the right boundary layer (cells with x-index nx+1) for
 * conditions WALL & OUTFLOW.
 *
 * blockIdx.y and threadIdx.y loop over the boundary elements: a thread
 * handles row j = 1 + TILE_SIZE*blockIdx.y + threadIdx.y. Threads whose row
 * lies beyond ny return without touching memory, so a launch that overshoots
 * ny (e.g. ny not a multiple of TILE_SIZE) cannot write out of bounds.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 * @param bound boundary type; WALL mirrors hud (sign flip), every other
 *              value copies hud unchanged
 */
__global__
void kernelRightBoundary(float* hd, float* hud, float* hvd,
                         int nx, int ny, BoundaryType bound)
{
    int j = 1 + TILE_SIZE*blockIdx.y + threadIdx.y;
    // skip rows outside the inner range 1..ny
    if (j > ny) return;

    int ghost = getCellCoord(nx+1,j,ny);
    int inner = getCellCoord(nx  ,j,ny);

    // consider only WALL & OUTFLOW boundary conditions
    hd[ghost] = hd[inner];
    hud[ghost] = (bound==WALL) ? -hud[inner] : hud[inner];
    hvd[ghost] = hvd[inner];
}
/**
 * CUDA kernel to set the bottom boundary layer (cells with y-index 0) for
 * conditions WALL & OUTFLOW.
 *
 * blockIdx.x and threadIdx.x loop over the boundary elements: a thread
 * handles column i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x. Threads whose
 * column lies beyond nx return without touching memory, so a launch that
 * overshoots nx (e.g. nx not a multiple of TILE_SIZE) cannot write out of
 * bounds.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 * @param bound boundary type; WALL mirrors hvd (sign flip), every other
 *              value copies hvd unchanged
 */
__global__
void kernelBottomBoundary(float* hd, float* hud, float* hvd,
                          int nx, int ny, BoundaryType bound)
{
    int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    // skip columns outside the inner range 1..nx
    if (i > nx) return;

    int ghost = getCellCoord(i,0,ny);
    int inner = getCellCoord(i,1,ny);

    // consider only WALL & OUTFLOW boundary conditions
    hd[ghost] = hd[inner];
    hud[ghost] = hud[inner];
    hvd[ghost] = (bound==WALL) ? -hvd[inner] : hvd[inner];
}
/**
 * CUDA kernel to set the top boundary layer (cells with y-index ny+1) for
 * conditions WALL & OUTFLOW.
 *
 * blockIdx.x and threadIdx.x loop over the boundary elements: a thread
 * handles column i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x. Threads whose
 * column lies beyond nx return without touching memory, so a launch that
 * overshoots nx (e.g. nx not a multiple of TILE_SIZE) cannot write out of
 * bounds.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 * @param bound boundary type; WALL mirrors hvd (sign flip), every other
 *              value copies hvd unchanged
 */
__global__
void kernelTopBoundary(float* hd, float* hud, float* hvd,
                       int nx, int ny, BoundaryType bound)
{
    int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    // skip columns outside the inner range 1..nx
    if (i > nx) return;

    int ghost = getCellCoord(i,ny+1,ny);
    int inner = getCellCoord(i,ny  ,ny);

    // consider only WALL & OUTFLOW boundary conditions
    hd[ghost] = hd[inner];
    hud[ghost] = hud[inner];
    hvd[ghost] = (bound==WALL) ? -hvd[inner] : hvd[inner];
}
/**
 * CUDA kernel to set the bottom boundary layer (cells with y-index 0)
 * according to the external ghost layer status (conditions PASSIVE and
 * CONNECT).
 *
 * blockIdx.x and threadIdx.x loop over the boundary elements: a thread
 * handles column i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x. Threads whose
 * column lies beyond nx return without touching memory.
 * Note that diagonal elements are currently not copied!
 *
 * bottomGhostLayer is read as three consecutive segments of nx+2 floats
 * each, holding h, hu and hv in that order.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param bottomGhostLayer source buffer on device (3*(nx+2) floats)
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 */
__global__
void kernelBottomGhostBoundary(float* hd, float* hud, float* hvd,
                               float* bottomGhostLayer, int nx, int ny)
{
    int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    // skip columns outside the inner range 1..nx
    if (i > nx) return;

    int ghost = getCellCoord(i,0,ny);

    hd[ghost] = bottomGhostLayer[i];
    hud[ghost] = bottomGhostLayer[(nx+2)+i];
    hvd[ghost] = bottomGhostLayer[2*(nx+2)+i];
}
/**
 * CUDA kernel to set the top boundary layer (cells with y-index ny+1)
 * according to the external ghost layer status (conditions PASSIVE and
 * CONNECT).
 *
 * blockIdx.x and threadIdx.x loop over the boundary elements: a thread
 * handles column i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x. Threads whose
 * column lies beyond nx return without touching memory.
 * Note that diagonal elements are currently not copied!
 *
 * topGhostLayer is read as three consecutive segments of nx+2 floats each,
 * holding h, hu and hv in that order.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param topGhostLayer source buffer on device (3*(nx+2) floats)
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 */
__global__
void kernelTopGhostBoundary(float* hd, float* hud, float* hvd,
                            float* topGhostLayer, int nx, int ny)
{
    int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    // skip columns outside the inner range 1..nx
    if (i > nx) return;

    int ghost = getCellCoord(i,ny+1,ny);

    hd[ghost] = topGhostLayer[i];
    hud[ghost] = topGhostLayer[(nx+2)+i];
    hvd[ghost] = topGhostLayer[2*(nx+2)+i];
}
/**
 * CUDA kernel to update the bottom copy layer from the first inner row
 * (cells with y-index 1), for boundary conditions PASSIVE and CONNECT.
 *
 * blockIdx.x and threadIdx.x loop over the boundary elements: a thread
 * handles column i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x. Threads whose
 * column lies beyond nx return without touching memory.
 * Note that diagonal elements are currently not copied!
 *
 * bottomCopyLayer is written as three consecutive segments of nx+2 floats
 * each, holding h, hu and hv in that order.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param bottomCopyLayer destination buffer on device (3*(nx+2) floats)
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 */
__global__
void kernelBottomCopyLayer(float* hd, float* hud, float* hvd,
                           float* bottomCopyLayer, int nx, int ny)
{
    int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    // skip columns outside the inner range 1..nx
    if (i > nx) return;

    int copy = getCellCoord(i,1,ny);

    bottomCopyLayer[i] = hd[copy];
    bottomCopyLayer[(nx+2)+i] = hud[copy];
    bottomCopyLayer[2*(nx+2)+i] = hvd[copy];
}
/**
 * CUDA kernel to update the top copy layer from the last inner row
 * (cells with y-index ny), for boundary conditions PASSIVE and CONNECT.
 *
 * blockIdx.x and threadIdx.x loop over the boundary elements: a thread
 * handles column i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x. Threads whose
 * column lies beyond nx return without touching memory.
 * Note that diagonal elements are currently not copied!
 *
 * topCopyLayer is written as three consecutive segments of nx+2 floats
 * each, holding h, hu and hv in that order.
 *
 * @param hd  water height on device
 * @param hud discharge in x-direction on device
 * @param hvd discharge in y-direction on device
 * @param topCopyLayer destination buffer on device (3*(nx+2) floats)
 * @param nx  grid size in x-direction (without ghost layers)
 * @param ny  grid size in y-direction (without ghost layers)
 */
__global__
void kernelTopCopyLayer(float* hd, float* hud, float* hvd,
                        float* topCopyLayer, int nx, int ny)
{
    int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    // skip columns outside the inner range 1..nx
    if (i > nx) return;

    int copy = getCellCoord(i,ny,ny);

    topCopyLayer[i] = hd[copy];
    topCopyLayer[(nx+2)+i] = hud[copy];
    topCopyLayer[2*(nx+2)+i] = hvd[copy];
}
// //******************************************************************
// // kernels to implement boundary conditions
// //******************************************************************
//
//
// /**
// * CUDA kernel for maximum reduction
// * required to compute maximum water height and velocities to determine
// * allow time step
// */
// __global__
// void kernelMaximum(float* maxhd, float* maxvd, int start, int size) {
// int tx = start+threadIdx.x;
// for (int i=size>>1; i>0; i>>=1) {
// __syncthreads();
// if (tx < i) {
// if( maxhd[tx] < maxhd[tx+i] ) maxhd[tx] = maxhd[tx+i];
// if( maxvd[tx] < maxvd[tx+i] ) maxvd[tx] = maxvd[tx+i];
// };
// };
// }
//
//
/**
* @file
* This file is part of SWE.
*
* @author Michael Bader (bader AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Univ.-Prof._Dr._Michael_Bader)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* TODO
*/
#ifndef __SWE_BLOCKCUDAKERNELS_HH
#define __SWE_BLOCKCUDAKERNELS_HH
// declaration of CUDA kernels
// NOTE(review): the prototypes below use BoundaryType, which this header does
// not declare itself; it has to be visible before this header is included.

// copies diagonal inner neighbours into the four corner cells of hd
__global__
void kernelHdBufferEdges(float* hd, int nx, int ny);
// maximum reduction over maxhd and maxvd
// NOTE(review): also declared in SWE_RusanovBlockCUDA_kernels.hh, where the
// active definition accompanies the Rusanov kernels.
__global__
void kernelMaximum(float* maxhd, float* maxvd, int start, int size);
// WALL / OUTFLOW conditions on the left, right, bottom and top ghost layer
__global__
void kernelLeftBoundary(float* hd, float* hud, float* hvd,
int nx, int ny, BoundaryType bound);
__global__
void kernelRightBoundary(float* hd, float* hud, float* hvd,
int nx, int ny, BoundaryType bound);
__global__
void kernelBottomBoundary(float* hd, float* hud, float* hvd,
int nx, int ny, BoundaryType bound);
__global__
void kernelTopBoundary(float* hd, float* hud, float* hvd,
int nx, int ny, BoundaryType bound);
// fill bottom / top ghost layer from an external buffer (PASSIVE, CONNECT)
__global__
void kernelBottomGhostBoundary(float* hd, float* hud, float* hvd,
float* bottomGhostLayer, int nx, int ny);
__global__
void kernelTopGhostBoundary(float* hd, float* hud, float* hvd,
float* topGhostLayer, int nx, int ny);
// write the first / last inner row into the bottom / top copy-layer buffer
__global__
void kernelBottomCopyLayer(float* hd, float* hud, float* hvd,
float* bottomCopyLayer, int nx, int ny);
__global__
void kernelTopCopyLayer(float* hd, float* hud, float* hvd,
float* topCopyLayer, int nx, int ny);
#endif
This diff is collapsed.
/**
* @file
* This file is part of SWE.
*
* @author Michael Bader, Kaveh Rahnema, Tobias Schnabel
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* TODO
*/
#ifndef __SWE_RUSANOVBLOCKCUDA_HH
#define __SWE_RUSANOVBLOCKCUDA_HH
#include <iostream>
#include <stdio.h>
#include <fstream>
#include <cuda_runtime.h>
#include "tools/help.hh"
#include "SWE_Block.hh"
#include "SWE_BlockCUDA.hh"
using namespace std;
/**
* SWE_RusanovBlockCUDA extends the base class SWE_BlockCUDA,
* and provides a concrete CUDA implementation of a simple
* shallow water model based on Rusanov Flux computation on the
* edges and explicit time stepping.
*/
class SWE_RusanovBlockCUDA : public SWE_BlockCUDA {
public:
// Constructor and destructor
SWE_RusanovBlockCUDA(float _offsetX = 0, float _offsetY = 0);
virtual ~SWE_RusanovBlockCUDA();
// object methods
// compute flux terms on edges
virtual void computeNumericalFluxes();
// execute Euler time step
virtual void updateUnknowns(float dt);
/// execute a single time step of the simulation
virtual void simulateTimestep(float dt);
// simulate for specified time range
// NOTE(review): meaning of the returned float is not visible here -- confirm
// against the implementation.
virtual float simulate(float tStart, float tEnd);
private:
// compute bathymetry source terms
void computeBathymetrySources();
// determine maximum possible time step
void computeMaxTimestepCUDA();
// arrays to hold the values of the flux terms at cell edges
// (the trailing "d" follows the naming of the device arrays hd, hud, hvd)
float* Fhd;
float* Fhud;
float* Fhvd;
float* Ghd;
float* Ghud;
float* Ghvd;
// arrays to hold the bathymetry source terms for the hu and hv equations
float* Bxd;
float* Byd;
// helper arrays: store maximum height and velocities to determine time step
// NOTE(review): SWE_BlockCUDA declares private members with the same names;
// these are separate members of this class.
float* maxhd;
float* maxvd;
// overload operator<< such that data can be written via cout <<
// -> needs to be declared as friend to be allowed to access private data
friend ostream& operator<< (ostream& os, const SWE_RusanovBlockCUDA& swe);
#ifdef DBG
// --- only required for debugging purposes ---
// arrays for fluxes for h,hu,hv in main memory
Float2D Fh;
Float2D Fhu;
Float2D Fhv;
Float2D Gh;
Float2D Ghu;
Float2D Ghv;
// dump fluxes for h,hu,hv from CUDA device memory into main memory
void cudaDumpFlux();
#endif
};
// stream output of a SWE_RusanovBlockCUDA (declared friend of the class above)
ostream& operator<< (ostream& os, const SWE_RusanovBlockCUDA& swe);
#endif
/**
* @file
* This file is part of SWE.
*
* @author Michael Bader (bader AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Univ.-Prof._Dr._Michael_Bader)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* TODO
*/
#include "SWE_BlockCUDA.hh"
#include "SWE_RusanovBlockCUDA_kernels.hh"
//******************************************************************
// kernels to implement Euler time-stepping
//******************************************************************
/**
 * Local Lax-Friedrichs flux across one edge.
 *
 * @param fLow   flux function value in the lower (left/bottom) cell
 * @param fHigh  flux function value in the higher (right/top) cell
 * @param xiLow  unknown in the lower cell
 * @param xiHigh unknown in the higher cell
 * @param llf    factor applied to the jump of the unknown
 * @return 0.5*(fLow+fHigh) - 0.5*llf*(xiHigh-xiLow)
 */
inline __device__
float computeFlux(float fLow, float fHigh, float xiLow, float xiHigh, float llf) {
    // central average of the two flux values
    const float centred = 0.5f*(fLow+fHigh);
    // diffusive correction proportional to the jump of the unknown
    const float diffusion = 0.5f*llf*(xiHigh-xiLow);
    return centred - diffusion;
}
/**
 * Computes the flux vector components Fhd, Fhud and Fhvd for a single
 * vertical edge by calling the function computeFlux.
 *
 * A thread handles the edge between cell (i,j) and cell (i+1,j), with
 *   i = istart + TILE_SIZE*blockIdx.x + threadIdx.x
 *   j = 1      + TILE_SIZE*blockIdx.y + threadIdx.y
 * No bounds check is performed.
 *
 * NOTE(review): the h-flux uses the locally computed velocity maximum as
 * diffusion factor, whereas the hu- and hv-fluxes use the parameter llf.
 * NOTE(review): hd is used as divisor without a check against zero.
 *
 * @param hd,hud,hvd    cell unknowns on device
 * @param Fhd,Fhud,Fhvd edge fluxes on device (output)
 * @param ny     grid size in y-direction (without ghost layers)
 * @param g      factor of the 0.5*g*h*h term
 * @param llf    diffusion factor for the hu- and hv-flux
 * @param istart x-index offset of the first handled edge
 */
__global__
void kernelComputeFluxesF(float* hd, float* hud, float* hvd,
                          float* Fhd, float* Fhud, float* Fhvd,
                          int ny, float g, float llf, int istart)
{
    const int i = istart + TILE_SIZE*blockIdx.x + threadIdx.x;
    const int j = 1 + TILE_SIZE*blockIdx.y + threadIdx.y;

    const int cellL = getCellCoord(i,j,ny);   // left cell
    const int cellR = getCellCoord(i+1,j,ny); // right cell
    const int edge  = getEdgeCoord(i,j,ny);   // edge between them

    // unknowns of both adjacent cells
    const float hL  = hd[cellL];
    const float huL = hud[cellL];
    const float hvL = hvd[cellL];
    const float hR  = hd[cellR];
    const float huR = hud[cellR];
    const float hvR = hvd[cellR];

    // larger absolute x-velocity of the two cells
    const float upwind = max( fabs(huL/hL), fabs(huR/hR) );

    Fhd[edge]  = computeFlux( huL, huR, hL, hR, upwind );
    Fhud[edge] = computeFlux( huL*huL/hL + 0.5f*g*hL*hL,
                              huR*huR/hR + 0.5f*g*hR*hR,
                              huL, huR,
                              llf );
    Fhvd[edge] = computeFlux( huL*hvL/hL, huR*hvR/hR,
                              hvL, hvR,
                              llf );
}
/**
 * Computes the flux vector components Ghd, Ghud and Ghvd for a single
 * horizontal edge by calling the function computeFlux.
 *
 * A thread handles the edge between cell (i,j) and cell (i,j+1), with
 *   i = 1      + TILE_SIZE*blockIdx.x + threadIdx.x
 *   j = jstart + TILE_SIZE*blockIdx.y + threadIdx.y
 * No bounds check is performed.
 *
 * NOTE(review): the h-flux uses the locally computed velocity maximum as
 * diffusion factor, whereas the hu- and hv-fluxes use the parameter llf.
 * NOTE(review): hd is used as divisor without a check against zero.
 *
 * @param hd,hud,hvd    cell unknowns on device
 * @param Ghd,Ghud,Ghvd edge fluxes on device (output)
 * @param ny     grid size in y-direction (without ghost layers)
 * @param g      factor of the 0.5*g*h*h term
 * @param llf    diffusion factor for the hu- and hv-flux
 * @param jstart y-index offset of the first handled edge
 */
__global__
void kernelComputeFluxesG(float* hd, float* hud, float* hvd,
                          float* Ghd, float* Ghud, float* Ghvd,
                          int ny, float g, float llf, int jstart)
{
    const int i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x;
    const int j = jstart + TILE_SIZE*blockIdx.y + threadIdx.y;

    const int cellB = getCellCoord(i,j  ,ny); // bottom cell
    const int cellT = getCellCoord(i,j+1,ny); // top cell
    const int edge  = getEdgeCoord(i,j,ny);   // edge between them

    // unknowns of both adjacent cells
    const float hB  = hd[cellB];
    const float huB = hud[cellB];
    const float hvB = hvd[cellB];
    const float hT  = hd[cellT];
    const float huT = hud[cellT];
    const float hvT = hvd[cellT];

    // larger absolute y-velocity of the two cells
    const float upwind = max( fabs(hvB/hB), fabs(hvT/hT) );

    Ghd[edge]  = computeFlux( hvB, hvT, hB, hT, upwind );
    Ghud[edge] = computeFlux( huB*hvB/hB, huT*hvT/hT,
                              huB, huT,
                              llf );
    Ghvd[edge] = computeFlux( hvB*hvB/hB + 0.5f*g*hB*hB,
                              hvT*hvT/hT + 0.5f*g*hT*hT,
                              hvB, hvT,
                              llf );
}
/**
 * Computes the bathymetry source terms for the hu and hv equation for
 * a given cell and stores them in the resp. array elements of Bxd and Byd.
 *
 * Index ranges differ between the arrays: hd and bd are addressed via
 * getCellCoord (including the ghost layers), Bxd and Byd via getBathyCoord
 * with indices shifted to start at 0. Entry (i,j) of Bxd/Byd therefore
 * belongs to cell (i+1,j+1) of hd/bd, and its neighbours are
 * (i,j+1), (i+2,j+1) in x-direction and (i+1,j), (i+1,j+2) in y-direction.
 * No bounds check is performed.
 *
 * @param hd  water height on device
 * @param bd  bathymetry on device
 * @param Bxd source term for the hu equation (output)
 * @param Byd source term for the hv equation (output)
 * @param ny  grid size in y-direction (without ghost layers)
 * @param g   factor multiplied onto both source terms
 */
__global__
void kernelComputeBathymetrySources(float* hd, float* bd, float* Bxd, float* Byd,
                                    int ny, float g)
{
    // zero-based position in Bxd / Byd
    const int i = TILE_SIZE*blockIdx.x + threadIdx.x;
    const int j = TILE_SIZE*blockIdx.y + threadIdx.y;
    const int target = getBathyCoord(i,j,ny);

    // x-direction: left and right neighbour cell (arrays hd, bd)
    const int west = getCellCoord(i  ,j+1,ny);
    const int east = getCellCoord(i+2,j+1,ny);
    const float hEast = hd[east];
    const float hWest = hd[west];
    const float bEast = bd[east];
    const float bWest = bd[west];
    Bxd[target] = g * 0.5f*(hEast + hWest) * 0.5f*(bEast - bWest);

    // y-direction: bottom and top neighbour cell (arrays hd, bd)
    const int south = getCellCoord(i+1,j  ,ny);
    const int north = getCellCoord(i+1,j+2,ny);
    const float hNorth = hd[north];
    const float hSouth = hd[south];
    const float bNorth = bd[north];
    const float bSouth = bd[south];
    Byd[target] = g * 0.5f*(hNorth + hSouth) * 0.5f*(bNorth - bSouth);
}
/**
 * CUDA kernel for the Euler time step.
 *
 * Each thread updates one inner cell (i,j) with
 *   i = 1 + TILE_SIZE*blockIdx.x + threadIdx.x
 *   j = 1 + TILE_SIZE*blockIdx.y + threadIdx.y
 * from the edge fluxes F*, G* and the bathymetry source terms, staging the
 * fluxes of the thread block in shared memory. Afterwards the block reduces
 * the new water heights and the absolute velocities to one maximum each and
 * stores them in maxhd/maxvd at the block's linear index
 * blockIdx.x*gridDim.y + blockIdx.y.
 *
 * Preconditions: the thread block is TILE_SIZE x TILE_SIZE (the shared
 * tiles and the reduction rely on it), and all threads of a block reach
 * every __syncthreads(), hence no early exit / bounds guard is used here.
 *
 * @param hd,hud,hvd      cell unknowns on device (updated in place)
 * @param Fhd,Fhud,Fhvd   fluxes on vertical edges
 * @param Ghd,Ghud,Ghvd   fluxes on horizontal edges
 * @param Bxd,Byd         bathymetry source terms
 * @param maxhd,maxvd     per-block maxima (output), one entry per block
 * @param nx grid size in x-direction (without ghost layers; not used)
 * @param ny grid size in y-direction (without ghost layers)
 * @param dt time step size
 * @param dxi,dyi factors multiplied onto the flux differences in x / y
 */
__global__
void kernelEulerTimestep(float* hd, float* hud, float* hvd,
                         float* Fhd, float* Fhud, float* Fhvd,
                         float* Ghd, float* Ghud, float* Ghvd,
                         float* Bxd, float* Byd,
                         float* maxhd, float* maxvd,
                         int nx, int ny, float dt, float dxi, float dyi)
{
    __shared__ float Fds[TILE_SIZE+1][TILE_SIZE+1];
    __shared__ float Gds[TILE_SIZE+1][TILE_SIZE+1];

    int tx = threadIdx.x;
    int ty = threadIdx.y;
    int i = 1 + TILE_SIZE*blockIdx.x + tx;
    int j = 1 + TILE_SIZE*blockIdx.y + ty;

    int iElem = getCellCoord(i,j,ny);   // index of current cell
    int iEdge = getEdgeCoord(i,j,ny);   // index of right/top edge
    int iLeft = getEdgeCoord(i-1,j,ny); // index of left edge
    int iBot  = getEdgeCoord(i,j-1,ny); // index of bottom edge

    float h;
    float hu;
    float hv;

    // copy flux unknowns from global into shared memory
    // -> for fluxes corresponding to variable h
    Fds[tx+1][ty] = Fhd[iEdge];
    Gds[tx][ty+1] = Ghd[iEdge];
    if (tx==0) Fds[tx][ty] = Fhd[iLeft];
    if (ty==0) Gds[tx][ty] = Ghd[iBot];
    __syncthreads();

    // compute new value of h from fluxes
    h = hd[iElem] - dt *( (Fds[tx+1][ty]-Fds[tx][ty])*dxi
                         +(Gds[tx][ty+1]-Gds[tx][ty])*dyi );
    __syncthreads();

    // copy flux unknowns from global into shared memory
    // -> for fluxes corresponding to variable hu
    Fds[tx+1][ty] = Fhud[iEdge];
    Gds[tx][ty+1] = Ghud[iEdge];
    if (tx==0) Fds[tx][ty] = Fhud[iLeft];
    if (ty==0) Gds[tx][ty] = Ghud[iBot];
    __syncthreads();

    // compute new value of hu from fluxes
    hu = hud[iElem] - dt *( (Fds[tx+1][ty]-Fds[tx][ty])*dxi
                           +(Gds[tx][ty+1]-Gds[tx][ty])*dyi
                           + Bxd[getBathyCoord(i-1,j-1,ny)]*dxi );
    __syncthreads();

    // copy flux unknowns from global into shared memory
    // -> for fluxes corresponding to variable hv
    Fds[tx+1][ty] = Fhvd[iEdge];
    Gds[tx][ty+1] = Ghvd[iEdge];
    if (tx==0) Fds[tx][ty] = Fhvd[iLeft];
    if (ty==0) Gds[tx][ty] = Ghvd[iBot];
    __syncthreads();

    // compute new value of hv from fluxes
    hv = hvd[iElem] - dt *( (Fds[tx+1][ty]-Fds[tx][ty])*dxi
                           +(Gds[tx][ty+1]-Gds[tx][ty])*dyi
                           + Byd[getBathyCoord(i-1,j-1,ny)]*dyi );
    __syncthreads();

    /* precompute maximal height and velocity per thread block
     * (for computation of allowed time step size)
     */
    // store the new unknowns; keep h and the larger absolute velocity
    // component of this cell in the (now free) shared tiles
    hd[iElem] = h;   Fds[tx][ty] = h;
    hud[iElem] = hu; hu = (h > 0.0f) ? fabsf(hu/h) : 0.0f;
    hvd[iElem] = hv; hv = (h > 0.0f) ? fabsf(hv/h) : 0.0f;
    Gds[tx][ty] = (hu>hv) ? hu : hv;

    // parallel reduction on thread block:
    // determine maximum wave height and velocity
    // step 1: reduction in ty-direction
    for (int s=TILE_SIZE>>1; s>0; s>>=1) {
        __syncthreads();
        if (ty < s) {
            if( Fds[tx][ty] < Fds[tx][ty+s]) Fds[tx][ty] = Fds[tx][ty+s];
            if( Gds[tx][ty] < Gds[tx][ty+s]) Gds[tx][ty] = Gds[tx][ty+s];
        }
    }
    // step 2: reduction in tx-direction (row ty==0 only)
    for (int s=TILE_SIZE>>1; s>0; s>>=1) {
        __syncthreads();
        if ((tx < s) && (ty==0)) {
            if( Fds[tx][ty] < Fds[tx+s][ty]) Fds[tx][ty] = Fds[tx+s][ty];
            if( Gds[tx][ty] < Gds[tx+s][ty]) Gds[tx][ty] = Gds[tx+s][ty];
        }
    }

    // save maxima in array maxhd and maxvd
    if ((tx == 0) && (ty==0)) {
        // Linear block index: blockIdx.y runs over gridDim.y blocks, so that
        // is the stride of blockIdx.x. (A stride of nx/TILE_SIZE is only
        // correct for nx == ny; otherwise entries collide or lie outside the
        // gridDim.x*gridDim.y range.)
        int iBlock = blockIdx.x*gridDim.y + blockIdx.y;
        maxhd[iBlock] = Fds[0][0];
        maxvd[iBlock] = Gds[0][0];
    }
}
//******************************************************************
// kernels to determine the maximum time step
//******************************************************************
/**
 * CUDA kernel for maximum reduction;
 * required to compute maximum water height and velocities to determine
 * the allowed time step.
 *
 * The stride starts at size>>1 and is halved until it reaches 0. In every
 * round a thread with index idx = start + threadIdx.x that satisfies
 * idx < stride replaces entry idx by the larger of the entries idx and
 * idx+stride, in both arrays. The rounds are separated by __syncthreads().
 *
 * NOTE(review): __syncthreads() only synchronises the threads of one block,
 * so this presumably has to be launched with a single thread block --
 * confirm against the caller.
 *
 * @param maxhd array of water-height maxima (reduced in place)
 * @param maxvd array of velocity maxima (reduced in place)
 * @param start offset added to threadIdx.x
 * @param size  number of entries; the first stride is size>>1
 */
__global__
void kernelMaximum(float* maxhd, float* maxvd, int start, int size) {
    const int idx = start + threadIdx.x;
    int stride = size >> 1;
    while (stride > 0) {
        __syncthreads();
        if (idx < stride) {
            const int partner = idx + stride;
            const float hOther = maxhd[partner];
            if (maxhd[idx] < hOther) maxhd[idx] = hOther;
            const float vOther = maxvd[partner];
            if (maxvd[idx] < vOther) maxvd[idx] = vOther;
        }
        stride >>= 1;
    }
}
/**
* @file
* This file is part of SWE.
*
* @author Michael Bader, Kaveh Rahnema, Tobias Schnabel
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* TODO
*/
#ifndef __SWE_RUSANOVBLOCKCUDAKERNELS_HH
#define __SWE_RUSANOVBLOCKCUDAKERNELS_HH
//******************************************************************
// kernels to implement Euler time-stepping
//******************************************************************
// fluxes Fhd, Fhud, Fhvd on vertical edges; istart offsets the x-index
__global__
void kernelComputeFluxesF(float* hd, float* hud, float* hvd,
float* Fhd, float* Fhud, float* Fhvd,
int ny, float g, float llf, int istart);
// fluxes Ghd, Ghud, Ghvd on horizontal edges; jstart offsets the y-index
__global__
void kernelComputeFluxesG(float* hd, float* hud, float* hvd,
float* Ghd, float* Ghud, float* Ghvd,
int ny, float g, float llf, int jstart);
// bathymetry source terms Bxd, Byd for the hu and hv equation
__global__
void kernelComputeBathymetrySources(float* hd, float* bd, float* Bxd, float* Byd,
int ny, float g);
// Euler update of hd, hud, hvd; also writes per-block maxima to maxhd, maxvd
__global__
void kernelEulerTimestep(float* hd, float* hud, float* hvd,
float* Fhd, float* Fhud, float* Fhvd,
float* Ghd, float* Ghud, float* Ghvd,
float* Bxd, float* Byd,
float* maxhd, float* maxvd,
int nx, int ny, float dt, float dxi, float dyi);
// in-place maximum reduction over maxhd and maxvd
__global__
void kernelMaximum(float* maxhd, float* maxvd, int start, int size);
#endif
This diff is collapsed.
/**
* @file
* This file is part of SWE.
*
* @author Alexander Breuer (breuera AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Dipl.-Math._Alexander_Breuer)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* SWE_Block in CUDA, which uses solvers in the wave propagation formulation.
*/
#ifndef SWEWAVEPROPAGATIONBLOCKCUDA_HH_
#define SWEWAVEPROPAGATIONBLOCKCUDA_HH_
#include <cassert>
#include "SWE_BlockCUDA.hh"
/**
* SWE_WavePropagationBlockCuda is an implementation of the SWE_BlockCuda abstract class.
* It uses a wave propagation solver which is defined with the pre-compiler flag WAVE_PROPAGATION_SOLVER (see above).
*
* Possible wave propagation solvers are:
* F-Wave, <strike>Approximate Augmented Riemann, Hybrid (f-wave + augmented).</strike>
* (details can be found in the corresponding source files)
*/
class SWE_WavePropagationBlockCuda: public SWE_BlockCUDA {
//private: (default access of a class; the members up to "public:" are private)
// The trailing "D" of the pointers below presumably marks device memory, in
// line with hd/hud/hvd/bd of the base class -- TODO confirm in the .cu file.
//! "2D array" which holds the net-updates for the water height (wave propagating to the left).
float* hNetUpdatesLeftD;
//! "2D array" which holds the net-updates for the water height (wave propagating to the right).
float* hNetUpdatesRightD;
//! "2D array" which holds the net-updates for the momentum in x-direction (wave propagating to the left).
float* huNetUpdatesLeftD;
//! "2D array" which holds the net-updates for the momentum in x-direction (wave propagating to the right).
float* huNetUpdatesRightD;
//! "2D array" which holds the net-updates for the water height (wave propagating to the top).
float* hNetUpdatesBelowD;
//! "2D array" which holds the net-updates for the water height (wave propagating to the bottom).
float* hNetUpdatesAboveD;
//! "2D array" which holds the net-updates for the momentum in y-direction (wave propagating to the top).
float* hvNetUpdatesBelowD;
//! "2D array" which holds the net-updates for the momentum in y-direction (wave propagating to the bottom).
float* hvNetUpdatesAboveD;
public:
// constructor of SWE_WavePropagationBlockCuda
// (both offsets default to 0)
SWE_WavePropagationBlockCuda( const float i_offsetX = 0,
const float i_offsetY = 0 );
// destructor of SWE_WavePropagationBlockCuda
~SWE_WavePropagationBlockCuda();
// compute a single time step (net-updates + update of the cells).
void simulateTimestep( float i_dT );
// TODO: Not implemented
// Fails the assertion when called; if assertions are compiled out (NDEBUG)
// it silently returns 0 instead.
float simulate(float, float) {
assert(false);
return 0;
};
// TODO: not implemented, max time step reduction is done in each call of computeNumericalFluxes(...)
// Fails the assertion when called; a no-op if assertions are compiled out.
void computeMaxTimestep() {
assert(false);
};
// compute the numerical fluxes (net-update formulation here).
void computeNumericalFluxes();
// compute the new cell values.
void updateUnknowns(const float i_deltaT);
};
#endif /* SWEWAVEPROPAGATIONBLOCKCUDA_HH_ */
This diff is collapsed.
/**
* @file
* This file is part of SWE.
*
* @author Alexander Breuer (breuera AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Dipl.-Math._Alexander_Breuer)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* CUDA Kernels for a SWE_Block, which uses solvers in the wave propagation formulation.
*/
#ifndef SWEWAVEPROPAGATIONBLOCKCUDAKERNELS_HH_
#define SWEWAVEPROPAGATIONBLOCKCUDAKERNELS_HH_
// CUDA-kernel which computes the net-updates
// Reads the const arrays i_h, i_hu, i_hv, i_b and takes non-const pointers
// for the eight net-update arrays and for o_maximumWaveSpeeds (outputs,
// judging by the o_ prefix). All four offset parameters default to 0.
// NOTE(review): the meaning of i_offsetX/Y versus i_blockOffSetX/Y is not
// visible in this header -- see the kernel definition.
__global__
void computeNetUpdatesKernel(
const float* i_h, const float* i_hu, const float* i_hv, const float* i_b,
float* o_hNetUpdatesLeftD, float* o_hNetUpdatesRightD,
float* o_huNetUpdatesLeftD, float* o_huNetUpdatesRightD,
float* o_hNetUpdatesBelowD, float* o_hNetUpdatesAboveD,
float* o_hvNetUpdatesBelowD, float* o_hvNetUpdatesAboveD,
float* o_maximumWaveSpeeds,
const int i_nx, const int i_ny,
const int i_offsetX = 0, const int i_offsetY = 0,
const int i_blockOffSetX = 0, const int i_blockOffSetY = 0
);
// CUDA-kernel which updates the unknowns
// Reads the eight const net-update arrays; io_h, io_hu, io_hv are non-const
// (updated in place, judging by the io_ prefix).
__global__
void updateUnknownsKernel(
const float* i_hNetUpdatesLeftD, const float* i_hNetUpdatesRightD,
const float* i_huNetUpdatesLeftD, const float* i_huNetUpdatesRightD,
const float* i_hNetUpdatesBelowD, const float* i_hNetUpdatesAboveD,
const float* i_hvNetUpdatesBelowD, const float* i_hvNetUpdatesAboveD,
float* io_h, float* io_hu, float* io_hv,
const float i_updateWidthX, const float i_updateWidthY,
const int i_nx, const int i_ny
);
// Device function (not a kernel, despite its name) which computes the 1D
// position in an array from a given 2D index.
// NOTE(review): declared inline here without a definition in this header;
// the definition has to be visible in every translation unit that calls it.
__device__
inline int computeOneDPositionKernel(const int i_i, const int i_j, const int i_nx);
#endif /* SWEWAVEPROPAGATIONBLOCKCUDAKERNELS_HH_ */
......@@ -31,7 +31,11 @@
#include <string>
#include "../SWE_Block.hh"
#ifndef CUDA
#include "../SWE_WavePropagationBlock.hh"
#else
#include "../SWE_WavePropagationBlockCuda.hh"
#endif
#include "../scenarios/SWE_simple_scenarios.h"
#include "../tools/Logger.hpp"
......@@ -91,7 +95,11 @@ int main( int argc, char** argv ) {
l_originY = l_scenario.getBoundaryPos(BND_BOTTOM);
// create a single wave propagation block
#ifndef CUDA
SWE_WavePropagationBlock l_wavePropgationBlock(l_originX, l_originY);
#else
SWE_WavePropagationBlockCuda l_wavePropgationBlock(l_originX, l_originY);
#endif
// initialize the wave propgation block
l_wavePropgationBlock.initScenario(l_scenario);
......@@ -115,7 +123,7 @@ int main( int argc, char** argv ) {
/**
* Simulation.
*/
// print a start message and reset the wall clock time
// print the start message and reset the wall clock time
s_sweLogger.printStartMessage();
s_sweLogger.initWallClockTime(time(NULL));
......@@ -165,7 +173,7 @@ int main( int argc, char** argv ) {
s_sweLogger.printStatisticsMessage();
// print the cpu time
s_sweLogger.printCpuTime("CPU time");
s_sweLogger.printCpuTime("CPU/GPU time");
// print the wall clock time (includes plotting)
s_sweLogger.printWallClockTime(time(NULL));
......
......@@ -28,7 +28,7 @@
#ifndef __SWE_SIMPLE_SCENARIOS_H
#define __SWE_SIMPLE_SCENARIOS_H
#include <math.h>
#include <cmath>
#include "SWE_Scenario.h"
......@@ -56,12 +56,39 @@ class SWE_BathymetryDamBreakScenario : public SWE_Scenario {
public:
float getBathymetry(float x, float y) {
// return ( sqrt( (x-0.3f)*(x-0.3f) + (y-0.8f)*(y-0.8f) ) < 0.1f ) ? 0.1f: 0.0f;
return ( sqrt( (x-0.5f)*(x-0.5f) + (y-0.5f)*(y-0.5f) ) < 0.1f ) ? 0.1f: 0.0f;
return ( std::sqrt( (x-500.f)*(x-500.f) + (y-500.f)*(y-500.f) ) < 50.f ) ? -250.f: -260.f;
};
virtual float endSimulation() { return 0.2f; };
virtual float endSimulation() { return (float) 15; };
virtual BoundaryType getBoundaryType(BoundaryEdge edge) { return OUTFLOW; };
/** Get the boundary positions
*
* @param i_edge which edge
* @return value in the corresponding dimension
*/
float getBoundaryPos(BoundaryEdge i_edge) {
if ( i_edge == BND_LEFT )
return (float)0;
else if ( i_edge == BND_RIGHT)
return (float)1000;
else if ( i_edge == BND_BOTTOM )
return (float)0;
else
return (float)1000;
};
/**
* Get the water height at a specific location.
*
* @param i_positionX position relative to the origin of the bathymetry grid in x-direction
* @param i_positionY position relative to the origin of the bathymetry grid in y-direction
* @return water height (before the initial displacement)
*/
float getWaterHeight( float i_positionX,
float i_positionY ) {
return (float) 270;
}
};
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment