Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
S
SWE
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Gaurav Kukreja
SWE
Commits
dc73e40e
Commit
dc73e40e
authored
Jan 30, 2013
by
Sebastian Rettenberger
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Overwrite source files with skeleton files
parent
c880d637
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
11 additions
and
757 deletions
+11
-757
Readme.md
src/Readme.md
+0
-0
SWE_WavePropagationBlockCuda.cu
src/SWE_WavePropagationBlockCuda.cu
+9
-113
SWE_WavePropagationBlockCuda_kernels.cu
src/SWE_WavePropagationBlockCuda_kernels.cu
+2
-241
SWE_WavePropagationBlockCuda.cu
src_skeleton/SWE_WavePropagationBlockCuda.cu
+0
-270
SWE_WavePropagationBlockCuda_kernels.cu
src_skeleton/SWE_WavePropagationBlockCuda_kernels.cu
+0
-133
No files found.
src
_skeleton
/Readme.md
→
src/Readme.md
View file @
dc73e40e
File moved
src/SWE_WavePropagationBlockCuda.cu
View file @
dc73e40e
...
@@ -190,7 +190,6 @@ float SWE_WavePropagationBlockCuda::simulate(float tStart, float tEnd) {
...
@@ -190,7 +190,6 @@ float SWE_WavePropagationBlockCuda::simulate(float tStart, float tEnd) {
updateUnknowns(maxTimestep);
updateUnknowns(maxTimestep);
t += maxTimestep;
t += maxTimestep;
// cout << "Simulation at time " << t << endl << flush;
} while(t < tEnd);
} while(t < tEnd);
return t;
return t;
...
@@ -203,67 +202,15 @@ float SWE_WavePropagationBlockCuda::simulate(float tStart, float tEnd) {
...
@@ -203,67 +202,15 @@ float SWE_WavePropagationBlockCuda::simulate(float tStart, float tEnd) {
* The maximum wave speed is computed within the net-updates kernel for each CUDA-block.
* The maximum wave speed is computed within the net-updates kernel for each CUDA-block.
* To finalize the method the Thrust-library is called, which does the reduction over all blockwise maxima.
* To finalize the method the Thrust-library is called, which does the reduction over all blockwise maxima.
* In the wave speed reduction step the actual cell width in x- and y-direction is not taken into account.
* In the wave speed reduction step the actual cell width in x- and y-direction is not taken into account.
*
* TODO: A splitting or direct computation of the time step width might increase the total time step size.
* Example:
* dx = 11, dy = 6;
* max wave speed in x-direction: 10
* max wave speed in y-direction: 5.5
* max wave speed in both direction: 10
*
* => maximum time step (current implementation): min(11/10, 6/10) = 0.6
* => maximum time step (splitting the dimensions): min(11/10, 6/5.5) = 1.09..
*/
*/
void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
/*
/*
* Initialization.
* Initialization.
*/
*/
//! definition of one CUDA-block. Typical size are 8*8 or 16*16
dim3 dimBlock(TILE_SIZE,TILE_SIZE);
/**
/*
* Definition of the "main" CUDA-grid.
* TODO: This part needs to be implemented.
* This grid covers only edges 0..#(edges in x-direction)-2 and 0..#(edges in y-direction)-2.
*
* An example with a computational domain of size
* nx = 24, ny = 16
* with a 1 cell ghost layer would result in a grid with
* (nx+2)*(ny+2) = (26*18)
* cells and
* (nx+1)*(ny+1) = (25*17)
* edges.
*
* The CUDA-blocks (here 8*8) mentioned above would cover all edges except
* the ones lying between the computational domain and the right/top ghost layer:
* <pre>
* *
* ** top ghost layer,
* ******** cell ids
* ******************************* ** = (*, ny+1)
* * * * * *
* * 8*8 * 8*8 * 8*8 *
* * block * block * block *
* * * * *
* *******************************
* * * * *
* * 8*8 * 8*8 * 8*8 *
* * * block * block * block *
* bottom ** * * * *
* ghost ******** *******************************
* layer, **
* cell ids * * *
* =(*,0) *** ***
* * *
* * *
* left ghost layer, right ghost layer,
* cell ids = (0,*) cell ids = (nx+1, *)
* </pre>
*/
*/
dim3 dimGrid(nx/TILE_SIZE,ny/TILE_SIZE);
// assert a valid tile size
assert(nx%TILE_SIZE==0);
assert(ny%TILE_SIZE==0);
// "2D array" which holds the blockwise maximum wave speeds
// "2D array" which holds the blockwise maximum wave speeds
float* l_maximumWaveSpeedsD;
float* l_maximumWaveSpeedsD;
...
@@ -276,42 +223,10 @@ void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
...
@@ -276,42 +223,10 @@ void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
/*
/*
* Compute the net updates for the 'main part and the two 'boundary' parts.
* Compute the net updates for the 'main part and the two 'boundary' parts.
*/
*/
// compute the net-updates for the "main" part.
computeNetUpdatesKernel<<<dimGrid,dimBlock>>>( hd, hud, hvd, bd,
/*
hNetUpdatesLeftD, hNetUpdatesRightD,
* TODO: This part needs to be implemented.
huNetUpdatesLeftD, huNetUpdatesRightD,
*/
hNetUpdatesBelowD, hNetUpdatesAboveD,
hvNetUpdatesBelowD, hvNetUpdatesAboveD,
l_maximumWaveSpeedsD,
nx,ny
);
// compute the "remaining" net updates (edges "simulation domain"/"top ghost layer" and "simulation domain"/"right ghost layer")
// edges between cell nx and ghost layer nx+1
dim3 dimRightBlock(1, TILE_SIZE);
dim3 dimRightGrid(1, ny/TILE_SIZE);
computeNetUpdatesKernel<<<dimRightGrid, dimRightBlock>>>( hd, hud, hvd, bd,
hNetUpdatesLeftD, hNetUpdatesRightD,
huNetUpdatesLeftD, huNetUpdatesRightD,
hNetUpdatesBelowD, hNetUpdatesAboveD,
hvNetUpdatesBelowD, hvNetUpdatesAboveD,
l_maximumWaveSpeedsD,
nx, ny,
nx, 0,
dimGrid.x, 0);
// edges between cell ny and ghost layer ny+1
dim3 dimTopBlock(TILE_SIZE, 1);
dim3 dimTopGrid(nx/TILE_SIZE, 1);
computeNetUpdatesKernel<<<dimTopGrid, dimTopBlock>>>( hd, hud, hvd, bd,
hNetUpdatesLeftD, hNetUpdatesRightD,
huNetUpdatesLeftD, huNetUpdatesRightD,
hNetUpdatesBelowD, hNetUpdatesAboveD,
hvNetUpdatesBelowD, hvNetUpdatesAboveD,
l_maximumWaveSpeedsD,
nx, ny,
0, ny,
0, dimGrid.y);
/*
/*
* Finalize (max reduction of the maximumWaveSpeeds-array.)
* Finalize (max reduction of the maximumWaveSpeeds-array.)
...
@@ -344,28 +259,9 @@ void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
...
@@ -344,28 +259,9 @@ void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
* @param i_deltaT time step size.
* @param i_deltaT time step size.
*/
*/
void SWE_WavePropagationBlockCuda::updateUnknowns(const float i_deltaT) {
void SWE_WavePropagationBlockCuda::updateUnknowns(const float i_deltaT) {
//! definition of one CUDA-block. Typical size are 8*8 or 16*16
/*
dim3 dimBlock(TILE_SIZE,TILE_SIZE);
* TODO: This part needs to be implemented.
*/
//! definition of the CUDA-grid.
dim3 dimGrid(nx/TILE_SIZE,ny/TILE_SIZE);
// assert a valid tile size
assert(nx%TILE_SIZE==0);
assert(ny%TILE_SIZE==0);
// compute the update width.
float l_updateWidthX = i_deltaT / dx;
float l_updateWidthY = i_deltaT / dy;
// update the unknowns (global time step)
updateUnknownsKernel<<<dimGrid,dimBlock>>>( hNetUpdatesLeftD, hNetUpdatesRightD,
huNetUpdatesLeftD, huNetUpdatesRightD,
hNetUpdatesBelowD, hNetUpdatesAboveD,
hvNetUpdatesBelowD, hvNetUpdatesAboveD,
hd, hud, hvd,
l_updateWidthX, l_updateWidthY,
nx, ny);
// synchronize the copy layer for MPI communication
// synchronize the copy layer for MPI communication
#ifdef USEMPI
#ifdef USEMPI
...
...
src/SWE_WavePropagationBlockCuda_kernels.cu
View file @
dc73e40e
This diff is collapsed.
Click to expand it.
src_skeleton/SWE_WavePropagationBlockCuda.cu
deleted
100644 → 0
View file @
c880d637
/**
* @file
* This file is part of SWE.
*
* @author Alexander Breuer (breuera AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Dipl.-Math._Alexander_Breuer)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* SWE_Block in CUDA, which uses solvers in the wave propagation formulation.
*/
#include "SWE_WavePropagationBlockCuda.hh"
#include "SWE_BlockCUDA.hh"
#include "SWE_WavePropagationBlockCuda_kernels.hh"
#include <cassert>
#ifndef STATICLOGGER
#define STATICLOGGER
#include "tools/Logger.hpp"
static tools::Logger s_sweLogger;
#endif
// CUDA-C includes
#include <cuda.h>
#include <cuda_runtime_api.h>
// Thrust library (used for the final maximum reduction in the method computeNumericalFluxes(...))
#include <thrust/device_vector.h>
/**
* Constructor of a SWE_WavePropagationBlockCuda.
*
* Allocates the variables for the simulation:
* Please note: The definition of indices changed in contrast to the CPU-Implementation.
*
* unknowns hd,hud,hvd,bd stored on the CUDA device are defined for grid indices [0,..,nx+1]*[0,..,ny+1] (-> Abstract class SWE_BlockCUDA)
* -> computational domain is [1,..,nx]*[1,..,ny]
* -> plus ghost cell layer
*
* net-updates are defined for edges with indices [0,..,nx]*[0,..,ny] for horizontal and vertical edges for simplicity (one layer is not necessary).
*
* A left/right net update with index (i-1,j) is located on the edge between
* cells with index (i-1,j) and (i,j):
* <pre>
* *********************
* * * *
* * (i-1,j) * (i,j) *
* * * *
* *********************
*
* *
* ***
* *****
* *
* *
* NetUpdatesLeft(i-1,j)
* or
* NetUpdatesRight(i-1,j)
* </pre>
*
* A below/above net update with index (i, j-1) is located on the edge between
* cells with index (i, j-1) and (i,j):
* <pre>
* ***********
* * *
* * (i, j) * *
* * * ** NetUpdatesBelow(i,j-1)
* *********** ***** or
* * * ** NetUpdatesAbove(i,j-1)
* * (i,j-1) * *
* * *
* ***********
* </pre>
* @param i_offsetX spatial offset of the block in x-direction.
* @param i_offsetY spatial offset of the block in y-direction.
* @param i_cudaDevice ID of the CUDA-device, which should be used.
*/
SWE_WavePropagationBlockCuda::SWE_WavePropagationBlockCuda( const float i_offsetX,
                                                            const float i_offsetY,
                                                            const int i_cudaDevice ): SWE_BlockCUDA(i_offsetX, i_offsetY, i_cudaDevice) {
  // Size in bytes of one 1D net-update array: one entry per edge,
  // edges are indexed [0,..,nx]*[0,..,ny] (see class documentation above).
  int sizeOfNetUpdates = (nx+1)*(ny+1)*sizeof(float);

  // Allocate device memory for the eight net-update arrays; each allocation is
  // followed by an error check so a failed cudaMalloc is reported immediately
  // with the name of the offending array.
  cudaMalloc((void**)&hNetUpdatesLeftD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for hNetUpdatesLeftD");

  cudaMalloc((void**)&hNetUpdatesRightD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for hNetUpdatesRightD");

  cudaMalloc((void**)&huNetUpdatesLeftD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for huNetUpdatesLeftD");

  cudaMalloc((void**)&huNetUpdatesRightD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for huNetUpdatesRightD");

  cudaMalloc((void**)&hNetUpdatesBelowD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for hNetUpdatesBelowD");

  cudaMalloc((void**)&hNetUpdatesAboveD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for hNetUpdatesAboveD");

  cudaMalloc((void**)&hvNetUpdatesBelowD, sizeOfNetUpdates);
  checkCUDAError("allocate device memory for hvNetUpdatesBelowD");

  cudaMalloc((void**)&hvNetUpdatesAboveD, sizeOfNetUpdates);
  // Fixed: the error message previously named hNetUpdatesAboveD (copy-paste error),
  // which would misreport which allocation failed.
  checkCUDAError("allocate device memory for hvNetUpdatesAboveD");
}
/**
* Destructor of a SWE_WavePropagationBlockCuda.
*
* Frees all of the memory, which was allocated within the constructor.
* Resets the CUDA device: useful if an error occurred and printf is used on the device (buffer).
*/
SWE_WavePropagationBlockCuda::~SWE_WavePropagationBlockCuda() {
  // Release the eight device-side net-update buffers allocated in the
  // constructor (same order as they were allocated).
  float* l_netUpdateArrays[] = { hNetUpdatesLeftD,   hNetUpdatesRightD,
                                 huNetUpdatesLeftD,  huNetUpdatesRightD,
                                 hNetUpdatesBelowD,  hNetUpdatesAboveD,
                                 hvNetUpdatesBelowD, hvNetUpdatesAboveD };
  for (unsigned int l_i = 0; l_i < sizeof(l_netUpdateArrays)/sizeof(l_netUpdateArrays[0]); l_i++) {
    cudaFree(l_netUpdateArrays[l_i]);
  }

  // Reset the CUDA device (flushes device-side printf buffers, useful after errors).
  s_sweLogger.printString("Resetting the CUDA devices");
  cudaDeviceReset();
}
/**
* Compute a single global time step of a given time step width.
* Remark: The user has to take care about the time step width. No additional check is done. The time step width typically available
* after the computation of the numerical fluxes (hidden in this method).
*
* First the net-updates are computed.
* Then the cells are updated with the net-updates and the given time step width.
*
* @param i_dT time step width in seconds.
*/
__host__
void SWE_WavePropagationBlockCuda::simulateTimestep(float i_dT) {
  // Step 1: derive the net-updates (numerical fluxes) on all edges.
  computeNumericalFluxes();

  // Step 2: apply the net-updates to the unknowns using the given step width.
  updateUnknowns(i_dT);
}
/**
* perform forward-Euler time steps, starting with simulation time tStart,:
* until simulation time tEnd is reached;
* device-global variables hd, hud, hvd are updated;
* unknowns h, hu, hv in main memory are not updated.
* Ghost layers and bathymetry sources are updated between timesteps.
* intended as main simulation loop between two checkpoints
*/
__host__
float SWE_WavePropagationBlockCuda::simulate(float tStart, float tEnd) {
  float l_time = tStart;

  // do-while: at least one time step is executed before the end time is checked.
  do {
    // refresh the ghost-cell values before each step
    setGhostLayer();

    // compute the numerical fluxes/net-updates in the wave propagation formulation
    computeNumericalFluxes();

    // advance the unknowns by the maximum admissible time step
    updateUnknowns(maxTimestep);
    l_time += maxTimestep;
  } while (l_time < tEnd);

  return l_time;
}
/**
* Compute the numerical fluxes (net-update formulation here) on all edges.
*
* The maximum wave speed is computed within the net-updates kernel for each CUDA-block.
* To finalize the method the Thrust-library is called, which does the reduction over all blockwise maxima.
* In the wave speed reduction step the actual cell width in x- and y-direction is not taken into account.
*/
void SWE_WavePropagationBlockCuda::computeNumericalFluxes() {
  /*
   * Initialization.
   */
  /*
   * TODO: This part needs to be implemented.
   *
   * NOTE(review): this section must declare the CUDA launch configuration,
   * in particular a dim3 named `dimGrid` (and a matching block size) -- the
   * size computation below reads dimGrid.x/dimGrid.y, so this file does not
   * compile until the TODO is filled in.
   */

  // "2D array" which holds the blockwise maximum wave speeds
  float* l_maximumWaveSpeedsD;

  // size of the maximum wave speed array (dimension of the grid + ghost layers, without the top right block), sizeof(float) not included
  int l_sizeMaxWaveSpeeds = ((dimGrid.x+1)*(dimGrid.y+1)-1);
  // NOTE(review): the cudaMalloc return value is not checked here -- consider
  // a checkCUDAError call as done in the constructor.
  cudaMalloc((void**)&l_maximumWaveSpeedsD, (l_sizeMaxWaveSpeeds*sizeof(float)) );

  /*
   * Compute the net updates for the 'main' part and the two 'boundary' parts.
   */
  /*
   * TODO: This part needs to be implemented.
   * (Presumably one or more launches of computeNetUpdatesKernel writing the
   * blockwise maxima into l_maximumWaveSpeedsD -- see the kernels file.)
   */

  /*
   * Finalize (max reduction of the maximumWaveSpeeds-array.)
   *
   * The Thrust library is used in this step.
   * An optional kernel could be written for the maximum reduction.
   */
  // Thrust pointer to the device array
  thrust::device_ptr<float> l_thrustDevicePointer(l_maximumWaveSpeedsD);

  // use Thrusts max_element-function for the maximum reduction
  thrust::device_ptr<float> l_thrustDevicePointerMax = thrust::max_element(l_thrustDevicePointer, l_thrustDevicePointer+l_sizeMaxWaveSpeeds);

  // get the result from the device (dereferencing the device_ptr copies it to the host)
  float l_maximumWaveSpeed = l_thrustDevicePointerMax[0];

  // free the max wave speeds array on the device
  cudaFree(l_maximumWaveSpeedsD);

  // set the maximum time step for this SWE_WavePropagationBlockCuda
  // (the same maximum wave speed is applied to both directions; see the
  // class documentation for the resulting loss of time-step size)
  maxTimestep = std::min( dx/l_maximumWaveSpeed, dy/l_maximumWaveSpeed );

  // CFL-like safety factor of 0.4 (the old comment claimed 0.5, which
  // contradicts the code -- TODO confirm the intended value)
  maxTimestep *= (float)0.4;
}
/**
* Update the cells with a given global time step.
*
* @param i_deltaT time step size.
*/
void SWE_WavePropagationBlockCuda::updateUnknowns(const float i_deltaT) {
  /*
   * TODO: This part needs to be implemented.
   *
   * NOTE(review): presumably this launches updateUnknownsKernel (declared in
   * SWE_WavePropagationBlockCuda_kernels.hh) with update widths derived from
   * i_deltaT and the cell sizes dx/dy -- confirm against the kernel's
   * parameter documentation.
   */

  // synchronize the copy layer for MPI communication
#ifdef USEMPI
  synchCopyLayerBeforeRead();
#endif
}
src_skeleton/SWE_WavePropagationBlockCuda_kernels.cu
deleted
100644 → 0
View file @
c880d637
/**
* @file
* This file is part of SWE.
*
* @author Alexander Breuer (breuera AT in.tum.de, http://www5.in.tum.de/wiki/index.php/Dipl.-Math._Alexander_Breuer)
*
* @section LICENSE
*
* SWE is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* SWE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with SWE. If not, see <http://www.gnu.org/licenses/>.
*
*
* @section DESCRIPTION
*
* CUDA Kernels for a SWE_Block, which uses solvers in the wave propagation formulation.
*/
#include "SWE_BlockCUDA.hh"
#include "SWE_WavePropagationBlockCuda_kernels.hh"
#include <cmath>
#include <cstdio>
#include "solvers/FWaveCuda.h"
/**
* The compute net-updates kernel calls the solver for a defined CUDA-Block and does a reduction over the computed wave speeds within this block.
*
* Remark: In total we have nx+1 / ny+1 edges.
* Therefore the edges "simulation domain"/"top ghost layer" and "simulation domain"/"right ghost layer"
* will not be computed in a typical call of the function:
* computeNetUpdatesKernel<<<dimGrid,dimBlock>>>( hd, hud, hvd, bd,
* hNetUpdatesLeftD, hNetUpdatesRightD,
* huNetUpdatesLeftD, huNetUpdatesRightD,
* hNetUpdatesBelowD, hNetUpdatesAboveD,
* hvNetUpdatesBelowD, hvNetUpdatesAboveD,
* l_maximumWaveSpeedsD,
* i_nx, i_ny
* );
* To reduce the effect of branch-mispredictions the kernel provides optional offsets, which can be used to compute the missing edges.
*
*
* @param i_h water heights (CUDA-array).
* @param i_hu momentums in x-direction (CUDA-array).
* @param i_hv momentums in y-direction (CUDA-array).
* @param i_b bathymetry values (CUDA-array).
* @param o_hNetUpdatesLeftD left going net-updates for the water height (CUDA-array).
* @param o_hNetUpdatesRightD right going net-updates for the water height (CUDA-array).
* @param o_huNetUpdatesLeftD left going net-updates for the momentum in x-direction (CUDA-array).
* @param o_huNetUpdatesRightD right going net-updates for the momentum in x-direction (CUDA-array).
* @param o_hNetUpdatesBelowD downwards going net-updates for the water height (CUDA-array).
* @param o_hNetUpdatesAboveD upwards going net-updates for the water height (CUDA-array).
* @param o_hvNetUpdatesBelowD downwards going net-updates for the momentum in y-direction (CUDA-array).
* @param o_hvNetUpdatesAboveD upwards going net-updates for the momentum in y-direction (CUDA-array).
* @param o_maximumWaveSpeeds maximum wave speed which occurred within the CUDA-block is written here (CUDA-array).
* @param i_nx number of cells within the simulation domain in x-direction (excludes ghost layers).
* @param i_ny number of cells within the simulation domain in y-direction (excludes ghost layers).
* @param i_offsetX cell/edge offset in x-direction.
* @param i_offsetY cell/edge offset in y-direction.
*/
// Skeleton kernel: computes the edge-wise net-updates and the per-block
// maximum wave speed (see the detailed documentation block above).
__global__
void computeNetUpdatesKernel(
    const float* i_h, const float* i_hu, const float* i_hv, const float* i_b,
    float* o_hNetUpdatesLeftD, float* o_hNetUpdatesRightD,
    float* o_huNetUpdatesLeftD, float* o_huNetUpdatesRightD,
    float* o_hNetUpdatesBelowD, float* o_hNetUpdatesAboveD,
    float* o_hvNetUpdatesBelowD, float* o_hvNetUpdatesAboveD,
    float* o_maximumWaveSpeeds,
    const int i_nX, const int i_nY,
    const int i_offsetX, const int i_offsetY,
    const int i_blockOffSetX, const int i_blockOffSetY
    ) {
  /*
   * TODO: This kernel needs to be implemented.
   *
   * NOTE(review): intentionally empty in the skeleton. An implementation is
   * expected to apply the offsets (i_offsetX/i_offsetY, i_blockOffSetX/
   * i_blockOffSetY) when deriving cell/edge indices and to write the
   * per-block maximum wave speed into o_maximumWaveSpeeds -- the host code
   * performs a max-reduction over that array afterwards.
   */
}
/**
* The "update unknowns"-kernel updates the unknowns in the cells with precomputed net-updates.
*
* @param i_hNetUpdatesLeftD left going net-updates for the water height (CUDA-array).
* @param i_hNetUpdatesRightD right going net-updates for the water height (CUDA-array).
* @param i_huNetUpdatesLeftD left going net-updates for the momentum in x-direction (CUDA-array).
* @param i_huNetUpdatesRightD right going net-updates for the momentum in x-direction (CUDA-array).
* @param i_hNetUpdatesBelowD downwards going net-updates for the water height (CUDA-array).
* @param i_hNetUpdatesAboveD upwards going net-updates for the water height (CUDA-array).
* @param i_hvNetUpdatesBelowD downwards going net-updates for the momentum in y-direction (CUDA-array).
* @param i_hvNetUpdatesAboveD upwards going net-updates for the momentum in y-direction (CUDA-array).
* @param io_h water heights (CUDA-array).
* @param io_hu momentums in x-direction (CUDA-array).
* @param io_hv momentums in y-direction (CUDA-array).
* @param i_updateWidthX update width in x-direction.
* @param i_updateWidthY update width in y-direction.
* @param i_nx number of cells within the simulation domain in x-direction (excludes ghost layers).
* @param i_ny number of cells within the simulation domain in y-direction (excludes ghost layers).
*/
// Skeleton kernel: applies the precomputed net-updates to the unknowns
// (see the parameter documentation block above).
__global__
void updateUnknownsKernel(
    const float* i_hNetUpdatesLeftD, const float* i_hNetUpdatesRightD,
    const float* i_huNetUpdatesLeftD, const float* i_huNetUpdatesRightD,
    const float* i_hNetUpdatesBelowD, const float* i_hNetUpdatesAboveD,
    const float* i_hvNetUpdatesBelowD, const float* i_hvNetUpdatesAboveD,
    float* io_h, float* io_hu, float* io_hv,
    const float i_updateWidthX, const float i_updateWidthY,
    const int i_nX, const int i_nY ) {
  /*
   * TODO: This kernel needs to be implemented.
   *
   * NOTE(review): intentionally empty in the skeleton. An implementation is
   * expected to update io_h/io_hu/io_hv in place from the four pairs of
   * net-update arrays, scaled by i_updateWidthX and i_updateWidthY.
   */
}
/**
* Compute the position of 2D coordinates in a 1D array.
* array[i][j] -> i * ny + j
*
* @param i_i row index.
* @param i_j column index.
* @param i_ny #(cells in y-direction).
* @return 1D index.
*/
__device__
inline int computeOneDPositionKernel(const int i_i, const int i_j, const int i_ny) {
  // Row-major flattening: skip i_i full rows of length i_ny, then add the
  // column offset i_j (array[i][j] -> i*ny + j).
  const int l_rowStart = i_i * i_ny;
  return l_rowStart + i_j;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment