Commit af6de788 authored by Gaurav Kukreja

Overlap communication and computation using MPI_Isend and MPI_Irecv
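The idea is to post the ghost-layer exchange with non-blocking calls, update the inner block while the messages are in flight, and only then wait and update the border cells. A minimal self-contained sketch of that pattern (ranks, buffer sizes and tags below are illustrative and not taken from the SWE code):

#include <mpi.h>
#include <vector>

int main( int argc, char** argv ) {
  MPI_Init( &argc, &argv );

  int l_rank, l_size;
  MPI_Comm_rank( MPI_COMM_WORLD, &l_rank );
  MPI_Comm_size( MPI_COMM_WORLD, &l_size );

  const int l_n = 1024;
  std::vector<float> l_sendGhost( l_n, (float) l_rank ); // layer the neighbor reads
  std::vector<float> l_recvGhost( l_n, 0.f );            // ghost layer the neighbor writes
  std::vector<float> l_inner( l_n, 1.f );                // inner-block data

  const int l_right = ( l_rank + 1 ) % l_size;
  const int l_left  = ( l_rank - 1 + l_size ) % l_size;

  // 1. post the ghost-layer exchange without blocking
  MPI_Request l_requests[2];
  MPI_Isend( l_sendGhost.data(), l_n, MPI_FLOAT, l_right, 1, MPI_COMM_WORLD, &l_requests[0] );
  MPI_Irecv( l_recvGhost.data(), l_n, MPI_FLOAT, l_left,  1, MPI_COMM_WORLD, &l_requests[1] );

  // 2. compute on the inner block while the messages are in flight
  for ( int i = 1; i < l_n - 1; i++ )
    l_inner[i] *= 2.f;

  // 3. wait for the transfers, then update the cells that need the ghost layer
  MPI_Waitall( 2, l_requests, MPI_STATUSES_IGNORE );
  l_inner[0]       += l_recvGhost[0];
  l_inner[l_n - 1] += l_recvGhost[l_n - 1];

  MPI_Finalize();
  return 0;
}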

Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>
parent dfbe3c4e
@@ -380,6 +380,434 @@ SWE_WavePropagationBlock::computeNumericalFluxes ()
#endif
}
/**
* Compute net updates for the block.
* The member variable #maxTimestep will be updated with the
* maximum allowed time step size
*/
void
SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
{
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
const double time_begin = omp_get_wtime();
#else
const double time_begin = clock();
#endif
#endif
//maximum (linearized) wave speed within one iteration
float maxWaveSpeed = (float) 0.;
// compute the loop limits
// const int end_ny_1_1 = ny + 1;
const int end_ny_1_1 = ny;
// const int end_ny_1_2 = ny + 2;
const int end_ny_1_2 = ny + 1;
/***************************************************************************************
* compute the net-updates for the vertical edges
**************************************************************************************/
#ifdef LOOP_OPENMP
#pragma omp parallel
#endif // LOOP_OPENMP
{
float l_maxWaveSpeed = (float) 0.;
#if WAVE_PROPAGATION_SOLVER==4
solver::FWaveVec<float> wavePropagationSolver;
#else // WAVE_PROPAGATION_SOLVER==4
solver::AugRie_SIMD wavePropagationSolver;
#endif // WAVE_PROPAGATION_SOLVER==4
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
for (int i = 2; i < nx; i++) {
int j;
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
for (j = 2; j < end_ny_1_1; ++j) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[i - 1][j], h[i][j],
hu[i - 1][j], hu[i][j],
b[i - 1][j], b[i][j],
hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
assert (j == end_ny_1_1);
// }
/***************************************************************************************
* compute the net-updates for the horizontal edges
**************************************************************************************/
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4
for (j = 2; j < end_ny_1_2; j++) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[i][j - 1], h[i][j],
hv[i][j - 1], hv[i][j],
b[i][j - 1], b[i][j],
hNetUpdatesBelow[i - 1][j - 1], hNetUpdatesAbove[i - 1][j - 1],
hvNetUpdatesBelow[i - 1][j - 1], hvNetUpdatesAbove[i - 1][j - 1],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
assert (j == end_ny_1_2);
}
// vvvvv GKUKREJA : The loops for the horizontal and vertical edges are fused above; this extra block handles one additional column of vertical edges (i = nx + 1) so that no conditional is needed inside the fused loop.
{
int i = nx + 1;
int j = 1;
#ifdef LOOP_OPENMP
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
for (j = 1; j < end_ny_1_1; ++j) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[i - 1][j], h[i][j],
hu[i - 1][j], hu[i][j],
b[i - 1][j], b[i][j],
hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
assert (j == end_ny_1_1);
}
// ^^^^^ GKUKREJA : end of the extra block for the fused horizontal/vertical edge loops.
#ifdef LOOP_OPENMP
#pragma omp critical
{
maxWaveSpeed = std::max (l_maxWaveSpeed, maxWaveSpeed);
#ifdef COUNTFLOPS
flops += wavePropagationSolver.flops;
#endif
}
#endif // LOOP_OPENMP
} // end of (parallel) block
if (maxWaveSpeed > 0.00001) {
//TODO zeroTol
//compute the time step width
//CFL-Condition
//(max. wave speed) * dt / dx < .5
// => dt = .5 * dx/(max wave speed)
maxTimestep = std::min (dx / maxWaveSpeed, dy / maxWaveSpeed);
maxTimestep *= (float) .4; //CFL number of .4, slightly more conservative than the .5 in the derivation above
} else {
//might happen in dry cells
maxTimestep = std::numeric_limits<float>::max ();
}
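// Worked example with illustrative values (not taken from the code): for
// dx = dy = 100 m and maxWaveSpeed = 20 m/s, min(dx, dy) / maxWaveSpeed = 5 s,
// and the factor .4 gives maxTimestep = 2 s.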
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
time_needed += omp_get_wtime() - time_begin;
#else
time_needed += clock() - time_begin;
#endif
#endif
}
/**
* Compute net updates for the block.
* The member variable #maxTimestep will be updated with the
* maximum allowed time step size
*/
void
SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
{
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
const double time_begin = omp_get_wtime();
#else
const double time_begin = clock();
#endif
#endif
//maximum (linearized) wave speed within one iteration
float maxWaveSpeed = (float) 0.;
// compute the loop limits
const int end_ny_1_1 = ny + 1;
const int end_ny_1_2 = ny + 2;
#ifdef LOOP_OPENMP
#pragma omp parallel
#endif
{
float l_maxWaveSpeed = (float) 0.;
#if WAVE_PROPAGATION_SOLVER==4
solver::FWaveVec<float> wavePropagationSolver;
#else // WAVE_PROPAGATION_SOLVER==4
solver::AugRie_SIMD wavePropagationSolver;
#endif // WAVE_PROPAGATION_SOLVER==4
/***************************************************************************************
* compute the net-updates for the vertical edges
**************************************************************************************/
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute the vertical edges along the left and right borders
for (int j = 1; j < end_ny_1_1; ++j) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[0][j], h[1][j],
hu[0][j], hu[1][j],
b[0][j], b[1][j],
hNetUpdatesLeft[0][j - 1], hNetUpdatesRight[0][j - 1],
huNetUpdatesLeft[0][j - 1], huNetUpdatesRight[0][j - 1],
maxEdgeSpeed
);
wavePropagationSolver.computeNetUpdates (
h[nx][j], h[nx + 1][j],
hu[nx][j], hu[nx + 1][j],
b[nx][j], b[nx + 1][j],
hNetUpdatesLeft[nx][j - 1], hNetUpdatesRight[nx][j - 1],
huNetUpdatesLeft[nx][j - 1], huNetUpdatesRight[nx][j - 1],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
// assert (j == ny + 1);
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute the vertical edges along the bottom and top borders
for (int i = 1; i < nx + 2; i++)
{
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[i - 1][1], h[i][1],
hu[i - 1][1], hu[i][1],
b[i - 1][1], b[i][1],
hNetUpdatesLeft[i - 1][0], hNetUpdatesRight[i - 1][0],
huNetUpdatesLeft[i - 1][0], huNetUpdatesRight[i - 1][0],
maxEdgeSpeed
);
wavePropagationSolver.computeNetUpdates (
h[i - 1][end_ny_1_1 - 1], h[i][end_ny_1_1 - 1],
hu[i - 1][end_ny_1_1 - 1], hu[i][end_ny_1_1 - 1],
b[i - 1][end_ny_1_1 - 1], b[i][end_ny_1_1 - 1],
hNetUpdatesLeft[i - 1][end_ny_1_1 - 2], hNetUpdatesRight[i - 1][end_ny_1_1 - 2],
huNetUpdatesLeft[i - 1][end_ny_1_1 - 2], huNetUpdatesRight[i - 1][end_ny_1_1 - 2],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
/***************************************************************************************
* compute the net-updates for the horizontal edges
**************************************************************************************/
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute the horizontal edges along the left and right borders
for (int j = 1; j < end_ny_1_2; j++) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[1][j - 1], h[1][j],
hv[1][j - 1], hv[1][j],
b[1][j - 1], b[1][j],
hNetUpdatesBelow[0][j - 1], hNetUpdatesAbove[0][j - 1],
hvNetUpdatesBelow[0][j - 1], hvNetUpdatesAbove[0][j - 1],
maxEdgeSpeed
);
wavePropagationSolver.computeNetUpdates (
h[nx + 1][j - 1], h[nx + 1][j],
hv[nx + 1][j - 1], hv[nx + 1][j],
b[nx + 1][j - 1], b[nx + 1][j],
hNetUpdatesBelow[nx][j - 1], hNetUpdatesAbove[nx][j - 1],
hvNetUpdatesBelow[nx][j - 1], hvNetUpdatesAbove[nx][j - 1],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute the horizontal edges along the bottom and top borders
for (int i = 1; i < nx + 2; i++) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
h[i][0], h[i][1],
hv[i][0], hv[i][1],
b[i][0], b[i][1],
hNetUpdatesBelow[i - 1][0], hNetUpdatesAbove[i - 1][0],
hvNetUpdatesBelow[i - 1][0], hvNetUpdatesAbove[i - 1][0],
maxEdgeSpeed
);
wavePropagationSolver.computeNetUpdates (
h[i][end_ny_1_2 - 2], h[i][end_ny_1_2 - 1],
hv[i][end_ny_1_2 - 2], hv[i][end_ny_1_2 - 1],
b[i][end_ny_1_2 - 2], b[i][end_ny_1_2 - 1],
hNetUpdatesBelow[i - 1][end_ny_1_2 - 2], hNetUpdatesAbove[i - 1][end_ny_1_2 - 2],
hvNetUpdatesBelow[i - 1][end_ny_1_2 - 2], hvNetUpdatesAbove[i - 1][end_ny_1_2 - 2],
maxEdgeSpeed
);
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
//update the maximum wave speed
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
// assert (j = ny + 2);
#ifdef LOOP_OPENMP
#pragma omp critical
{
maxWaveSpeed = std::max (l_maxWaveSpeed, maxWaveSpeed);
#ifdef COUNTFLOPS
flops += wavePropagationSolver.flops;
#endif
}
#endif // LOOP_OPENMP
} // end of (parallel) block
if (maxWaveSpeed > 0.00001) {
//TODO zeroTol
//compute the time step width
//CFL-Condition
//(max. wave speed) * dt / dx < .5
// => dt = .5 * dx/(max wave speed)
maxTimestep = std::min (dx / maxWaveSpeed, dy / maxWaveSpeed);
maxTimestep *= (float) .4; //CFL number of .4, slightly more conservative than the .5 in the derivation above
} else {
//might happen in dry cells
maxTimestep = std::numeric_limits<float>::max ();
}
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
time_needed += omp_get_wtime() - time_begin;
#else
time_needed += clock() - time_begin;
#endif
#endif
}
/**
* Updates the unknowns with the already computed net-updates.
*
@@ -436,15 +436,17 @@ int main( int argc, char** argv ) {
// do time steps until next checkpoint is reached
while( l_t < l_checkPoints[c] ) {
MPI_Request* l_request;
//reset CPU-Communication clock
tools::Logger::logger.resetClockToCurrentTime("CpuCommunication");
// exchange ghost and copy layers
exchangeLeftRightGhostLayers( l_leftNeighborRank, l_leftInflow, l_leftOutflow,
l_request = exchangeAsyncLeftRightGhostLayers( l_leftNeighborRank, l_leftInflow, l_leftOutflow,
l_rightNeighborRank, l_rightInflow, l_rightOutflow,
l_mpiCol );
exchangeBottomTopGhostLayers( l_bottomNeighborRank, l_bottomInflow, l_bottomOutflow,
l_request = exchangeAsyncBottomTopGhostLayers( l_bottomNeighborRank, l_bottomInflow, l_bottomOutflow,
l_topNeighborRank, l_topInflow, l_topOutflow,
l_mpiRow );
@@ -454,8 +456,14 @@ int main( int argc, char** argv ) {
// set values in ghost cells
l_wavePropgationBlock.setGhostLayer();
// compute numerical flux on each edge
l_wavePropgationBlock.computeNumericalFluxes();
// compute numerical fluxes for the inner block
l_wavePropgationBlock.computeNumericalFluxes_innerBlock(); // Only Inner Block
// wait until the asynchronous ghost-layer exchange has completed
MPI_Wait(l_request, MPI_STATUS_IGNORE);
// compute numerical fluxes for the border edges
l_wavePropgationBlock.computeNumericalFluxes_borders(); // Only Borders
//! maximum allowed time step width within a block.
float l_maxTimeStepWidth = l_wavePropgationBlock.getMaxTimestep();
@@ -555,9 +563,16 @@ void exchangeLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o
MPI_Status l_status;
// int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
// int dest, int sendtag,
// void *recvbuf, int recvcount, MPI_Datatype recvtype,
// int source, int recvtag,
// MPI_Comm comm, MPI_Status *status)
// send to left, receive from the right:
MPI_Sendrecv( i_leftOutflow->h.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 1,
o_rightInflow->h.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 1,
MPI_Sendrecv( i_leftOutflow->h.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 1,
o_rightInflow->h.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 1,
MPI_COMM_WORLD, &l_status );
MPI_Sendrecv( i_leftOutflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 2,
@@ -583,6 +598,92 @@ void exchangeLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o
}
/**
* Exchanges the left and right ghost layers asynchronously using MPI's
* non-blocking MPI_Isend/MPI_Irecv.
*
* @param i_leftNeighborRank MPI rank of the left neighbor.
* @param o_leftInflow ghost layer, where the left neighbor writes into.
* @param i_leftOutflow layer, where the left neighbor reads from.
* @param i_rightNeighborRank MPI rank of the right neighbor.
* @param o_rightInflow ghost layer, where the right neighbor writes into.
* @param i_rightOutflow layer, where the right neighbor reads from.
* @param i_mpiCol MPI data type for the vertical ghost layers.
* @return pointer to the MPI request handle of the posted transfers.
*/
MPI_Request* exchangeAsyncLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o_leftInflow, SWE_Block1D* i_leftOutflow,
const int i_rightNeighborRank, SWE_Block1D* o_rightInflow, SWE_Block1D* i_rightOutflow,
MPI_Datatype i_mpiCol) {
// MPI_Status l_status;
MPI_Request* l_request;
l_request = (MPI_Request*) malloc(sizeof(MPI_Request));
// int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
// int dest, int sendtag,
// void *recvbuf, int recvcount, MPI_Datatype recvtype,
// int source, int recvtag,
// MPI_Comm comm, MPI_Status *status)
// send to left, receive from the right:
MPI_Isend ( i_leftOutflow->h.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 1, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_rightInflow->h.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 1, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_leftOutflow->h.elemVector(), 1, i_mpiCol,
// i_leftNeighborRank, 1,
// o_rightInflow->h.elemVector(), 1, i_mpiCol,
// i_rightNeighborRank, 1,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_leftOutflow->hu.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 2, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_rightInflow->hu.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 2, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_leftOutflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 2,
// o_rightInflow->hu.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 2,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_leftOutflow->hv.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 3, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_rightInflow->hv.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 3, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_leftOutflow->hv.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 3,
// o_rightInflow->hv.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 3,
// MPI_COMM_WORLD, &l_status );
// send to right, receive from the left:
MPI_Isend ( i_rightOutflow->h.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 4, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_leftInflow->h.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 4, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_rightOutflow->h.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 4,
// o_leftInflow->h.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 4,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_rightOutflow->hu.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 5, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_leftInflow->hu.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 5, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_rightOutflow->hu.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 5,
// o_leftInflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 5,
// MPI_COMM_WORLD, &l_status);
MPI_Isend ( i_rightOutflow->hv.elemVector(), 1, i_mpiCol,
i_rightNeighborRank, 6, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_leftInflow->hv.elemVector(), 1, i_mpiCol,
i_leftNeighborRank, 6, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_rightOutflow->hv.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 6,
// o_leftInflow->hv.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 6,
// MPI_COMM_WORLD, &l_status );
return l_request;
}
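/**
 * Hedged sketch, not part of this commit: the twelve MPI_Isend/MPI_Irecv calls
 * above all write their handle into the same MPI_Request, so waiting on the
 * returned pointer only completes the last posted transfer. A variant that
 * records one request per call lets the caller finish every transfer with
 * MPI_Waitall after the inner-block computation. Shown for the h component of
 * the left/right exchange only; hu, hv and the bottom/top exchange would follow
 * the same pattern. The function name and signature are illustrative, not
 * existing API.
 */
int exchangeAsyncLeftRightGhostLayersH( const int i_leftNeighborRank, SWE_Block1D* o_leftInflow, SWE_Block1D* i_leftOutflow,
                                        const int i_rightNeighborRank, SWE_Block1D* o_rightInflow, SWE_Block1D* i_rightOutflow,
                                        MPI_Datatype i_mpiCol, MPI_Request* o_requests ) {
  int l_count = 0;

  // send to left, receive from the right
  MPI_Isend( i_leftOutflow->h.elemVector(), 1, i_mpiCol,
             i_leftNeighborRank, 1, MPI_COMM_WORLD, &o_requests[l_count++] );
  MPI_Irecv( o_rightInflow->h.elemVector(), 1, i_mpiCol,
             i_rightNeighborRank, 1, MPI_COMM_WORLD, &o_requests[l_count++] );

  // send to right, receive from the left
  MPI_Isend( i_rightOutflow->h.elemVector(), 1, i_mpiCol,
             i_rightNeighborRank, 4, MPI_COMM_WORLD, &o_requests[l_count++] );
  MPI_Irecv( o_leftInflow->h.elemVector(), 1, i_mpiCol,
             i_leftNeighborRank, 4, MPI_COMM_WORLD, &o_requests[l_count++] );

  // the caller completes all posted transfers after the inner-block computation:
  //   MPI_Waitall( l_count, o_requests, MPI_STATUSES_IGNORE );
  return l_count;
}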
/**
* Exchanges the bottom and top ghost layers with MPI's SendReceive.
*
@@ -624,5 +725,82 @@ void exchangeBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D*
MPI_Sendrecv( i_topNeighborOutflow->hv.elemVector(), 1, i_mpiRow, i_topNeighborRank, 16,
o_bottomNeighborInflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 16,
MPI_COMM_WORLD, &l_status );
}
/**
* Exchanges the bottom and top ghost layers asynchronously using MPI's
* non-blocking MPI_Isend/MPI_Irecv.
*
* @param i_bottomNeighborRank MPI rank of the bottom neighbor.
* @param o_bottomNeighborInflow ghost layer, where the bottom neighbor writes into.
* @param i_bottomNeighborOutflow ghost layer, where the bottom neighbor reads from.
* @param i_topNeighborRank MPI rank of the top neighbor.
* @param o_topNeighborInflow ghost layer, where the top neighbor writes into.
* @param i_topNeighborOutflow ghost layer, where the top neighbor reads from.
* @param i_mpiRow MPI data type for the horizontal ghost layers.
* @return pointer to the MPI request handle of the posted transfers.
*/
MPI_Request* exchangeAsyncBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D* o_bottomNeighborInflow, SWE_Block1D* i_bottomNeighborOutflow,
const int i_topNeighborRank, SWE_Block1D* o_topNeighborInflow, SWE_Block1D* i_topNeighborOutflow,
const MPI_Datatype i_mpiRow) {
// MPI_Status l_status;
MPI_Request* l_request;
l_request = (MPI_Request*) malloc(sizeof(MPI_Request));
// send to bottom, receive from the top:
MPI_Isend ( i_bottomNeighborOutflow->h.elemVector(), 1, i_mpiRow,
i_bottomNeighborRank, 11, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_topNeighborInflow->h.elemVector(), 1, i_mpiRow,
i_topNeighborRank, 11, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_bottomNeighborOutflow->h.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 11,
// o_topNeighborInflow->h.elemVector(), 1, i_mpiRow, i_topNeighborRank,11,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_bottomNeighborOutflow->hu.elemVector(), 1, i_mpiRow,
i_bottomNeighborRank, 12, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_topNeighborInflow->hu.elemVector(), 1, i_mpiRow,
i_topNeighborRank, 12, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_bottomNeighborOutflow->hu.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 12,
// o_topNeighborInflow->hu.elemVector(), 1, i_mpiRow, i_topNeighborRank, 12,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_bottomNeighborOutflow->hv.elemVector(), 1, i_mpiRow,
i_bottomNeighborRank, 13, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_topNeighborInflow->hv.elemVector(), 1, i_mpiRow,
i_topNeighborRank, 13, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_bottomNeighborOutflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 13,
// o_topNeighborInflow->hv.elemVector(), 1, i_mpiRow, i_topNeighborRank, 13,
// MPI_COMM_WORLD, &l_status);
// send to top, receive from the bottom:
MPI_Isend ( i_topNeighborOutflow->h.elemVector(), 1, i_mpiRow,
i_topNeighborRank, 14, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_bottomNeighborInflow->h.elemVector(), 1, i_mpiRow,
i_bottomNeighborRank, 14, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_topNeighborOutflow->h.elemVector(), 1, i_mpiRow, i_topNeighborRank, 14,
// o_bottomNeighborInflow->h.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 14,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_topNeighborOutflow->hu.elemVector(), 1, i_mpiRow,
i_topNeighborRank, 15, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_bottomNeighborInflow->hu.elemVector(), 1, i_mpiRow,
i_bottomNeighborRank, 15, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_topNeighborOutflow->hu.elemVector(), 1, i_mpiRow, i_topNeighborRank, 15,
// o_bottomNeighborInflow->hu.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 15,
// MPI_COMM_WORLD, &l_status );
MPI_Isend ( i_topNeighborOutflow->hv.elemVector(), 1, i_mpiRow,
i_topNeighborRank, 16, MPI_COMM_WORLD, l_request);
MPI_Irecv ( o_bottomNeighborInflow->hv.elemVector(), 1, i_mpiRow,
i_bottomNeighborRank, 16, MPI_COMM_WORLD, l_request);
// MPI_Sendrecv( i_topNeighborOutflow->hv.elemVector(), 1, i_mpiRow, i_topNeighborRank, 16,
// o_bottomNeighborInflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 16,
// MPI_COMM_WORLD, &l_status );
return l_request;
}