Gaurav Kukreja / SWE / Commits / af6de788

Commit af6de788 authored Jan 19, 2014 by Gaurav Kukreja
parent dfbe3c4e

    Using MPI_Isend and MPI_Irecv, overlapped communication and computation

    Signed-off-by: Gaurav Kukreja <gmkukreja@gmail.com>

Showing 2 changed files with 612 additions and 6 deletions:

    src/blocks/SWE_WavePropagationBlock.cpp   +428  -0
    src/examples/swe_mpi.cpp                  +184  -6
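The idea behind the commit, in outline: post nonblocking ghost-layer exchanges with MPI_Isend/MPI_Irecv, compute the fluxes that do not touch ghost cells while the messages are in flight, then wait for completion before computing the border cells. The program below is only a minimal, self-contained illustration of that pattern (a 1D ring halo exchange with made-up data); it is not code from this repository, whose actual routines (computeNumericalFluxes_innerBlock, computeNumericalFluxes_borders, exchangeAsyncLeftRightGhostLayers, exchangeAsyncBottomTopGhostLayers) appear in the diff below.

// halo_overlap_sketch.cpp -- illustrative only; not part of the SWE code base.
// Each rank exchanges one ghost value with its left/right neighbour using
// MPI_Isend/MPI_Irecv, updates its interior while the messages are in flight,
// and only then updates the two border cells that need the ghost values.
#include <mpi.h>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int n = 8;                       // interior cells per rank
  std::vector<double> u(n + 2, rank);    // u[0] and u[n+1] are ghost cells
  const int left  = (rank - 1 + size) % size;
  const int right = (rank + 1) % size;

  MPI_Request req[4];
  // post the ghost-layer exchange (nonblocking)
  MPI_Isend(&u[1],     1, MPI_DOUBLE, left,  0, MPI_COMM_WORLD, &req[0]);
  MPI_Isend(&u[n],     1, MPI_DOUBLE, right, 1, MPI_COMM_WORLD, &req[1]);
  MPI_Irecv(&u[n + 1], 1, MPI_DOUBLE, right, 0, MPI_COMM_WORLD, &req[2]);
  MPI_Irecv(&u[0],     1, MPI_DOUBLE, left,  1, MPI_COMM_WORLD, &req[3]);

  // overlapped computation: cells 2..n-1 do not read the ghost cells
  std::vector<double> unew(u);
  for (int i = 2; i <= n - 1; i++)
    unew[i] = 0.5 * (u[i - 1] + u[i + 1]);

  // wait for the ghost cells, then finish the two border cells
  MPI_Waitall(4, req, MPI_STATUSES_IGNORE);
  unew[1] = 0.5 * (u[0] + u[2]);
  unew[n] = 0.5 * (u[n - 1] + u[n + 1]);

  MPI_Finalize();
  return 0;
}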
src/blocks/SWE_WavePropagationBlock.cpp (view file @ af6de788)
...
...
@@ -380,6 +380,434 @@ SWE_WavePropagationBlock::computeNumericalFluxes ()
#endif
}
/**
 * Compute net updates for the inner cells of the block, i.e. the cells whose
 * net updates do not depend on the ghost layers.
 * The member variable #maxTimestep will be updated with the
 * maximum allowed time step size.
 */
void
SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
{
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
  const double time_begin = omp_get_wtime();
#else
  const double time_begin = clock();
#endif
#endif

  // maximum (linearized) wave speed within one iteration
  float maxWaveSpeed = (float) 0.;

  // compute the loop limits
  // const int end_ny_1_1 = ny + 1;
  const int end_ny_1_1 = ny;
  // const int end_ny_1_2 = ny + 2;
  const int end_ny_1_2 = ny + 1;

  /***************************************************************************************
   * compute the net-updates for the vertical edges
   **************************************************************************************/
#ifdef LOOP_OPENMP
#pragma omp parallel
#endif // LOOP_OPENMP
  {
    float l_maxWaveSpeed = (float) 0.;

#if WAVE_PROPAGATION_SOLVER==4
    solver::FWaveVec<float> wavePropagationSolver;
#else // WAVE_PROPAGATION_SOLVER==4
    solver::AugRie_SIMD wavePropagationSolver;
#endif // WAVE_PROPAGATION_SOLVER==4

#ifdef LOOP_OPENMP
    // Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
    for (int i = 2; i < nx; i++) {
      int j;

#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
      // Vectorization is currently only possible for the FWaveVec solver
      // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
      for (j = 2; j < end_ny_1_1; ++j) {
        float maxEdgeSpeed;

        wavePropagationSolver.computeNetUpdates (
            h[i - 1][j], h[i][j],
            hu[i - 1][j], hu[i][j],
            b[i - 1][j], b[i][j],
            hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
            huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
            maxEdgeSpeed );

#ifdef LOOP_OPENMP
        // update the thread-local maximum wave speed
        l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
        // update the maximum wave speed
        maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
      }
      assert (j == end_ny_1_1);

      /***************************************************************************************
       * compute the net-updates for the horizontal edges
       **************************************************************************************/
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
      // Vectorization is currently only possible for the FWaveVec solver
      // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4
      for (j = 2; j < end_ny_1_2; j++) {
        float maxEdgeSpeed;

        wavePropagationSolver.computeNetUpdates (
            h[i][j - 1], h[i][j],
            hv[i][j - 1], hv[i][j],
            b[i][j - 1], b[i][j],
            hNetUpdatesBelow[i - 1][j - 1], hNetUpdatesAbove[i - 1][j - 1],
            hvNetUpdatesBelow[i - 1][j - 1], hvNetUpdatesAbove[i - 1][j - 1],
            maxEdgeSpeed );

#ifdef LOOP_OPENMP
        // update the thread-local maximum wave speed
        l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
        // update the maximum wave speed
        maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
      }
      assert (j == end_ny_1_2);
    }

    // vvvvv GKUKREJA : Fusing the loops for horizontal and vertical edges, adding this one extra iteration to avoid if looping.
    {
      int i = nx + 1;
      int j = 1;

#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
      // Vectorization is currently only possible for the FWaveVec solver
      // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
#ifdef LOOP_OPENMP
#pragma omp for schedule(static) nowait
#endif
      for (j = 1; j < end_ny_1_1; ++j) {
        float maxEdgeSpeed;

        wavePropagationSolver.computeNetUpdates (
            h[i - 1][j], h[i][j],
            hu[i - 1][j], hu[i][j],
            b[i - 1][j], b[i][j],
            hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
            huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
            maxEdgeSpeed );

#ifdef LOOP_OPENMP
        // update the thread-local maximum wave speed
        l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
        // update the maximum wave speed
        maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
      }
      assert (j == end_ny_1_1);
    }
    // ^^^^^ GKUKREJA : Fusing the loops for horizontal and vertical edges, adding this one extra iteration to avoid if looping.

#ifdef LOOP_OPENMP
#pragma omp critical
    {
      maxWaveSpeed = std::max (l_maxWaveSpeed, maxWaveSpeed);

#ifdef COUNTFLOPS
      flops += wavePropagationSolver.flops;
#endif
    }
#endif // LOOP_OPENMP
  } // #pragma omp parallel

  if (maxWaveSpeed > 0.00001) {
    // TODO zeroTol
    // compute the time step width
    // CFL-Condition
    // (max. wave speed) * dt / dx < .5
    // => dt = .5 * dx / (max wave speed)
    maxTimestep = std::min (dx / maxWaveSpeed, dy / maxWaveSpeed);
    maxTimestep *= (float) .4; // CFL-number = .5
  } else {
    // might happen in dry cells
    maxTimestep = std::numeric_limits<float>::max();
  }

#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
  time_needed += omp_get_wtime() - time_begin;
#else
  time_needed += clock() - time_begin;
#endif
#endif
}
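Both new flux routines collect a thread-local l_maxWaveSpeed and merge it into the shared maxWaveSpeed inside an omp critical section. As an aside, OpenMP 3.1 and later can express the same "maximum over all threads" directly with a max reduction; the toy program below is only an illustration of that alternative under that assumption, not code from this commit (edgeSpeed here just stands in for the per-edge wave speeds returned by computeNetUpdates).

// reduction_sketch.cpp -- illustrative only; not part of this commit.
// Same idea as the thread-local maximum plus omp critical pattern above,
// expressed with OpenMP's built-in max reduction (OpenMP 3.1+).
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> edgeSpeed(1000);
  for (std::size_t k = 0; k < edgeSpeed.size(); k++)
    edgeSpeed[k] = 0.001f * k;           // stand-in for computeNetUpdates output

  float maxWaveSpeed = 0.f;
#pragma omp parallel for reduction(max : maxWaveSpeed)
  for (long k = 0; k < (long) edgeSpeed.size(); k++)
    maxWaveSpeed = std::max(maxWaveSpeed, edgeSpeed[k]);

  std::printf("max wave speed: %f\n", maxWaveSpeed);
  return 0;
}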
/**
 * Compute net updates for the border cells of the block, i.e. the cells whose
 * net updates depend on the exchanged ghost layers.
 * The member variable #maxTimestep will be updated with the
 * maximum allowed time step size.
 */
void
SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
{
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
  const double time_begin = omp_get_wtime();
#else
  const double time_begin = clock();
#endif
#endif

  // maximum (linearized) wave speed within one iteration
  float maxWaveSpeed = (float) 0.;

  // compute the loop limits
  const int end_ny_1_1 = ny + 1;
  const int end_ny_1_2 = ny + 2;

#ifdef LOOP_OPENMP
#pragma omp parallel
#endif
  {
    float l_maxWaveSpeed = (float) 0.;

#if WAVE_PROPAGATION_SOLVER==4
    solver::FWaveVec<float> wavePropagationSolver;
#else // WAVE_PROPAGATION_SOLVER==4
    solver::AugRie_SIMD wavePropagationSolver;
#endif // WAVE_PROPAGATION_SOLVER==4

    /***************************************************************************************
     * compute the net-updates for the vertical edges
     **************************************************************************************/
#ifdef LOOP_OPENMP
    // Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Vectorization is currently only possible for the FWaveVec solver
    // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Compute vertical edges along the left and right border columns
    for (int j = 1; j < end_ny_1_1; ++j) {
      float maxEdgeSpeed;

      wavePropagationSolver.computeNetUpdates (
          h[0][j], h[1][j],
          hu[0][j], hu[1][j],
          b[0][j], b[1][j],
          hNetUpdatesLeft[0][j - 1], hNetUpdatesRight[0][j - 1],
          huNetUpdatesLeft[0][j - 1], huNetUpdatesRight[0][j - 1],
          maxEdgeSpeed );

      wavePropagationSolver.computeNetUpdates (
          h[nx][j], h[nx + 1][j],
          hu[nx][j], hu[nx + 1][j],
          b[nx][j], b[nx + 1][j],
          hNetUpdatesLeft[nx][j - 1], hNetUpdatesRight[nx][j - 1],
          huNetUpdatesLeft[nx][j - 1], huNetUpdatesRight[nx][j - 1],
          maxEdgeSpeed );

#ifdef LOOP_OPENMP
      // update the thread-local maximum wave speed
      l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
      // update the maximum wave speed
      maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
    }
    // assert (j == ny + 1);

#ifdef LOOP_OPENMP
    // Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Vectorization is currently only possible for the FWaveVec solver
    // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Compute vertical edges along the bottom and top border rows
    for (int i = 1; i < nx + 2; i++) {
      float maxEdgeSpeed;

      wavePropagationSolver.computeNetUpdates (
          h[i - 1][1], h[i][1],
          hu[i - 1][1], hu[i][1],
          b[i - 1][1], b[i][1],
          hNetUpdatesLeft[i - 1][0], hNetUpdatesRight[i - 1][0],
          huNetUpdatesLeft[i - 1][0], huNetUpdatesRight[i - 1][0],
          maxEdgeSpeed );

      wavePropagationSolver.computeNetUpdates (
          h[i - 1][end_ny_1_1 - 1], h[i][end_ny_1_1 - 1],
          hu[i - 1][end_ny_1_1 - 1], hu[i][end_ny_1_1 - 1],
          b[i - 1][end_ny_1_1 - 1], b[i][end_ny_1_1 - 1],
          hNetUpdatesLeft[i - 1][end_ny_1_1 - 2], hNetUpdatesRight[i - 1][end_ny_1_1 - 2],
          huNetUpdatesLeft[i - 1][end_ny_1_1 - 2], huNetUpdatesRight[i - 1][end_ny_1_1 - 2],
          maxEdgeSpeed );

#ifdef LOOP_OPENMP
      // update the thread-local maximum wave speed
      l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
      // update the maximum wave speed
      maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
    }

    /***************************************************************************************
     * compute the net-updates for the horizontal edges
     **************************************************************************************/
#ifdef LOOP_OPENMP
    // Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Vectorization is currently only possible for the FWaveVec solver
    // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Compute horizontal edges along the left and right border columns
    for (int j = 1; j < end_ny_1_2; j++) {
      float maxEdgeSpeed;

      wavePropagationSolver.computeNetUpdates (
          h[1][j - 1], h[1][j],
          hv[1][j - 1], hv[1][j],
          b[1][j - 1], b[1][j],
          hNetUpdatesBelow[0][j - 1], hNetUpdatesAbove[0][j - 1],
          hvNetUpdatesBelow[0][j - 1], hvNetUpdatesAbove[0][j - 1],
          maxEdgeSpeed );

      wavePropagationSolver.computeNetUpdates (
          h[nx + 1][j - 1], h[nx + 1][j],
          hv[nx + 1][j - 1], hv[nx + 1][j],
          b[nx + 1][j - 1], b[nx + 1][j],
          hNetUpdatesBelow[nx][j - 1], hNetUpdatesAbove[nx][j - 1],
          hvNetUpdatesBelow[nx][j - 1], hvNetUpdatesAbove[nx][j - 1],
          maxEdgeSpeed );

#ifdef LOOP_OPENMP
      // update the thread-local maximum wave speed
      l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
      // update the maximum wave speed
      maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
    }

#ifdef LOOP_OPENMP
    // Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Vectorization is currently only possible for the FWaveVec solver
    // Vectorize the inner loop
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
    // Compute horizontal edges along the bottom and top border rows
    for (int i = 1; i < nx + 2; i++) {
      float maxEdgeSpeed;

      wavePropagationSolver.computeNetUpdates (
          h[i][0], h[i][1],
          hv[i][0], hv[i][1],
          b[i][0], b[i][1],
          hNetUpdatesBelow[i - 1][0], hNetUpdatesAbove[i - 1][0],
          hvNetUpdatesBelow[i - 1][0], hvNetUpdatesAbove[i - 1][0],
          maxEdgeSpeed );

      wavePropagationSolver.computeNetUpdates (
          h[i][end_ny_1_2 - 2], h[i][end_ny_1_2 - 1],
          hv[i][end_ny_1_2 - 2], hv[i][end_ny_1_2 - 1],
          b[i][end_ny_1_2 - 2], b[i][end_ny_1_2 - 1],
          hNetUpdatesBelow[i - 1][end_ny_1_2 - 2], hNetUpdatesAbove[i - 1][end_ny_1_2 - 2],
          hvNetUpdatesBelow[i - 1][end_ny_1_2 - 2], hvNetUpdatesAbove[i - 1][end_ny_1_2 - 2],
          maxEdgeSpeed );

#ifdef LOOP_OPENMP
      // update the thread-local maximum wave speed
      l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
      // update the maximum wave speed
      maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
    }
    // assert (j == ny + 2);

#ifdef LOOP_OPENMP
#pragma omp critical
    {
      maxWaveSpeed = std::max (l_maxWaveSpeed, maxWaveSpeed);

#ifdef COUNTFLOPS
      flops += wavePropagationSolver.flops;
#endif
    }
#endif // LOOP_OPENMP
  } // #pragma omp parallel

  if (maxWaveSpeed > 0.00001) {
    // TODO zeroTol
    // compute the time step width
    // CFL-Condition
    // (max. wave speed) * dt / dx < .5
    // => dt = .5 * dx / (max wave speed)
    maxTimestep = std::min (dx / maxWaveSpeed, dy / maxWaveSpeed);
    maxTimestep *= (float) .4; // CFL-number = .5
  } else {
    // might happen in dry cells
    maxTimestep = std::numeric_limits<float>::max();
  }

#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
  time_needed += omp_get_wtime() - time_begin;
#else
  time_needed += clock() - time_begin;
#endif
#endif
}
/**
* Updates the unknowns with the already computed net-updates.
*
...
...
src/examples/swe_mpi.cpp (view file @ af6de788)
...
...
@@ -436,15 +436,17 @@ int main( int argc, char** argv ) {
      // do time steps until next checkpoint is reached
      while( l_t < l_checkPoints[c] ) {
+       MPI_Request *l_request;
+
        // reset CPU-Communication clock
        tools::Logger::logger.resetClockToCurrentTime("CpuCommunication");

        // exchange ghost and copy layers
-       exchangeLeftRightGhostLayers( l_leftNeighborRank,  l_leftInflow,  l_leftOutflow,
+       l_request = exchangeAsyncLeftRightGhostLayers( l_leftNeighborRank,  l_leftInflow,  l_leftOutflow,
                                      l_rightNeighborRank, l_rightInflow, l_rightOutflow,
                                      l_mpiCol );

-       exchangeBottomTopGhostLayers( l_bottomNeighborRank, l_bottomInflow, l_bottomOutflow,
+       l_request = exchangeAsyncBottomTopGhostLayers( l_bottomNeighborRank, l_bottomInflow, l_bottomOutflow,
                                      l_topNeighborRank,    l_topInflow,    l_topOutflow,
                                      l_mpiRow );
...
...
@@ -454,8 +456,14 @@ int main( int argc, char** argv ) {
        // set values in ghost cells
        l_wavePropgationBlock.setGhostLayer();

-       // compute numerical flux on each edge
-       l_wavePropgationBlock.computeNumericalFluxes();
+       // compute the numerical fluxes for the inner block only
+       l_wavePropgationBlock.computeNumericalFluxes_innerBlock();
+
+       // check that the ghost/copy layers posted above have been exchanged
+       MPI_Wait( l_request, &status );
+
+       // compute the numerical fluxes for the border cells only
+       l_wavePropgationBlock.computeNumericalFluxes_borders();

        //! maximum allowed time step width within a block.
        float l_maxTimeStepWidth = l_wavePropgationBlock.getMaxTimestep();
...
@@ -555,9 +563,16 @@ void exchangeLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o
  MPI_Status l_status;

+ // int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
+ //                  int dest, int sendtag,
+ //                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
+ //                  int source, int recvtag,
+ //                  MPI_Comm comm, MPI_Status *status)
+
  // send to left, receive from the right:
  MPI_Sendrecv( i_leftOutflow->h.elemVector(),  1, i_mpiCol, i_leftNeighborRank,  1,
                o_rightInflow->h.elemVector(),  1, i_mpiCol, i_rightNeighborRank, 1,
                MPI_COMM_WORLD, &l_status );

  MPI_Sendrecv( i_leftOutflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank,  2,
...
...
@@ -583,6 +598,92 @@ void exchangeLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o
}
/**
 * Exchanges the left and right ghost layers with nonblocking MPI_Isend/MPI_Irecv.
 *
 * @param i_leftNeighborRank   MPI rank of the left neighbor.
 * @param o_leftInflow         ghost layer, where the left neighbor writes into.
 * @param i_leftOutflow        layer, where the left neighbor reads from.
 * @param i_rightNeighborRank  MPI rank of the right neighbor.
 * @param o_rightInflow        ghost layer, where the right neighbor writes into.
 * @param i_rightOutflow       layer, where the right neighbor reads from.
 * @param i_mpiCol             MPI data type for the vertical ghost layers.
 */
MPI_Request*
exchangeAsyncLeftRightGhostLayers( const int i_leftNeighborRank,
                                   SWE_Block1D* o_leftInflow,
                                   SWE_Block1D* i_leftOutflow,
                                   const int i_rightNeighborRank,
                                   SWE_Block1D* o_rightInflow,
                                   SWE_Block1D* i_rightOutflow,
                                   MPI_Datatype i_mpiCol )
{
  // MPI_Status l_status;
  MPI_Request* l_request = (MPI_Request*) malloc( sizeof(MPI_Request) );

  // int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
  //                  int dest, int sendtag,
  //                  void *recvbuf, int recvcount, MPI_Datatype recvtype,
  //                  int source, int recvtag,
  //                  MPI_Comm comm, MPI_Status *status)

  // send to left, receive from the right:
  MPI_Isend( i_leftOutflow->h.elemVector(),  1, i_mpiCol, i_leftNeighborRank,  1, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_rightInflow->h.elemVector(),  1, i_mpiCol, i_rightNeighborRank, 1, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_leftOutflow->h.elemVector(), 1, i_mpiCol,
  //               i_leftNeighborRank, 1,
  //               o_rightInflow->h.elemVector(), 1, i_mpiCol,
  //               i_rightNeighborRank, 1,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_leftOutflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank,  2, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_rightInflow->hu.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 2, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_leftOutflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 2,
  //               o_rightInflow->hu.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 2,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_leftOutflow->hv.elemVector(), 1, i_mpiCol, i_leftNeighborRank,  3, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_rightInflow->hv.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 3, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_leftOutflow->hv.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 3,
  //               o_rightInflow->hv.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 3,
  //               MPI_COMM_WORLD, &l_status );

  // send to right, receive from the left:
  MPI_Isend( i_rightOutflow->h.elemVector(),  1, i_mpiCol, i_rightNeighborRank, 4, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_leftInflow->h.elemVector(),    1, i_mpiCol, i_leftNeighborRank,  4, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_rightOutflow->h.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 4,
  //               o_leftInflow->h.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 4,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_rightOutflow->hu.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 5, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_leftInflow->hu.elemVector(),   1, i_mpiCol, i_leftNeighborRank,  5, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_rightOutflow->hu.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 5,
  //               o_leftInflow->hu.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 5,
  //               MPI_COMM_WORLD, &l_status);

  MPI_Isend( i_rightOutflow->hv.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 6, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_leftInflow->hv.elemVector(),   1, i_mpiCol, i_leftNeighborRank,  6, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_rightOutflow->hv.elemVector(), 1, i_mpiCol, i_rightNeighborRank, 6,
  //               o_leftInflow->hv.elemVector(), 1, i_mpiCol, i_leftNeighborRank, 6,
  //               MPI_COMM_WORLD, &l_status );

  return l_request;
}
/**
* Exchanges the bottom and top ghost layers with MPI's SendReceive.
*
...
...
@@ -624,5 +725,82 @@ void exchangeBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D*
  MPI_Sendrecv( i_topNeighborOutflow->hv.elemVector(),   1, i_mpiRow, i_topNeighborRank,    16,
                o_bottomNeighborInflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 16,
                MPI_COMM_WORLD, &l_status );
}
/**
 * Exchanges the bottom and top ghost layers with nonblocking MPI_Isend/MPI_Irecv.
 *
 * @param i_bottomNeighborRank     MPI rank of the bottom neighbor.
 * @param o_bottomNeighborInflow   ghost layer, where the bottom neighbor writes into.
 * @param i_bottomNeighborOutflow  ghost layer, where the bottom neighbor reads from.
 * @param i_topNeighborRank        MPI rank of the top neighbor.
 * @param o_topNeighborInflow      ghost layer, where the top neighbor writes into.
 * @param i_topNeighborOutflow     ghost layer, where the top neighbor reads from.
 * @param i_mpiRow                 MPI data type for the horizontal ghost layers.
 */
MPI_Request*
exchangeAsyncBottomTopGhostLayers( const int i_bottomNeighborRank,
                                   SWE_Block1D* o_bottomNeighborInflow,
                                   SWE_Block1D* i_bottomNeighborOutflow,
                                   const int i_topNeighborRank,
                                   SWE_Block1D* o_topNeighborInflow,
                                   SWE_Block1D* i_topNeighborOutflow,
                                   const MPI_Datatype i_mpiRow )
{
  // MPI_Status l_status;
  MPI_Request* l_request;
  l_request = (MPI_Request*) malloc( sizeof(MPI_Request) );

  // send to bottom, receive from the top:
  MPI_Isend( i_bottomNeighborOutflow->h.elemVector(),  1, i_mpiRow, i_bottomNeighborRank, 11, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_topNeighborInflow->h.elemVector(),      1, i_mpiRow, i_topNeighborRank,    11, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_bottomNeighborOutflow->h.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 11,
  //               o_topNeighborInflow->h.elemVector(), 1, i_mpiRow, i_topNeighborRank, 11,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_bottomNeighborOutflow->hu.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 12, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_topNeighborInflow->hu.elemVector(),     1, i_mpiRow, i_topNeighborRank,    12, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_bottomNeighborOutflow->hu.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 12,
  //               o_topNeighborInflow->hu.elemVector(), 1, i_mpiRow, i_topNeighborRank, 12,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_bottomNeighborOutflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 13, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_topNeighborInflow->hv.elemVector(),     1, i_mpiRow, i_topNeighborRank,    13, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_bottomNeighborOutflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 13,
  //               o_topNeighborInflow->hv.elemVector(), 1, i_mpiRow, i_topNeighborRank, 13,
  //               MPI_COMM_WORLD, &l_status);

  // send to top, receive from the bottom:
  MPI_Isend( i_topNeighborOutflow->h.elemVector(),     1, i_mpiRow, i_topNeighborRank,    14, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_bottomNeighborInflow->h.elemVector(),   1, i_mpiRow, i_bottomNeighborRank, 14, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_topNeighborOutflow->h.elemVector(), 1, i_mpiRow, i_topNeighborRank, 14,
  //               o_bottomNeighborInflow->h.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 14,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_topNeighborOutflow->hu.elemVector(),    1, i_mpiRow, i_topNeighborRank,    15, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_bottomNeighborInflow->hu.elemVector(),  1, i_mpiRow, i_bottomNeighborRank, 15, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_topNeighborOutflow->hu.elemVector(), 1, i_mpiRow, i_topNeighborRank, 15,
  //               o_bottomNeighborInflow->hu.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 15,
  //               MPI_COMM_WORLD, &l_status );

  MPI_Isend( i_topNeighborOutflow->hv.elemVector(),    1, i_mpiRow, i_topNeighborRank,    16, MPI_COMM_WORLD, l_request );
  MPI_Irecv( o_bottomNeighborInflow->hv.elemVector(),  1, i_mpiRow, i_bottomNeighborRank, 16, MPI_COMM_WORLD, l_request );
  // MPI_Sendrecv( i_topNeighborOutflow->hv.elemVector(), 1, i_mpiRow, i_topNeighborRank, 16,
  //               o_bottomNeighborInflow->hv.elemVector(), 1, i_mpiRow, i_bottomNeighborRank, 16,
  //               MPI_COMM_WORLD, &l_status );

  return l_request;
}
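The helper above malloc()s the MPI_Request it returns and passes the same pointer to all twelve nonblocking calls, so the caller is responsible for releasing that heap storage once the wait has completed (the time-stepping loop above currently does not, which leaks one allocation per time step). The toy program below only illustrates that post/wait/free lifetime on a single rank; it is not code from this repository.

// request_free_sketch.cpp -- illustrative only; not part of this commit.
#include <mpi.h>
#include <cstdio>
#include <cstdlib>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  double sendVal = 42.0, recvVal = 0.0;
  MPI_Request* l_request = (MPI_Request*) std::malloc( sizeof(MPI_Request) );
  MPI_Status   l_status;

  // post the receive first; a blocking send to self is then safe to complete
  MPI_Irecv(&recvVal, 1, MPI_DOUBLE, 0, 11, MPI_COMM_SELF, l_request);
  MPI_Send(&sendVal,  1, MPI_DOUBLE, 0, 11, MPI_COMM_SELF);

  MPI_Wait(l_request, &l_status);   // completes the receive, sets *l_request to MPI_REQUEST_NULL
  std::free(l_request);             // release the heap storage allocated for the handle
  std::printf("received %f\n", recvVal);

  MPI_Finalize();
  return 0;
}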