Commit 4bcb6a60 authored by Gaurav Kukreja

Working async mpi, with debug prints

Signed-off-by: Gaurav Kukreja <gaurav.kukreja@tum.de>
parent af6de788
@@ -388,6 +388,9 @@ SWE_WavePropagationBlock::computeNumericalFluxes ()
void
SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
{
+ int i;
+ int j;
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
const double time_begin = omp_get_wtime();
@@ -424,8 +427,10 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
- for (int i = 2; i < nx; i++) {
-   int j;
+ for (i = 2; i < nx; i++) {
+   j;
//std::cout << __FILE__ << ": " << __LINE__ << endl;
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
@@ -452,7 +457,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
- assert (j == ny + 1);
+ //assert (j == ny + 1);
// }
/***************************************************************************************
@@ -484,45 +489,50 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
- assert (j = ny + 2);
+ //assert (j = ny + 2);
}
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
// vvvvv GKUKREJA : Fusing the loops for horizontal and vertical edges, adding this one extra iteration to avoid if looping.
{
- int i = nx + 1;
- int j = 1;
+ i = nx;
+ j = 2;
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
- // Vectorization is currently only possible for the FWaveVec solver
- // Vectorize the inner loop
- #pragma simd
+ // Vectorization is currently only possible for the FWaveVec solver
+ // Vectorize the inner loop
+ #pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
#ifdef LOOP_OPENMP
#pragma omp for schedule(static) nowait
#endif
- for (j = 1; j < end_ny_1_1; ++j) {
-   float maxEdgeSpeed;
+ for (j = 2; j < end_ny_1_1; ++j) {
+   float maxEdgeSpeed;
+   std::cout << __FILE__ << ": " << __LINE__ << endl;
-   wavePropagationSolver.computeNetUpdates (
-     h[i - 1][j], h[i][j],
-     hu[i - 1][j], hu[i][j],
-     b[i - 1][j], b[i][j],
-     hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
-     huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
-     maxEdgeSpeed
-   );
+   wavePropagationSolver.computeNetUpdates (
+     h[i - 1][j], h[i][j],
+     hu[i - 1][j], hu[i][j],
+     b[i - 1][j], b[i][j],
+     hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
+     huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
+     maxEdgeSpeed
+   );
+   std::cout << __FILE__ << ": " << __LINE__ << endl;
#ifdef LOOP_OPENMP
-   //update the thread-local maximum wave speed
-   l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
+   //update the thread-local maximum wave speed
+   l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
-   //update the maximum wave speed
-   maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
+   //update the maximum wave speed
+   maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
assert (j == ny + 1);
}
}
// ^^^^^ GKUKREJA : Fusing the loops for horizontal and vertical edges, adding this one extra iteration to avoid if looping.
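The fusion noted between the GKUKREJA markers can be illustrated in isolation. A minimal standalone sketch, assuming hypothetical stand-ins work_x/work_y for the vertical- and horizontal-edge updates (none of these names appear in the commit):

#include <cstdio>

// Hypothetical stand-ins for the vertical/horizontal edge updates.
static void work_x(int i, int j) { std::printf("x %d %d\n", i, j); }
static void work_y(int i, int j) { std::printf("y %d %d\n", i, j); }

int main() {
    const int nx = 6, ny = 6;
    // Fused loop: both edge updates in one body, no per-iteration branch.
    for (int i = 2; i < nx; ++i)
        for (int j = 2; j < ny + 1; ++j) {
            work_x(i, j);
            work_y(i, j);
        }
    // Peeled extra iteration for the remaining column, instead of an
    // `if (i == nx)` test inside the hot loop.
    for (int j = 2; j < ny + 1; ++j)
        work_x(nx, j);
    return 0;
}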
@@ -560,6 +570,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
time_needed += clock() - time_begin;
#endif
#endif
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
}
/**
@@ -570,6 +582,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
void
SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
{
+ int i;
+ int j;
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
const double time_begin = omp_get_wtime();
@@ -584,6 +598,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
const int end_ny_1_1 = ny + 1;
const int end_ny_1_2 = ny + 2;
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
#ifdef LOOP_OPENMP
#pragma omp parallel
@@ -604,13 +619,13 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
- #pragma simd
+ #pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute vertical edges, for the top and bottom border
for (j = 1; j < end_ny_1_1; ++j) {
@@ -646,7 +661,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
@@ -655,25 +670,27 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute vertical edges, for the left and right border
- for(int i = 1; i < nx + 2; i++)
+ for(i = 1; i < nx + 2; i++)
{
-   wavePropagationSolver.computeNetUpdates (
-     h[i - 1][1], h[i][1],
-     hu[i - 1][1], hu[i][1],
-     b[i - 1][1], b[i][1],
-     hNetUpdatesLeft[i - 1][0], hNetUpdatesRight[i - 1][0],
-     huNetUpdatesLeft[i - 1][0], huNetUpdatesRight[i - 1][0],
-     maxEdgeSpeed
-   );
-   wavePropagationSolver.computeNetUpdates (
-     h[i - 1][end_ny_1_1 - 1], h[i][end_ny_1_1 - 1],
-     hu[i - 1][end_ny_1_1 - 1], hu[i][end_ny_1_1 - 1],
-     b[i - 1][end_ny_1_1 - 1], b[i][end_ny_1_1 - 1],
-     hNetUpdatesLeft[i - 1][end_ny_1_1 - 2], hNetUpdatesRight[i - 1][end_ny_1_1 - 2],
-     huNetUpdatesLeft[i - 1][end_ny_1_1 - 2], huNetUpdatesRight[i - 1][end_ny_1_1 - 2],
-     maxEdgeSpeed
-   );
+   float maxEdgeSpeed;
+   wavePropagationSolver.computeNetUpdates (
+     h[i - 1][1], h[i][1],
+     hu[i - 1][1], hu[i][1],
+     b[i - 1][1], b[i][1],
+     hNetUpdatesLeft[i - 1][0], hNetUpdatesRight[i - 1][0],
+     huNetUpdatesLeft[i - 1][0], huNetUpdatesRight[i - 1][0],
+     maxEdgeSpeed
+   );
+   wavePropagationSolver.computeNetUpdates (
+     h[i - 1][end_ny_1_1 - 1], h[i][end_ny_1_1 - 1],
+     hu[i - 1][end_ny_1_1 - 1], hu[i][end_ny_1_1 - 1],
+     b[i - 1][end_ny_1_1 - 1], b[i][end_ny_1_1 - 1],
+     hNetUpdatesLeft[i - 1][end_ny_1_1 - 2], hNetUpdatesRight[i - 1][end_ny_1_1 - 2],
+     huNetUpdatesLeft[i - 1][end_ny_1_1 - 2], huNetUpdatesRight[i - 1][end_ny_1_1 - 2],
+     maxEdgeSpeed
+   );
#ifdef LOOP_OPENMP
@@ -691,7 +708,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
**************************************************************************************/
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
@@ -713,11 +730,11 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
);
wavePropagationSolver.computeNetUpdates (
-   h[nx + 1][j - 1], h[nx + 1][j],
-   hv[nx + 1][j - 1], hv[nx + 1][j],
-   b[nx + 1][j - 1], b[nx + 1][j],
-   hNetUpdatesBelow[nx][j - 1], hNetUpdatesAbove[nx][j - 1],
-   hvNetUpdatesBelow[nx][j - 1], hvNetUpdatesAbove[nx][j - 1],
+   h[nx][j - 1], h[nx][j],
+   hv[nx][j - 1], hv[nx][j],
+   b[nx][j - 1], b[nx][j],
+   hNetUpdatesBelow[nx - 1][j - 1], hNetUpdatesAbove[nx - 1][j - 1],
+   hvNetUpdatesBelow[nx - 1][j - 1], hvNetUpdatesAbove[nx - 1][j - 1],
maxEdgeSpeed
);
@@ -732,7 +749,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
@@ -741,7 +758,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute horizontal edges, for the top and bottom border
- for (i = 1; i < nx + 2; i++) {
+ for (i = 1; i < nx + 1; i++) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
@@ -753,6 +770,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
maxEdgeSpeed
);
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
wavePropagationSolver.computeNetUpdates (
h[i][end_ny_1_2 - 2], h[i][end_ny_1_2 - 1],
hv[i][end_ny_1_2 - 2], hv[i][end_ny_1_2 - 1],
@@ -762,6 +781,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
maxEdgeSpeed
);
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
@@ -806,6 +827,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
time_needed += clock() - time_begin;
#endif
#endif
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
}
/**
...
@@ -139,6 +139,12 @@ public:
//computes the net-updates for the block
void computeNumericalFluxes ();
+ //computes the net-updates for the inner block only
+ void computeNumericalFluxes_innerBlock ();
+ //computes the net-updates for the borders only
+ void computeNumericalFluxes_borders ();
//update the cells
void updateUnknowns (float dt);
...
@@ -89,6 +89,17 @@ void exchangeBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D*
const int i_topNeighborRank, SWE_Block1D* o_topNeighborInflow, SWE_Block1D* i_topNeighborOutflow,
const MPI_Datatype i_mpiRow);
+ // Asynchronously exchanges the left and right ghost layers.
+ MPI_Request* exchangeAsyncLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o_leftInflow, SWE_Block1D* i_leftOutflow,
+                                                 const int i_rightNeighborRank, SWE_Block1D* o_rightInflow, SWE_Block1D* i_rightOutflow,
+                                                 MPI_Datatype i_mpiCol);
+ // Asynchronously exchanges the bottom and top ghost layers.
+ MPI_Request* exchangeAsyncBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D* o_bottomNeighborInflow, SWE_Block1D* i_bottomNeighborOutflow,
+                                                 const int i_topNeighborRank, SWE_Block1D* o_topNeighborInflow, SWE_Block1D* i_topNeighborOutflow,
+                                                 const MPI_Datatype i_mpiRow);
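A minimal sketch of what the body of such an asynchronous exchange could look like, assuming plain MPI_Irecv/MPI_Isend pairs behind the declared interface; exchangeAsyncSketch, o_inflow, and i_outflow are illustrative names, not taken from this commit:

#include <mpi.h>
#include <cstdlib>

// Hedged sketch: the callee allocates the requests and returns them; the
// caller must later wait on them and free() the allocation (the commit
// itself malloc()s a single MPI_Request inside the exchange function).
MPI_Request* exchangeAsyncSketch(const int i_neighborRank,
                                 float* o_inflow,   // ghost layer to receive into (assumed layout)
                                 float* i_outflow,  // copy layer to send (assumed layout)
                                 const MPI_Datatype i_mpiCol) {
    MPI_Request* l_requests = (MPI_Request*) std::malloc(2 * sizeof(MPI_Request));
    // Post the receive before the send so a rendezvous-mode send cannot deadlock.
    MPI_Irecv(o_inflow, 1, i_mpiCol, i_neighborRank, 1, MPI_COMM_WORLD, &l_requests[0]);
    MPI_Isend(i_outflow, 1, i_mpiCol, i_neighborRank, 1, MPI_COMM_WORLD, &l_requests[1]);
    return l_requests; // caller: MPI_Waitall(2, l_requests, MPI_STATUSES_IGNORE); free(l_requests);
}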
/**
* Main program for the simulation on a single SWE_WavePropagationBlock.
*/
@@ -383,6 +394,8 @@ int main( int argc, char** argv ) {
l_topNeighborRank, l_topInflow, l_topOutflow,
l_mpiRow );
std::cout << "Exchanged Ghost Layers Initially\n";
// Init fancy progressbar
tools::ProgressBar progressBar(l_endSimulation, l_mpiRank);
@@ -436,7 +449,8 @@ int main( int argc, char** argv ) {
// do time steps until next checkpoint is reached
while( l_t < l_checkPoints[c] ) {
- MPI_Request l_request;
+ MPI_Request* l_request;
+ MPI_Status status;
//reset CPU-Communication clock
tools::Logger::logger.resetClockToCurrentTime("CpuCommunication");
@@ -459,12 +473,18 @@ int main( int argc, char** argv ) {
// compute numerical flux for inner edge
l_wavePropgationBlock.computeNumericalFluxes_innerBlock(); // Only Inner Block
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
+ // checking if the borders have been exchanged
- MPI_Wait(&l_request, &status);
+ MPI_Wait(l_request, &status);
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
// compute numerical flux for the blocks
l_wavePropgationBlock.computeNumericalFluxes_borders(); // Only Borders
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
//! maximum allowed time step width within a block.
float l_maxTimeStepWidth = l_wavePropgationBlock.getMaxTimestep();
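Taken together, the intended per-timestep overlap pattern is the following condensed sketch of the calls above (the free() call is an assumption suggested by the malloc() in the exchange function, not something this commit performs):

// Condensed sketch of the compute/communicate overlap per time step.
MPI_Request* l_request = exchangeAsyncLeftRightGhostLayers(/* ... */);
l_wavePropgationBlock.computeNumericalFluxes_innerBlock(); // overlaps the transfer
MPI_Wait(l_request, &status);  // ghost layers now valid
free(l_request);               // assumed cleanup; otherwise one request leaks per step
l_wavePropgationBlock.computeNumericalFluxes_borders();
// Note (standard MPI semantics): one MPI_Request covers exactly one
// nonblocking operation; if the exchange posts several Isend/Irecv calls,
// an MPI_Waitall over an array of requests is needed rather than a single MPI_Wait.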
@@ -616,6 +636,8 @@ MPI_Request* exchangeAsyncLeftRightGhostLayers( const int i_leftNeighborRank, S
// MPI_Status l_status;
+ MPI_Request* l_request;
+ l_request = (MPI_Request*) malloc(sizeof(MPI_Request));
// int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
// int dest, int sendtag,
// void *recvbuf, int recvcount, MPI_Datatype recvtype,
...