Commit 4bcb6a60 authored by Gaurav Kukreja

Working async mpi, with debug prints

Signed-off-by: Gaurav Kukreja <gaurav.kukreja@tum.de>
parent af6de788
@@ -388,6 +388,9 @@ SWE_WavePropagationBlock::computeNumericalFluxes ()
void
SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
{
+ int i;
+ int j;
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
const double time_begin = omp_get_wtime();
@@ -424,8 +427,10 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
// Use OpenMP for the outer loop
#pragma omp for schedule(static) nowait
#endif // LOOP_OPENMP
- for (int i = 2; i < nx; i++) {
-   int j;
+ for (i = 2; i < nx; i++) {
+   j;
//std::cout << __FILE__ << ": " << __LINE__ << endl;
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
@@ -452,7 +457,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
- assert (j == ny + 1);
+ //assert (j == ny + 1);
// }
/***************************************************************************************
@@ -484,45 +489,50 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
- assert (j = ny + 2);
+ //assert (j = ny + 2);
}
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
// vvvvv GKUKREJA : Fusing the loops for horizontal and vertical edges, adding this one extra iteration to avoid if looping.
{
- int i = nx + 1;
- int j = 1;
+ i = nx;
+ j = 2;
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
- // Vectorization is currently only possible for the FWaveVec solver
- // Vectorize the inner loop
- #pragma simd
+ // Vectorization is currently only possible for the FWaveVec solver
+ // Vectorize the inner loop
+ #pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
#ifdef LOOP_OPENMP
#pragma omp for schedule(static) nowait
#endif
- for (j = 1; j < end_ny_1_1; ++j) {
-   float maxEdgeSpeed;
+ for (j = 2; j < end_ny_1_1; ++j) {
+   float maxEdgeSpeed;
+   std::cout << __FILE__ << ": " << __LINE__ << endl;
-   wavePropagationSolver.computeNetUpdates (
-     h[i - 1][j], h[i][j],
-     hu[i - 1][j], hu[i][j],
-     b[i - 1][j], b[i][j],
-     hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
-     huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
-     maxEdgeSpeed
-   );
+   wavePropagationSolver.computeNetUpdates (
+     h[i - 1][j], h[i][j],
+     hu[i - 1][j], hu[i][j],
+     b[i - 1][j], b[i][j],
+     hNetUpdatesLeft[i - 1][j - 1], hNetUpdatesRight[i - 1][j - 1],
+     huNetUpdatesLeft[i - 1][j - 1], huNetUpdatesRight[i - 1][j - 1],
+     maxEdgeSpeed
+   );
+   std::cout << __FILE__ << ": " << __LINE__ << endl;
#ifdef LOOP_OPENMP
-   //update the thread-local maximum wave speed
-   l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
+   //update the thread-local maximum wave speed
+   l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
#else // LOOP_OPENMP
-   //update the maximum wave speed
-   maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
+   //update the maximum wave speed
+   maxWaveSpeed = std::max (maxWaveSpeed, maxEdgeSpeed);
#endif // LOOP_OPENMP
}
assert (j == ny + 1);
}
}
// ^^^^^ GKUKREJA : Fusing the loops for horizontal and vertical edges, adding this one extra iteration to avoid if looping.
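The fusion noted between the GKUKREJA markers can be illustrated in isolation. A minimal standalone sketch, assuming hypothetical stand-ins work_x/work_y for the vertical- and horizontal-edge updates (none of these names appear in the commit):

#include <cstdio>

// Hypothetical stand-ins for the vertical/horizontal edge updates.
static void work_x(int i, int j) { std::printf("x %d %d\n", i, j); }
static void work_y(int i, int j) { std::printf("y %d %d\n", i, j); }

int main() {
    const int nx = 6, ny = 6;
    // Fused loop: both edge updates in one body, no per-iteration branch.
    for (int i = 2; i < nx; ++i)
        for (int j = 2; j < ny + 1; ++j) {
            work_x(i, j);
            work_y(i, j);
        }
    // Peeled extra iteration for the remaining column, instead of an
    // `if (i == nx)` test inside the hot loop.
    for (int j = 2; j < ny + 1; ++j)
        work_x(nx, j);
    return 0;
}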
@@ -560,6 +570,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
time_needed += clock() - time_begin;
#endif
#endif
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
}
/**
@@ -570,6 +582,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_innerBlock ()
void
SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
{
+ int i;
+ int j;
#ifdef COUNTFLOPS
#ifdef LOOP_OPENMP
const double time_begin = omp_get_wtime();
@@ -584,6 +598,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
const int end_ny_1_1 = ny + 1;
const int end_ny_1_2 = ny + 2;
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
#ifdef LOOP_OPENMP
#pragma omp parallel
@@ -604,13 +619,13 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Vectorization is currently only possible for the FWaveVec solver
// Vectorize the inner loop
- #pragma simd
+ #pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute vertical edges, for the top and bottom border
for (j = 1; j < end_ny_1_1; ++j) {
@@ -646,7 +661,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
@@ -655,25 +670,27 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute vertical edges, for the left and right border
- for(int i = 1; i < nx + 2; i++)
+ for(i = 1; i < nx + 2; i++)
{
-   wavePropagationSolver.computeNetUpdates (
-     h[i - 1][1], h[i][1],
-     hu[i - 1][1], hu[i][1],
-     b[i - 1][1], b[i][1],
-     hNetUpdatesLeft[i - 1][0], hNetUpdatesRight[i - 1][0],
-     huNetUpdatesLeft[i - 1][0], huNetUpdatesRight[i - 1][0],
-     maxEdgeSpeed
-   );
-   wavePropagationSolver.computeNetUpdates (
-     h[i - 1][end_ny_1_1 - 1], h[i][end_ny_1_1 - 1],
-     hu[i - 1][end_ny_1_1 - 1], hu[i][end_ny_1_1 - 1],
-     b[i - 1][end_ny_1_1 - 1], b[i][end_ny_1_1 - 1],
-     hNetUpdatesLeft[i - 1][end_ny_1_1 - 2], hNetUpdatesRight[i - 1][end_ny_1_1 - 2],
-     huNetUpdatesLeft[i - 1][end_ny_1_1 - 2], huNetUpdatesRight[i - 1][end_ny_1_1 - 2],
-     maxEdgeSpeed
-   );
+   float maxEdgeSpeed;
+   wavePropagationSolver.computeNetUpdates (
+     h[i - 1][1], h[i][1],
+     hu[i - 1][1], hu[i][1],
+     b[i - 1][1], b[i][1],
+     hNetUpdatesLeft[i - 1][0], hNetUpdatesRight[i - 1][0],
+     huNetUpdatesLeft[i - 1][0], huNetUpdatesRight[i - 1][0],
+     maxEdgeSpeed
+   );
+   wavePropagationSolver.computeNetUpdates (
+     h[i - 1][end_ny_1_1 - 1], h[i][end_ny_1_1 - 1],
+     hu[i - 1][end_ny_1_1 - 1], hu[i][end_ny_1_1 - 1],
+     b[i - 1][end_ny_1_1 - 1], b[i][end_ny_1_1 - 1],
+     hNetUpdatesLeft[i - 1][end_ny_1_1 - 2], hNetUpdatesRight[i - 1][end_ny_1_1 - 2],
+     huNetUpdatesLeft[i - 1][end_ny_1_1 - 2], huNetUpdatesRight[i - 1][end_ny_1_1 - 2],
+     maxEdgeSpeed
+   );
#ifdef LOOP_OPENMP
@@ -691,7 +708,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
**************************************************************************************/
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
@@ -713,11 +730,11 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
);
wavePropagationSolver.computeNetUpdates (
-   h[nx + 1][j - 1], h[nx + 1][j],
-   hv[nx + 1][j - 1], hv[nx + 1][j],
-   b[nx + 1][j - 1], b[nx + 1][j],
-   hNetUpdatesBelow[nx][j - 1], hNetUpdatesAbove[nx][j - 1],
-   hvNetUpdatesBelow[nx][j - 1], hvNetUpdatesAbove[nx][j - 1],
+   h[nx][j - 1], h[nx][j],
+   hv[nx][j - 1], hv[nx][j],
+   b[nx][j - 1], b[nx][j],
+   hNetUpdatesBelow[nx - 1][j - 1], hNetUpdatesAbove[nx - 1][j - 1],
+   hvNetUpdatesBelow[nx - 1][j - 1], hvNetUpdatesAbove[nx - 1][j - 1],
maxEdgeSpeed
);
@@ -732,7 +749,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#ifdef LOOP_OPENMP
// Use OpenMP for the outer loop
- #pragma omp for schedule(static) nowait
+ #pragma omp for schedule(static)
#endif // LOOP_OPENMP
#if WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
@@ -741,7 +758,7 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
#pragma simd
#endif // WAVE_PROPAGATION_SOLVER==4 and defined VECTORIZE
// Compute horizontal edges, for the top and bottom border
- for (i = 1; i < nx + 2; i++) {
+ for (i = 1; i < nx + 1; i++) {
float maxEdgeSpeed;
wavePropagationSolver.computeNetUpdates (
@@ -753,6 +770,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
maxEdgeSpeed
);
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
wavePropagationSolver.computeNetUpdates (
h[i][end_ny_1_2 - 2], h[i][end_ny_1_2 - 1],
hv[i][end_ny_1_2 - 2], hv[i][end_ny_1_2 - 1],
@@ -762,6 +781,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
maxEdgeSpeed
);
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
#ifdef LOOP_OPENMP
//update the thread-local maximum wave speed
l_maxWaveSpeed = std::max (l_maxWaveSpeed, maxEdgeSpeed);
@@ -806,6 +827,8 @@ SWE_WavePropagationBlock::computeNumericalFluxes_borders ()
time_needed += clock() - time_begin;
#endif
#endif
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
}
/**
...
@@ -139,6 +139,12 @@ public:
//computes the net-updates for the block
void computeNumericalFluxes ();
+ //computes the net-updates for the inner block only
+ void computeNumericalFluxes_innerBlock ();
+ //computes the net-updates for the borders only
+ void computeNumericalFluxes_borders ();
//update the cells
void updateUnknowns (float dt);
...
@@ -89,6 +89,17 @@ void exchangeBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D*
const int i_topNeighborRank, SWE_Block1D* o_topNeighborInflow, SWE_Block1D* i_topNeighborOutflow,
const MPI_Datatype i_mpiRow);
+ // Asynchronously exchanges the left and right ghost layers.
+ MPI_Request* exchangeAsyncLeftRightGhostLayers( const int i_leftNeighborRank, SWE_Block1D* o_leftInflow, SWE_Block1D* i_leftOutflow,
+                                                 const int i_rightNeighborRank, SWE_Block1D* o_rightInflow, SWE_Block1D* i_rightOutflow,
+                                                 MPI_Datatype i_mpiCol);
+ // Asynchronously exchanges the bottom and top ghost layers.
+ MPI_Request* exchangeAsyncBottomTopGhostLayers( const int i_bottomNeighborRank, SWE_Block1D* o_bottomNeighborInflow, SWE_Block1D* i_bottomNeighborOutflow,
+                                                 const int i_topNeighborRank, SWE_Block1D* o_topNeighborInflow, SWE_Block1D* i_topNeighborOutflow,
+                                                 const MPI_Datatype i_mpiRow);
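A minimal sketch of what the body of such an asynchronous exchange could look like, assuming plain MPI_Irecv/MPI_Isend pairs behind the declared interface; exchangeAsyncSketch, o_inflow, and i_outflow are illustrative names, not taken from this commit:

#include <mpi.h>
#include <cstdlib>

// Hedged sketch: the callee allocates the requests and returns them; the
// caller must later wait on them and free() the allocation (the commit
// itself malloc()s a single MPI_Request inside the exchange function).
MPI_Request* exchangeAsyncSketch(const int i_neighborRank,
                                 float* o_inflow,   // ghost layer to receive into (assumed layout)
                                 float* i_outflow,  // copy layer to send (assumed layout)
                                 const MPI_Datatype i_mpiCol) {
    MPI_Request* l_requests = (MPI_Request*) std::malloc(2 * sizeof(MPI_Request));
    // Post the receive before the send so a rendezvous-mode send cannot deadlock.
    MPI_Irecv(o_inflow, 1, i_mpiCol, i_neighborRank, 1, MPI_COMM_WORLD, &l_requests[0]);
    MPI_Isend(i_outflow, 1, i_mpiCol, i_neighborRank, 1, MPI_COMM_WORLD, &l_requests[1]);
    return l_requests; // caller: MPI_Waitall(2, l_requests, MPI_STATUSES_IGNORE); free(l_requests);
}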
/**
* Main program for the simulation on a single SWE_WavePropagationBlock.
*/
@@ -383,6 +394,8 @@ int main( int argc, char** argv ) {
l_topNeighborRank, l_topInflow, l_topOutflow,
l_mpiRow );
std::cout << "Exchanged Ghost Layers Initially\n";
// Init fancy progressbar
tools::ProgressBar progressBar(l_endSimulation, l_mpiRank);
@@ -436,7 +449,8 @@ int main( int argc, char** argv ) {
// do time steps until next checkpoint is reached
while( l_t < l_checkPoints[c] ) {
- MPI_Request l_request;
+ MPI_Request* l_request;
+ MPI_Status status;
//reset CPU-Communication clock
tools::Logger::logger.resetClockToCurrentTime("CpuCommunication");
@@ -459,12 +473,18 @@ int main( int argc, char** argv ) {
// compute numerical flux for inner edge
l_wavePropgationBlock.computeNumericalFluxes_innerBlock(); // Only Inner Block
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
+ // checking if the borders have been exchanged
- MPI_Wait(&l_request, &status);
+ MPI_Wait(l_request, &status);
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
// compute numerical flux for the blocks
l_wavePropgationBlock.computeNumericalFluxes_borders(); // Only Borders
+ std::cout << __FILE__ << ": " << __LINE__ << endl;
//! maximum allowed time step width within a block.
float l_maxTimeStepWidth = l_wavePropgationBlock.getMaxTimestep();
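Taken together, the intended per-timestep overlap pattern is the following condensed sketch of the calls above (the free() call is an assumption suggested by the malloc() in the exchange function, not something this commit performs):

// Condensed sketch of the compute/communicate overlap per time step.
MPI_Request* l_request = exchangeAsyncLeftRightGhostLayers(/* ... */);
l_wavePropgationBlock.computeNumericalFluxes_innerBlock(); // overlaps the transfer
MPI_Wait(l_request, &status);  // ghost layers now valid
free(l_request);               // assumed cleanup; otherwise one request leaks per step
l_wavePropgationBlock.computeNumericalFluxes_borders();
// Note (standard MPI semantics): one MPI_Request covers exactly one
// nonblocking operation; if the exchange posts several Isend/Irecv calls,
// an MPI_Waitall over an array of requests is needed rather than a single MPI_Wait.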
@@ -616,6 +636,8 @@ MPI_Request* exchangeAsyncLeftRightGhostLayers( const int i_leftNeighborRank, S
// MPI_Status l_status;
+ MPI_Request* l_request;
+ l_request = (MPI_Request*) malloc(sizeof(MPI_Request));
// int MPI_Sendrecv(void *sendbuf, int sendcount, MPI_Datatype sendtype,
// int dest, int sendtag,
// void *recvbuf, int recvcount, MPI_Datatype recvtype,
...