Use vectorized fwave solver

2715e5a9 · Sebastian Rettenberger · 53852a8e · 2715e5a9 · 2715e5a9 · 2715e5a9
Commit 2715e5a9 authored Nov 16, 2012 by Sebastian Rettenberger
Showing with 64 additions and 12 deletions

SConstruct SConstruct +16 -2

SWE_WavePropagationBlock.cpp src/SWE_WavePropagationBlock.cpp +42 -9

SWE_WavePropagationBlock.hh src/SWE_WavePropagationBlock.hh +5 -0

swe_solvers submodules/swe_solvers +1 -1

No files found.
--- a/SConstruct
+++ b/SConstruct
@@ -91,11 +91,17 @@ vars.AddVariables(
  PathVariable( 'asagiInputDir', 'location of netcdf input files', '', PathVariable.PathAccept ),

  EnumVariable( 'solver', 'Riemann solver', 'augrie',
-                allowed_values=('rusanov', 'fwave', 'augrie', 'hybrid')
+                allowed_values=('rusanov', 'fwave', 'augrie', 'hybrid', 'fwavevec')
              ),
                  
+  BoolVariable( 'vectorize', 'add pragmas to help vectorization (release only)', True ),
+                  
  BoolVariable( 'showVectorization', 'show loop vectorization (Intel compiler only)', False ),

+  EnumVariable( 'platform', 'compile for a specific platform (Intel compiler only', 'default',
+                allowed_values=('default', 'mic' )
+              ),
+
  BoolVariable( 'xmlRuntime', 'use a xml-file for runtime parameters', False )
 )

@@ -187,8 +193,14 @@ elif env['compileMode'] == 'release':
 env.Append(CCFLAGS=['-fstrict-aliasing', '-fargument-noalias'])

 # Vectorization?
+if env['compileMode'] == 'release' and env['vectorize']:
+  env.Append(CPPDEFINES=['VECTORIZE'])
 if env['compiler'] == 'intel' and env['showVectorization']:
-  env.Append(CCFLAGS=['-vec-report3'])
+  env.Append(CCFLAGS=['-vec-report2'])
+  
+# Platform
+if env['compiler'] == 'intel' and env['platform'] == 'mic':
+  env.Append(CCFLAGS=['-mmic'])

 # set the precompiler variables for the solver
 if env['solver'] == 'fwave':
@@ -197,6 +209,8 @@ elif env['solver'] == 'augrie':
  env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=2'])
 elif env['solver'] == 'hybrid':
  env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=0'])
+elif env['solver'] == 'fwavevec':
+  env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=4'])

 # set the precompiler flags for serial version
 if env['parallelization'] in ['none', 'cuda']:

--- a/src/SWE_WavePropagationBlock.cpp
+++ b/src/SWE_WavePropagationBlock.cpp
@@ -34,6 +34,18 @@
 #include <omp.h>
 #endif

+#ifndef PHYSICAL_VECTOR_SIZE
+#define PHYSICAL_VECTOR_SIZE 128
+#endif
+#include <cstdlib>
+
+#if  WAVE_PROPAGATION_SOLVER==4
+/** The number of floats that fit in one vector register */
+#define VECTOR_LENGTH (PHYSICAL_VECTOR_SIZE/8/sizeof(float))
+#else
+#define VECTOR_LENGTH 1
+#endif
+
 /**
 * Constructor of a SWE_WavePropagationBlock.
 *
@@ -103,10 +115,13 @@ SWE_WavePropagationBlock::SWE_WavePropagationBlock():
 * Compute net updates for the block.
 * The member variable #maxTimestep will be updated with the 
 * maximum allowed time step size
+ *
+ * @todo vectorization only works correct if PHYSICAL_VECTOR_SIZE is
+ *  according to the hardware
 */
 void SWE_WavePropagationBlock::computeNumericalFluxes() {
  //maximum (linearized) wave speed within one iteration
-  float maxWaveSpeed = (float) 0.;
+  float maxWaveSpeed[VECTOR_LENGTH] = {(float) 0.};

  //compute the net-updates for the vertical edges
  #ifdef LOOP_OPENMP
@@ -116,16 +131,21 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
  solver::Hybrid<float> wavePropagationSolver;
  #pragma omp for
  #endif
+  float maxEdgeSpeed[VECTOR_LENGTH];
  for(int i = 1; i < nx+2; i++) {
+#if  WAVE_PROPAGATION_SOLVER==4
+#ifdef VECTORIZE
+	#pragma simd vectorlength(VECTOR_LENGTH)
+#endif
+#endif
    for(int j = 1; j < ny+1; j++) {
-      float maxEdgeSpeed;
      #if WAVE_PROPAGATION_SOLVER!=3
      wavePropagationSolver.computeNetUpdates( h[i-1][j], h[i][j],
                                               hu[i-1][j], hu[i][j],
                                               b[i-1][j], b[i][j],
                                               hNetUpdatesLeft[i-1][j-1], hNetUpdatesRight[i-1][j-1],
                                               huNetUpdatesLeft[i-1][j-1], huNetUpdatesRight[i-1][j-1],
-                                               maxEdgeSpeed );
+                                               maxEdgeSpeed[(j-1)%VECTOR_LENGTH] );
     #else
     //TODO: implement again.
     assert(false);
@@ -136,7 +156,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
      l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed);
      #else
      //update the maximum wave speed
-      maxWaveSpeed = std::max(maxWaveSpeed, maxEdgeSpeed);
+      maxWaveSpeed[(j-1)%VECTOR_LENGTH] = std::max(maxWaveSpeed[(j-1)%VECTOR_LENGTH], maxEdgeSpeed[(j-1)%VECTOR_LENGTH]);
      #endif
    }
  }
@@ -146,15 +166,19 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
  #pragma omp for
  #endif
  for(int i = 1; i < nx+1; i++) {
+#if  WAVE_PROPAGATION_SOLVER==4
+#ifdef VECTORIZE
+	#pragma simd vectorlength(VECTOR_LENGTH)
+#endif
+#endif
    for(int j = 1; j < ny+2; j++) {
-      float maxEdgeSpeed;
      #if WAVE_PROPAGATION_SOLVER!=3
      wavePropagationSolver.computeNetUpdates( h[i][j-1], h[i][j],
                                               hv[i][j-1], hv[i][j],
                                               b[i][j-1], b[i][j],
                                               hNetUpdatesBelow[i-1][j-1], hNetUpdatesAbove[i-1][j-1],
                                               hvNetUpdatesBelow[i-1][j-1], hvNetUpdatesAbove[i-1][j-1],
-                                               maxEdgeSpeed );
+                                               maxEdgeSpeed[(j-1)%VECTOR_LENGTH] );
      #else
      //TODO: implement again.
      assert(false);
@@ -165,7 +189,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
      l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed);
      #else
      //update the maximum wave speed
-      maxWaveSpeed = std::max(maxWaveSpeed, maxEdgeSpeed);
+      maxWaveSpeed[(j-1)%VECTOR_LENGTH] = std::max(maxWaveSpeed[(j-1)%VECTOR_LENGTH], maxEdgeSpeed[(j-1)%VECTOR_LENGTH]);
      #endif
    }
  }
@@ -177,12 +201,19 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
  } // end of parallel for block
  #endif

-  if(maxWaveSpeed > 0.00001) { //TODO zeroTol
+#if  WAVE_PROPAGATION_SOLVER==4
+  // Get global maximum
+  for (int i = 1; i < VECTOR_LENGTH; i++)
+	  if (maxWaveSpeed[i] > maxWaveSpeed[0])
+		  maxWaveSpeed[0] = maxWaveSpeed[i];
+#endif
+
+  if(maxWaveSpeed[0] > 0.00001) { //TODO zeroTol
    //compute the time step width
    //CFL-Codition
    //(max. wave speed) * dt / dx < .5
    // => dt = .5 * dx/(max wave speed)
-    maxTimestep = std::min( dx/maxWaveSpeed, dy/maxWaveSpeed );
+    maxTimestep = std::min( dx/maxWaveSpeed[0], dy/maxWaveSpeed[0] );

 //    #if WAVE_PROPAGATION_SOLVER!=3
    maxTimestep *= (float) .4; //CFL-number = .5
@@ -206,8 +237,10 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) {
  #pragma omp parallel for
  #endif
  for(int i = 1; i < nx+1; i++) {
+#ifdef VECTORIZE
 	  // Tell the compiler that he can safely ignore all dependencies in this loop
 	  #pragma ivdep
+#endif
      for(int j = 1; j < ny+1; j++) {
        h[i][j] -=   dt/dx * (hNetUpdatesRight[i-1][j-1] + hNetUpdatesLeft[i][j-1])
                   + dt/dy * (hNetUpdatesAbove[i-1][j-1] + hNetUpdatesBelow[i-1][j]);

--- a/src/SWE_WavePropagationBlock.hh
+++ b/src/SWE_WavePropagationBlock.hh
@@ -46,6 +46,8 @@
 #include "solvers/AugRie.hpp"
 #elif WAVE_PROPAGATION_SOLVER==3
 #include "solvers/AugRieGeoClaw.hpp"
+#elif WAVE_PROPAGATION_SOLVER==4
+#include "solvers/FWaveVec.hpp"
 #else
 #include "solvers/Hybrid.hpp"
 #endif
@@ -72,6 +74,9 @@ class SWE_WavePropagationBlock: public SWE_Block {
 #elif WAVE_PROPAGATION_SOLVER==3
    //! Approximate Augmented Riemann solver
    solver::AugRieGeoClaw<double> wavePropagationSolver;
+#elif WAVE_PROPAGATION_SOLVER==4
+    //! Vectorized FWave solver
+    solver::FWaveVec<float> wavePropagationSolver;
 #else
    //! Hybrid solver (f-wave + augmented)
    solver::Hybrid<float> wavePropagationSolver;

--- a/swe_solvers @ 92dd00ca
+++ b/swe_solvers @ 92dd00ca
-Subproject commit 89e95490307a6eb5927d38d5e26082960f8a1217
+Subproject commit 92dd00ca89633014167f1f31e66bb2671e1acf2f