Use vectorized fwave solver

parent 53852a8e
...@@ -91,11 +91,17 @@ vars.AddVariables( ...@@ -91,11 +91,17 @@ vars.AddVariables(
PathVariable( 'asagiInputDir', 'location of netcdf input files', '', PathVariable.PathAccept ), PathVariable( 'asagiInputDir', 'location of netcdf input files', '', PathVariable.PathAccept ),
EnumVariable( 'solver', 'Riemann solver', 'augrie', EnumVariable( 'solver', 'Riemann solver', 'augrie',
allowed_values=('rusanov', 'fwave', 'augrie', 'hybrid') allowed_values=('rusanov', 'fwave', 'augrie', 'hybrid', 'fwavevec')
), ),
BoolVariable( 'vectorize', 'add pragmas to help vectorization (release only)', True ),
BoolVariable( 'showVectorization', 'show loop vectorization (Intel compiler only)', False ), BoolVariable( 'showVectorization', 'show loop vectorization (Intel compiler only)', False ),
EnumVariable( 'platform', 'compile for a specific platform (Intel compiler only', 'default',
allowed_values=('default', 'mic' )
),
BoolVariable( 'xmlRuntime', 'use a xml-file for runtime parameters', False ) BoolVariable( 'xmlRuntime', 'use a xml-file for runtime parameters', False )
) )
...@@ -187,8 +193,14 @@ elif env['compileMode'] == 'release': ...@@ -187,8 +193,14 @@ elif env['compileMode'] == 'release':
env.Append(CCFLAGS=['-fstrict-aliasing', '-fargument-noalias']) env.Append(CCFLAGS=['-fstrict-aliasing', '-fargument-noalias'])
# Vectorization? # Vectorization?
if env['compileMode'] == 'release' and env['vectorize']:
env.Append(CPPDEFINES=['VECTORIZE'])
if env['compiler'] == 'intel' and env['showVectorization']: if env['compiler'] == 'intel' and env['showVectorization']:
env.Append(CCFLAGS=['-vec-report3']) env.Append(CCFLAGS=['-vec-report2'])
# Platform
if env['compiler'] == 'intel' and env['platform'] == 'mic':
env.Append(CCFLAGS=['-mmic'])
# set the precompiler variables for the solver # set the precompiler variables for the solver
if env['solver'] == 'fwave': if env['solver'] == 'fwave':
...@@ -197,6 +209,8 @@ elif env['solver'] == 'augrie': ...@@ -197,6 +209,8 @@ elif env['solver'] == 'augrie':
env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=2']) env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=2'])
elif env['solver'] == 'hybrid': elif env['solver'] == 'hybrid':
env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=0']) env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=0'])
elif env['solver'] == 'fwavevec':
env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=4'])
# set the precompiler flags for serial version # set the precompiler flags for serial version
if env['parallelization'] in ['none', 'cuda']: if env['parallelization'] in ['none', 'cuda']:
......
...@@ -34,6 +34,18 @@ ...@@ -34,6 +34,18 @@
#include <omp.h> #include <omp.h>
#endif #endif
#ifndef PHYSICAL_VECTOR_SIZE
#define PHYSICAL_VECTOR_SIZE 128
#endif
#include <cstdlib>
#if WAVE_PROPAGATION_SOLVER==4
/** The number of floats that fit in one vector register */
#define VECTOR_LENGTH (PHYSICAL_VECTOR_SIZE/8/sizeof(float))
#else
#define VECTOR_LENGTH 1
#endif
/** /**
* Constructor of a SWE_WavePropagationBlock. * Constructor of a SWE_WavePropagationBlock.
* *
...@@ -103,10 +115,13 @@ SWE_WavePropagationBlock::SWE_WavePropagationBlock(): ...@@ -103,10 +115,13 @@ SWE_WavePropagationBlock::SWE_WavePropagationBlock():
* Compute net updates for the block. * Compute net updates for the block.
* The member variable #maxTimestep will be updated with the * The member variable #maxTimestep will be updated with the
* maximum allowed time step size * maximum allowed time step size
*
* @todo vectorization only works correct if PHYSICAL_VECTOR_SIZE is
* according to the hardware
*/ */
void SWE_WavePropagationBlock::computeNumericalFluxes() { void SWE_WavePropagationBlock::computeNumericalFluxes() {
//maximum (linearized) wave speed within one iteration //maximum (linearized) wave speed within one iteration
float maxWaveSpeed = (float) 0.; float maxWaveSpeed[VECTOR_LENGTH] = {(float) 0.};
//compute the net-updates for the vertical edges //compute the net-updates for the vertical edges
#ifdef LOOP_OPENMP #ifdef LOOP_OPENMP
...@@ -116,16 +131,21 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -116,16 +131,21 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
solver::Hybrid<float> wavePropagationSolver; solver::Hybrid<float> wavePropagationSolver;
#pragma omp for #pragma omp for
#endif #endif
float maxEdgeSpeed[VECTOR_LENGTH];
for(int i = 1; i < nx+2; i++) { for(int i = 1; i < nx+2; i++) {
#if WAVE_PROPAGATION_SOLVER==4
#ifdef VECTORIZE
#pragma simd vectorlength(VECTOR_LENGTH)
#endif
#endif
for(int j = 1; j < ny+1; j++) { for(int j = 1; j < ny+1; j++) {
float maxEdgeSpeed;
#if WAVE_PROPAGATION_SOLVER!=3 #if WAVE_PROPAGATION_SOLVER!=3
wavePropagationSolver.computeNetUpdates( h[i-1][j], h[i][j], wavePropagationSolver.computeNetUpdates( h[i-1][j], h[i][j],
hu[i-1][j], hu[i][j], hu[i-1][j], hu[i][j],
b[i-1][j], b[i][j], b[i-1][j], b[i][j],
hNetUpdatesLeft[i-1][j-1], hNetUpdatesRight[i-1][j-1], hNetUpdatesLeft[i-1][j-1], hNetUpdatesRight[i-1][j-1],
huNetUpdatesLeft[i-1][j-1], huNetUpdatesRight[i-1][j-1], huNetUpdatesLeft[i-1][j-1], huNetUpdatesRight[i-1][j-1],
maxEdgeSpeed ); maxEdgeSpeed[(j-1)%VECTOR_LENGTH] );
#else #else
//TODO: implement again. //TODO: implement again.
assert(false); assert(false);
...@@ -136,7 +156,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -136,7 +156,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed); l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed);
#else #else
//update the maximum wave speed //update the maximum wave speed
maxWaveSpeed = std::max(maxWaveSpeed, maxEdgeSpeed); maxWaveSpeed[(j-1)%VECTOR_LENGTH] = std::max(maxWaveSpeed[(j-1)%VECTOR_LENGTH], maxEdgeSpeed[(j-1)%VECTOR_LENGTH]);
#endif #endif
} }
} }
...@@ -146,15 +166,19 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -146,15 +166,19 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
#pragma omp for #pragma omp for
#endif #endif
for(int i = 1; i < nx+1; i++) { for(int i = 1; i < nx+1; i++) {
#if WAVE_PROPAGATION_SOLVER==4
#ifdef VECTORIZE
#pragma simd vectorlength(VECTOR_LENGTH)
#endif
#endif
for(int j = 1; j < ny+2; j++) { for(int j = 1; j < ny+2; j++) {
float maxEdgeSpeed;
#if WAVE_PROPAGATION_SOLVER!=3 #if WAVE_PROPAGATION_SOLVER!=3
wavePropagationSolver.computeNetUpdates( h[i][j-1], h[i][j], wavePropagationSolver.computeNetUpdates( h[i][j-1], h[i][j],
hv[i][j-1], hv[i][j], hv[i][j-1], hv[i][j],
b[i][j-1], b[i][j], b[i][j-1], b[i][j],
hNetUpdatesBelow[i-1][j-1], hNetUpdatesAbove[i-1][j-1], hNetUpdatesBelow[i-1][j-1], hNetUpdatesAbove[i-1][j-1],
hvNetUpdatesBelow[i-1][j-1], hvNetUpdatesAbove[i-1][j-1], hvNetUpdatesBelow[i-1][j-1], hvNetUpdatesAbove[i-1][j-1],
maxEdgeSpeed ); maxEdgeSpeed[(j-1)%VECTOR_LENGTH] );
#else #else
//TODO: implement again. //TODO: implement again.
assert(false); assert(false);
...@@ -165,7 +189,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -165,7 +189,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed); l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed);
#else #else
//update the maximum wave speed //update the maximum wave speed
maxWaveSpeed = std::max(maxWaveSpeed, maxEdgeSpeed); maxWaveSpeed[(j-1)%VECTOR_LENGTH] = std::max(maxWaveSpeed[(j-1)%VECTOR_LENGTH], maxEdgeSpeed[(j-1)%VECTOR_LENGTH]);
#endif #endif
} }
} }
...@@ -177,12 +201,19 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -177,12 +201,19 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
} // end of parallel for block } // end of parallel for block
#endif #endif
if(maxWaveSpeed > 0.00001) { //TODO zeroTol #if WAVE_PROPAGATION_SOLVER==4
// Get global maximum
for (int i = 1; i < VECTOR_LENGTH; i++)
if (maxWaveSpeed[i] > maxWaveSpeed[0])
maxWaveSpeed[0] = maxWaveSpeed[i];
#endif
if(maxWaveSpeed[0] > 0.00001) { //TODO zeroTol
//compute the time step width //compute the time step width
//CFL-Codition //CFL-Codition
//(max. wave speed) * dt / dx < .5 //(max. wave speed) * dt / dx < .5
// => dt = .5 * dx/(max wave speed) // => dt = .5 * dx/(max wave speed)
maxTimestep = std::min( dx/maxWaveSpeed, dy/maxWaveSpeed ); maxTimestep = std::min( dx/maxWaveSpeed[0], dy/maxWaveSpeed[0] );
// #if WAVE_PROPAGATION_SOLVER!=3 // #if WAVE_PROPAGATION_SOLVER!=3
maxTimestep *= (float) .4; //CFL-number = .5 maxTimestep *= (float) .4; //CFL-number = .5
...@@ -206,8 +237,10 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) { ...@@ -206,8 +237,10 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) {
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for(int i = 1; i < nx+1; i++) { for(int i = 1; i < nx+1; i++) {
#ifdef VECTORIZE
// Tell the compiler that he can safely ignore all dependencies in this loop // Tell the compiler that he can safely ignore all dependencies in this loop
#pragma ivdep #pragma ivdep
#endif
for(int j = 1; j < ny+1; j++) { for(int j = 1; j < ny+1; j++) {
h[i][j] -= dt/dx * (hNetUpdatesRight[i-1][j-1] + hNetUpdatesLeft[i][j-1]) h[i][j] -= dt/dx * (hNetUpdatesRight[i-1][j-1] + hNetUpdatesLeft[i][j-1])
+ dt/dy * (hNetUpdatesAbove[i-1][j-1] + hNetUpdatesBelow[i-1][j]); + dt/dy * (hNetUpdatesAbove[i-1][j-1] + hNetUpdatesBelow[i-1][j]);
......
...@@ -46,6 +46,8 @@ ...@@ -46,6 +46,8 @@
#include "solvers/AugRie.hpp" #include "solvers/AugRie.hpp"
#elif WAVE_PROPAGATION_SOLVER==3 #elif WAVE_PROPAGATION_SOLVER==3
#include "solvers/AugRieGeoClaw.hpp" #include "solvers/AugRieGeoClaw.hpp"
#elif WAVE_PROPAGATION_SOLVER==4
#include "solvers/FWaveVec.hpp"
#else #else
#include "solvers/Hybrid.hpp" #include "solvers/Hybrid.hpp"
#endif #endif
...@@ -72,6 +74,9 @@ class SWE_WavePropagationBlock: public SWE_Block { ...@@ -72,6 +74,9 @@ class SWE_WavePropagationBlock: public SWE_Block {
#elif WAVE_PROPAGATION_SOLVER==3 #elif WAVE_PROPAGATION_SOLVER==3
//! Approximate Augmented Riemann solver //! Approximate Augmented Riemann solver
solver::AugRieGeoClaw<double> wavePropagationSolver; solver::AugRieGeoClaw<double> wavePropagationSolver;
#elif WAVE_PROPAGATION_SOLVER==4
//! Vectorized FWave solver
solver::FWaveVec<float> wavePropagationSolver;
#else #else
//! Hybrid solver (f-wave + augmented) //! Hybrid solver (f-wave + augmented)
solver::Hybrid<float> wavePropagationSolver; solver::Hybrid<float> wavePropagationSolver;
......
Subproject commit 89e95490307a6eb5927d38d5e26082960f8a1217 Subproject commit 92dd00ca89633014167f1f31e66bb2671e1acf2f
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment