Let the compiler do the simd max reduction

parent 1d292f3e
...@@ -114,8 +114,6 @@ vars.AddVariables( ...@@ -114,8 +114,6 @@ vars.AddVariables(
allowed_values=('default', 'mic' ) allowed_values=('default', 'mic' )
), ),
('physicalVectorSize', 'size of vector registers is bits', 128, isInt, toInt),
BoolVariable( 'xmlRuntime', 'use a xml-file for runtime parameters', False ) BoolVariable( 'xmlRuntime', 'use a xml-file for runtime parameters', False )
) )
...@@ -230,7 +228,7 @@ elif env['solver'] == 'augrie': ...@@ -230,7 +228,7 @@ elif env['solver'] == 'augrie':
elif env['solver'] == 'hybrid': elif env['solver'] == 'hybrid':
env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=0']) env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=0'])
elif env['solver'] == 'fwavevec': elif env['solver'] == 'fwavevec':
env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=4', 'PHYSICAL_VECTOR_SIZE='+str(env['physicalVectorSize'])]) env.Append(CPPDEFINES=['WAVE_PROPAGATION_SOLVER=4'])
# set the precompiler flags for CUDA # set the precompiler flags for CUDA
if env['parallelization'] in ['cuda', 'mpi_with_cuda']: if env['parallelization'] in ['cuda', 'mpi_with_cuda']:
......
...@@ -35,13 +35,6 @@ ...@@ -35,13 +35,6 @@
#include <omp.h> #include <omp.h>
#endif #endif
#if WAVE_PROPAGATION_SOLVER==4
/** The number of floats that fit in one vector register */
#define VECTOR_LENGTH (PHYSICAL_VECTOR_SIZE/8/sizeof(float))
#else
#define VECTOR_LENGTH 1
#endif
/** /**
* Constructor of a SWE_WavePropagationBlock. * Constructor of a SWE_WavePropagationBlock.
* *
...@@ -114,7 +107,7 @@ SWE_WavePropagationBlock::SWE_WavePropagationBlock(): ...@@ -114,7 +107,7 @@ SWE_WavePropagationBlock::SWE_WavePropagationBlock():
*/ */
void SWE_WavePropagationBlock::computeNumericalFluxes() { void SWE_WavePropagationBlock::computeNumericalFluxes() {
//maximum (linearized) wave speed within one iteration //maximum (linearized) wave speed within one iteration
float maxWaveSpeed[VECTOR_LENGTH] = {(float) 0.}; float maxWaveSpeed = (float) 0.;
//compute the net-updates for the vertical edges //compute the net-updates for the vertical edges
#ifdef LOOP_OPENMP #ifdef LOOP_OPENMP
...@@ -127,7 +120,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -127,7 +120,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
for(int i = 1; i < nx+2; i++) { for(int i = 1; i < nx+2; i++) {
#if WAVE_PROPAGATION_SOLVER==4 #if WAVE_PROPAGATION_SOLVER==4
#ifdef VECTORIZE #ifdef VECTORIZE
#pragma simd vectorlength(VECTOR_LENGTH) #pragma simd
#endif #endif
#endif #endif
for(int j = 1; j < ny+1; j++) { for(int j = 1; j < ny+1; j++) {
...@@ -149,7 +142,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -149,7 +142,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed); l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed);
#else #else
//update the maximum wave speed //update the maximum wave speed
maxWaveSpeed[(j-1)%VECTOR_LENGTH] = std::max(maxWaveSpeed[(j-1)%VECTOR_LENGTH], maxEdgeSpeed); maxWaveSpeed = std::max(maxWaveSpeed, maxEdgeSpeed);
#endif #endif
} }
} }
...@@ -161,7 +154,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -161,7 +154,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
for(int i = 1; i < nx+1; i++) { for(int i = 1; i < nx+1; i++) {
#if WAVE_PROPAGATION_SOLVER==4 #if WAVE_PROPAGATION_SOLVER==4
#ifdef VECTORIZE #ifdef VECTORIZE
#pragma simd vectorlength(VECTOR_LENGTH) #pragma simd
#endif #endif
#endif #endif
for(int j = 1; j < ny+2; j++) { for(int j = 1; j < ny+2; j++) {
...@@ -183,7 +176,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -183,7 +176,7 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed); l_maxWaveSpeed = std::max(l_maxWaveSpeed, maxEdgeSpeed);
#else #else
//update the maximum wave speed //update the maximum wave speed
maxWaveSpeed[(j-1)%VECTOR_LENGTH] = std::max(maxWaveSpeed[(j-1)%VECTOR_LENGTH], maxEdgeSpeed); maxWaveSpeed = std::max(maxWaveSpeed, maxEdgeSpeed);
#endif #endif
} }
} }
...@@ -195,19 +188,12 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() { ...@@ -195,19 +188,12 @@ void SWE_WavePropagationBlock::computeNumericalFluxes() {
} // end of parallel for block } // end of parallel for block
#endif #endif
#if WAVE_PROPAGATION_SOLVER==4 if(maxWaveSpeed > 0.00001) { //TODO zeroTol
// Get global maximum
for (int i = 1; i < VECTOR_LENGTH; i++)
if (maxWaveSpeed[i] > maxWaveSpeed[0])
maxWaveSpeed[0] = maxWaveSpeed[i];
#endif
if(maxWaveSpeed[0] > 0.00001) { //TODO zeroTol
//compute the time step width //compute the time step width
//CFL-Condition //CFL-Condition
//(max. wave speed) * dt / dx < .5 //(max. wave speed) * dt / dx < .5
// => dt = .5 * dx/(max wave speed) // => dt = .5 * dx/(max wave speed)
maxTimestep = std::min( dx/maxWaveSpeed[0], dy/maxWaveSpeed[0] ); maxTimestep = std::min( dx/maxWaveSpeed, dy/maxWaveSpeed );
// #if WAVE_PROPAGATION_SOLVER!=3 // #if WAVE_PROPAGATION_SOLVER!=3
maxTimestep *= (float) .4; //CFL-number = .5 maxTimestep *= (float) .4; //CFL-number = .5
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment