Vectorize updateUnknowns

parent a313b7d7
...@@ -184,7 +184,7 @@ elif env['compileMode'] == 'release': ...@@ -184,7 +184,7 @@ elif env['compileMode'] == 'release':
env.Append(CCFLAGS=['-O2']) env.Append(CCFLAGS=['-O2'])
# Other compiler flags (for all compilers) # Other compiler flags (for all compilers)
env.Append(CCFLAGS=['-fstrict-aliasing']) env.Append(CCFLAGS=['-fstrict-aliasing', '-fargument-noalias'])
# Vectorization? # Vectorization?
if env['compiler'] == 'intel' and env['showVectorization']: if env['compiler'] == 'intel' and env['showVectorization']:
......
...@@ -206,6 +206,8 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) { ...@@ -206,6 +206,8 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) {
#pragma omp parallel for #pragma omp parallel for
#endif #endif
for(int i = 1; i < nx+1; i++) { for(int i = 1; i < nx+1; i++) {
// Tell the compiler that it can safely ignore all dependencies in this loop
#pragma ivdep
for(int j = 1; j < ny+1; j++) { for(int j = 1; j < ny+1; j++) {
h[i][j] -= dt/dx * (hNetUpdatesRight[i-1][j-1] + hNetUpdatesLeft[i][j-1]) h[i][j] -= dt/dx * (hNetUpdatesRight[i-1][j-1] + hNetUpdatesLeft[i][j-1])
+ dt/dy * (hNetUpdatesAbove[i-1][j-1] + hNetUpdatesBelow[i-1][j]); + dt/dy * (hNetUpdatesAbove[i-1][j-1] + hNetUpdatesBelow[i-1][j]);
...@@ -218,10 +220,14 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) { ...@@ -218,10 +220,14 @@ void SWE_WavePropagationBlock::updateUnknowns(float dt) {
//TODO: dryTol //TODO: dryTol
if(h[i][j] < 0) { if(h[i][j] < 0) {
#ifndef NDEBUG
// Only print this warning when debug is enabled.
// Otherwise we cannot vectorize this loop
if(h[i][j] < -0.1) { if(h[i][j] < -0.1) {
std::cerr << "Warning, negative height: (i,j)=(" << i << "," << j << ")=" << h[i][j] << std::endl; std::cerr << "Warning, negative height: (i,j)=(" << i << "," << j << ")=" << h[i][j] << std::endl;
std::cerr << " b: " << b[i][j] << std::endl; std::cerr << " b: " << b[i][j] << std::endl;
} }
#endif
//zero (small) negative depths //zero (small) negative depths
h[i][j] = hu[i][j] = hv[i][j] = 0.; h[i][j] = hu[i][j] = hv[i][j] = 0.;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment