Commit 3006f1f7 authored by Gaurav Kukreja

Loop Blocking

To improve Cache Hit Ratio for larger resolutions
Signed-off-by: Gaurav Kukreja <mailme.gaurav@gmail.com>
parent 40d2c9da
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include "heat.h" #include "heat.h"
#include "timing.h" #include "timing.h"
#define BLOCKSIZE 100
void usage( char *s ) void usage( char *s )
{ {
...@@ -26,6 +27,8 @@ int main( int argc, char *argv[] ) ...@@ -26,6 +27,8 @@ int main( int argc, char *argv[] )
FILE *infile, *resfile; FILE *infile, *resfile;
char *resfilename; char *resfilename;
unsigned BlockSize = BLOCKSIZE;
double *tmp; double *tmp;
// algorithmic parameters // algorithmic parameters
...@@ -116,7 +119,7 @@ int main( int argc, char *argv[] ) ...@@ -116,7 +119,7 @@ int main( int argc, char *argv[] )
case 0: // JACOBI case 0: // JACOBI
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np); residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np, BlockSize);
tmp = param.u; tmp = param.u;
param.u = param.uhelp; param.u = param.uhelp;
param.uhelp = tmp; param.uhelp = tmp;
......
...@@ -62,7 +62,7 @@ double residual_jacobi( double *u, ...@@ -62,7 +62,7 @@ double residual_jacobi( double *u,
unsigned sizex, unsigned sizey ); unsigned sizex, unsigned sizey );
#endif #endif
double relax_jacobi_return_residual( double *u, double *utmp, double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey ); unsigned sizex, unsigned sizey, unsigned BlockSize );
#endif // JACOBI_H_INCLUDED #endif // JACOBI_H_INCLUDED
...@@ -40,25 +40,28 @@ double residual_jacobi( double *u, ...@@ -40,25 +40,28 @@ double residual_jacobi( double *u,
* One Jacobi iteration step * One Jacobi iteration step
*/ */
double relax_jacobi_return_residual( double *u, double *utmp, double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey ) unsigned sizex, unsigned sizey, unsigned BlockSize )
{ {
int i, j; int i, j, k, l;
double unew, diff, sum=0.0; double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ ) unsigned BlockCountX = (sizex-2)/BlockSize;
{ unsigned BlockCountY = (sizey-2)/BlockSize;
for( j=1; j<sizex-1; j++ )
for ( k = 0; k < BlockCountY; k++)
for ( l = 0; l < BlockCountX; l++)
{ {
{ for( i=1 + BlockSize*k; i <= BlockSize*(k+1); i++ )
utmp[i*sizex + j]= 0.25 * (u[ i*sizex+j -1 ]+ // left for( j=1 + BlockSize*l; j <= BlockSize*(l+1); j++ )
u[ i*sizex+j +1 ]+ // right {
u[ (i-1)*sizex + j ]+ // top utmp[i*sizex+j]= 0.25 * (u[ i*sizex + (j-1) ]+ // left
u[ (i+1)*sizex + j ]); // bottom u[ i*sizex + (j+1) ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = utmp[i*sizex + j] - u[i*sizex +j]; diff = utmp[i*sizex + j] - u[i*sizex +j];
sum += diff * diff; sum += diff * diff;
} }
} }
}
return sum; return sum;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment