Commit 3006f1f7 authored by Gaurav Kukreja

Loop Blocking

To improve Cache Hit Ratio for larger resolutions
Signed-off-by: Gaurav Kukreja <mailme.gaurav@gmail.com>
parent 40d2c9da
......@@ -12,6 +12,7 @@
#include "heat.h"
#include "timing.h"
#define BLOCKSIZE 100
void usage( char *s )
{
......@@ -26,6 +27,8 @@ int main( int argc, char *argv[] )
FILE *infile, *resfile;
char *resfilename;
unsigned BlockSize = BLOCKSIZE;
double *tmp;
// algorithmic parameters
......@@ -116,7 +119,7 @@ int main( int argc, char *argv[] )
case 0: // JACOBI
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np);
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np, BlockSize);
tmp = param.u;
param.u = param.uhelp;
param.uhelp = tmp;
......
......@@ -62,7 +62,7 @@ double residual_jacobi( double *u,
unsigned sizex, unsigned sizey );
#endif
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey );
unsigned sizex, unsigned sizey, unsigned BlockSize );
#endif // JACOBI_H_INCLUDED
......@@ -40,18 +40,22 @@ double residual_jacobi( double *u,
* One Jacobi iteration step
*/
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey )
unsigned sizex, unsigned sizey, unsigned BlockSize )
{
int i, j;
int i, j, k, l;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
unsigned BlockCountX = (sizex-2)/BlockSize;
unsigned BlockCountY = (sizey-2)/BlockSize;
for ( k = 0; k < BlockCountY; k++)
for ( l = 0; l < BlockCountX; l++)
{
for( i=1 + BlockSize*k; i <= BlockSize*(k+1); i++ )
for( j=1 + BlockSize*l; j <= BlockSize*(l+1); j++ )
{
utmp[i*sizex + j]= 0.25 * (u[ i*sizex+j -1 ]+ // left
u[ i*sizex+j +1 ]+ // right
utmp[i*sizex+j]= 0.25 * (u[ i*sizex + (j-1) ]+ // left
u[ i*sizex + (j+1) ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
......@@ -59,6 +63,5 @@ double relax_jacobi_return_residual( double *u, double *utmp,
sum += diff * diff;
}
}
}
return sum;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment