Commit ae696c66 authored by Gaurav Kukreja's avatar Gaurav Kukreja

Assignment 4 Chunk

 * Gauss 2D Non-blocking works for 2x2, 1x4, 4x1
 * Gauss special code for 1x4
 * Gauss special code for 4x1
Signed-off-by: 's avatarGaurav Kukreja <mailme.gaurav@gmail.com>
parent 099699b9
/*
* relax_gauss.c
*
* Gauss-Seidel Relaxation
*
*/
#include "heat.h"
/*
* Residual (length of error vector)
* between current solution and next after a Gauss-Seidel step
*
* Temporary array utmp needed to not change current solution
*
* Flop count in inner body is 7
*/
double residual_gauss( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
// first row (boundary condition) into utmp
for( j=1; j<sizex-1; j++ )
utmp[0*sizex + j] = u[0*sizex + j];
// first column (boundary condition) into utmp
for( i=1; i<sizey-1; i++ )
utmp[i*sizex + 0] = u[i*sizex + 0];
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (utmp[ i*sizex + (j-1) ]+ // new left
u [ i*sizex + (j+1) ]+ // right
utmp[ (i-1)*sizex + j ]+ // new top
u [ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
utmp[i*sizex + j] = unew;
}
}
return sum;
}
/*
* One Gauss-Seidel iteration step
*
* Flop count in inner body is 4
*/
void relax_gauss( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
u[i*sizex+j]= 0.25 * (u[ i*sizex + (j-1) ]+
u[ i*sizex + (j+1) ]+
u[ (i-1)*sizex + j ]+
u[ (i+1)*sizex + j ]);
}
}
}
# Intel compiler
CC = icc
CFLAGS = -g
CC = mpicc
CFLAGS =-O3
MPICC = mpicc
......
/*
* heat.h
*
* Iterative solver for heat distribution
*/
#include <stdio.h>
#include <stdlib.h>
#define FALSE 0
#define TRUE 1
#define INTERLEAVING_COUNT 1024
#include "input.h"
#include "heat.h"
#include "timing.h"
void usage( char *s )
{
fprintf(stderr,
"Usage: %s <input file> [result file]\n\n", s);
}
int main( int argc, char *argv[] )
{
unsigned iter;
FILE *infile, *resfile;
char *resfilename;
int periods[2]={FALSE, FALSE};
MPI_Comm comm2d;
int nthreads, rank;
int thdx, thdy;
double *tmp;
// algorithmic parameters
algoparam_t param;
int np;
double runtime, flop;
double residual;
int coords[2];
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// check arguments
if( argc < 2 )
{
usage( argv[0] );
MPI_Finalize();
return 1;
}
// check input file
if( !(infile=fopen(argv[1], "r")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for reading.\n\n", argv[1]);
usage(argv[0]);
MPI_Finalize();
return 1;
}
// check input
if( !read_input(infile, &param) )
{
fprintf(stderr, "\nError: Error parsing input file.\n\n");
usage(argv[0]);
MPI_Finalize();
return 1;
}
MPI_Cart_create(MPI_COMM_WORLD, 2, param.thread_dims, periods, FALSE, &comm2d);
MPI_Cart_coords(comm2d, rank, 2, coords);
if(rank==0)
print_params(&param);
param.u = 0;
param.uhelp = 0;
param.sendbuf_left =0;
param.sendbuf_right = 0;
param.recbuf_left = 0;
param.recbuf_right =0;
param.sendbuf_top =0;
param.sendbuf_bottom = 0;
param.recbuf_top = 0;
param.recbuf_bottom =0;
param.act_res = param.initial_res;
// loop over different resolutions
while(1) {
// free allocated memory of previous experiment
if (param.u != 0)
finalize(&param);
if( !initialize(&param, coords) )
{
fprintf(stderr, "Error in Jacobi initialization.\n\n");
usage(argv[0]);
}
fprintf(stderr, "Resolution: %5u\r", param.act_res);
// full size (param.act_res are only the inner points)
np = param.act_res + 2;
// starting time
runtime = MPI_Wtime();
residual = 999999999;
iter = 0;
while(1) {
switch( param.algorithm ) {
case 0: // JACOBI
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np);
tmp = param.u;
param.u = param.uhelp;
param.uhelp = tmp;
break;
case 1: // GAUSS
residual = relax_gauss_return_residual(&param, INTERLEAVING_COUNT, coords , comm2d);
break;
}
iter+= INTERLEAVING_COUNT;
// solution good enough ?
if (residual < 0.000005) break;
// max. iteration reached ? (no limit with maxiter=0)
if (param.maxiter>0 && iter>=param.maxiter) break;
if (iter % 100 == 0)
fprintf(stderr, "residual %f, %d iterations\n", residual, iter);
}
// Flop count after <i> iterations
flop = iter * 7.0 * param.act_res * param.act_res;
// stopping time
runtime = MPI_Wtime() - runtime;
if (rank == 0)
{
fprintf(stderr, "Resolution: %5u, ", param.act_res);
fprintf(stderr, "Time: %04.3f ", runtime);
fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ",
flop/1000000000.0,
flop/runtime/1000000);
fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);
// for plot...
printf("%5d %f\n", param.act_res, flop/runtime/1000000);
}
if (param.act_res + param.res_step_size > param.max_res) break;
param.act_res += param.res_step_size;
}
finalize( &param );
MPI_Finalize();
return 0;
}
......@@ -8,6 +8,7 @@
#define JACOBI_H_INCLUDED
#include <stdio.h>
#include <mpi.h>
// configuration
......@@ -27,16 +28,18 @@ typedef struct
unsigned max_res; // spatial resolution
unsigned initial_res;
unsigned res_step_size;
unsigned blocksize_x;
unsigned blocksize_y;
int algorithm; // 0=>Jacobi, 1=>Gauss
unsigned visres; // visualization resolution
double *u, *uhelp;
double *uvis;
double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;
double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;
unsigned numsrcs; // number of heat sources
heatsrc_t *heatsrcs;
int thread_dims[2]; //x*y dimensions of thread
int thread_dims[2]; //x*y dimensions of thread
}
algoparam_t;
......@@ -45,24 +48,13 @@ algoparam_t;
// function declarations
// misc.c
int initialize( algoparam_t *param );
int initialize( algoparam_t *param, int *coords );
int finalize( algoparam_t *param );
void write_image( FILE * f, double *u,
unsigned sizex, unsigned sizey );
int coarsen(double *uold, unsigned oldx, unsigned oldy ,
double *unew, unsigned newx, unsigned newy );
// Gauss-Seidel: relax_gauss.c
double residual_gauss( double *u, double *utmp,
unsigned sizex, unsigned sizey );
void relax_gauss( double *u,
unsigned sizex, unsigned sizey );
double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d);
// Jacobi: relax_jacobi.c
#if 0
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey );
#endif
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey );
......
......@@ -15,8 +15,8 @@ int read_input( FILE *infile, algoparam_t *param )
char buf[BUFSIZE];
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u %u", &(param->thread_dims[0]), &(param->thread_dims[1]) );
if( n!=1 )
n = sscanf( buf, "%d %d", &(param->thread_dims[0]), &(param->thread_dims[1]) );
if( n!=2 )
return 0;
fgets(buf, BUFSIZE, infile);
......
/*
* misc.c
*
* Helper functions for
* - initialization
* - finalization,
* - writing out a picture
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include "heat.h"
/*
* Initialize the iterative solver
* - allocate memory for matrices
* - set boundary conditions according to configuration
*/
int initialize( algoparam_t *param , int *coords)
{
int i, j;
double dist;
// total number of points
const int np = param->act_res + 2;
param->blocksize_x=param->act_res / param->thread_dims[0];
param->blocksize_y=param->act_res / param->thread_dims[1];
//
// allocate memory
//
(param->sendbuf_left) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->recbuf_left) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->sendbuf_right) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->recbuf_right) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->sendbuf_top) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->recbuf_top) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->sendbuf_bottom) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->recbuf_bottom) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->u) = (double*)calloc( sizeof(double),(param->blocksize_x + 2)*(param->blocksize_y + 2) );
(param->uhelp) = (double*)calloc( sizeof(double),(param->blocksize_x + 2)*(param->blocksize_y + 2) );
if( !(param->u) || !(param->uhelp) || !(param->sendbuf_left) || !(param->sendbuf_right) || !(param->recbuf_left) || !(param->recbuf_right) )
{
fprintf(stderr, "Error: Cannot allocate memory\n");
return 0;
}
for( i=0; i<param->numsrcs; i++ )
{
/* top row */
if(coords[1]==0)
{
for( j=0 ; j<param->blocksize_x + 1 ; j++ )
{
dist = sqrt( pow((double)(j + (param->blocksize_x * coords[0]) )/ (double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[j] = (param->u)[j];
}
}
}
/* bottom row */
if(coords[1]==param->thread_dims[1]-1)
{
for( j=0; j<param->blocksize_x + 1 ; j++ )
{
dist = sqrt( pow((double)(j + (param->blocksize_x * coords[0]) )/ (double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(1-param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[(param->blocksize_y+1)*(param->blocksize_x+2) + j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[(param->blocksize_y+1)*(param->blocksize_x+2)+j] = (param->u)[(param->blocksize_y+1)*(param->blocksize_x+2)+j];
}
}
}
if(coords[0]==0)
{
/* leftmost column */
for( j=0; j<param->blocksize_y+1; j++ )
{
dist = sqrt( pow(param->heatsrcs[i].posx, 2)+
pow((double)(j + (param->blocksize_y * coords[1]) ) / (double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*(param->blocksize_x+2) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*(param->blocksize_x+2) ] = (param->u)[ j*(param->blocksize_x+2) ];
}
}
}
if(coords[0]==param->thread_dims[0]-1)
{
/* rightmost column */
for( j=1; j<param->blocksize_y+1; j++ )
{
dist = sqrt( pow(1 - param->heatsrcs[i].posx, 2)+
pow((double)(j + (param->blocksize_y * coords[1]) ) / (double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ] = (param->u)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ];
}
}
}
}
return 1;
}
/*
* free used memory
*/
int finalize( algoparam_t *param )
{
if( param->u )
{
free(param->u);
param->u = 0;
}
if( param->uhelp )
{
free(param->uhelp);
param->uhelp = 0;
}
if ( param->sendbuf_left )
{
free(param->sendbuf_left);
param->sendbuf_left = 0;
}
if ( param->sendbuf_right )
{
free(param->sendbuf_right);
param->sendbuf_right = 0;
}
if ( param->recbuf_left )
{
free(param->recbuf_left);
param->recbuf_left = 0;
}
if ( param->recbuf_right )
{
free(param->recbuf_right);
param->recbuf_right = 0;
}
if ( param->sendbuf_top )
{
free(param->sendbuf_top);
param->sendbuf_top = 0;
}
if ( param->sendbuf_bottom )
{
free(param->sendbuf_bottom);
param->sendbuf_bottom = 0;
}
if ( param->recbuf_top )
{
free(param->recbuf_top);
param->recbuf_top = 0;
}
if ( param->recbuf_bottom )
{
free(param->recbuf_bottom);
param->recbuf_bottom = 0;
}
return 1;
}
/*
* relax_gauss.c
*
* Gauss-Seidel Relaxation
*
*/
#include "heat.h"
/*
* One Gauss-Seidel iteration step
*
* Flop count in inner body is 7
*/
int get_rank_from_2dcoords(int coord_x, int coord_y, int *thread_dims, MPI_Comm comm2d)
{
// returns -1 if coordinates are invalid
int rank;
int coords[2] = {coord_x, coord_y};
if (coord_x < 0) return -1;
if (coord_y < 0) return -1;
if (coord_x >= thread_dims[0]) return -1;
if (coord_y >= thread_dims[1]) return -1;
MPI_Cart_rank(comm2d, coords, &rank);
return rank;
}
double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d)
{
unsigned i, j, k;
double tmp, diff, local_residual, global_residual;
int size_x, size_y;
int rank_left, rank_right, rank_top, rank_bottom;
MPI_Request sendrq_right, recrq_right, sendrq_left, recrq_left, sendrq_top, recrq_top, sendrq_bottom, recrq_bottom;
MPI_Status status;
rank_left = get_rank_from_2dcoords(coords[0]-1, coords[1], param->thread_dims, comm2d);
rank_right = get_rank_from_2dcoords(coords[0]+1, coords[1], param->thread_dims, comm2d);
rank_top = get_rank_from_2dcoords(coords[0], coords[1]-1, param->thread_dims, comm2d);
rank_bottom = get_rank_from_2dcoords(coords[0], coords[1]+1, param->thread_dims, comm2d);
size_x = param->blocksize_x + 2;
size_y = param->blocksize_y + 2;
for (k = 0; k < interleaving_count; k++)
{
local_residual = 0;
// Receive border values from top block
if (rank_top != -1)
{
if (k==0)
MPI_Irecv(param->recbuf_top, size_x - 2, MPI_DOUBLE, rank_top, 10, comm2d, &recrq_top);
MPI_Wait(&recrq_top, &status);
for (j=1; j < size_x -1; j++)
param->u[j] = param->recbuf_top[j-1];
if (k< interleaving_count-1)
MPI_Irecv(param->recbuf_top, size_x - 2, MPI_DOUBLE, rank_top, 10, comm2d, &recrq_top);
}
// calculate the upper row
i=1;
for( j=1; j<size_x-1; j++ )
{
tmp = param->u[i*size_x+j];
param->u[i*size_x+j]= 0.25 * (param->u[ i*size_x + (j-1) ]+
param->u[ i*size_x + (j+1) ]+
param->u[ (i-1)*size_x + j ]+
param->u[ (i+1)*size_x + j ]);
diff = param->u[i*size_x+j] - tmp;
local_residual += diff*diff;
}
// Send border values to top block
if (rank_top != -1)
{
if (k>0)
MPI_Wait(&sendrq_top, &status);
for (j=1; j < size_x-1; j++)
param->sendbuf_top[j-1] = param->u[1*size_x + j];
MPI_Isend(param->sendbuf_top, size_x - 2, MPI_DOUBLE, rank_top, 10, comm2d, &sendrq_top);
}
// Receive border values from bottom block
if (rank_bottom != -1)
{
if(k>0)
{
MPI_Irecv(param->recbuf_bottom, size_x - 2, MPI_DOUBLE, rank_bottom, 10, comm2d, &recrq_bottom);
}
}
// do the calculation for the inner part
for( i=2; i < size_y-2; i++ )
{
for( j=1; j<size_x-1; j++ )
{
tmp = param->u[i*size_x+j];
param->u[i*size_x+j]= 0.25 * (param->u[ i*size_x + (j-1) ]+
param->u[ i*size_x + (j+1) ]+
param->u[ (i-1)*size_x + j ]+
param->u[ (i+1)*size_x + j ]);
diff = param->u[i*size_x+j] - tmp;
local_residual += diff*diff;
}
}
// Receive border values from bottom block
if (rank_bottom != -1)
{
if (k>0)
{
MPI_Wait(&recrq_bottom, &status);
for (j=1; j < size_x-1; j++)
param->u[(size_y-1)*size_x+ j] = param->recbuf_bottom[j-1];
}
}
// do the calculation for the lower right border
i = size_y-2;
for( j=1; j<size_x-1; j++ )
{
tmp = param->u[i*size_x+j];
param->u[i*size_x+j]= 0.25 * (param->u[ i*size_x + (j-1) ]+
param->u[ i*size_x + (j+1) ]+
param->u[ (i-1)*size_x + j ]+
param->u[ (i+1)*size_x + j ]);
diff = param->u[i*size_x+j] - tmp;
local_residual += diff*diff;
}
// Send border values to bottom block
if (rank_bottom != -1)
{
if (k>0)
MPI_Wait(&sendrq_bottom, &status);
for (j=1; j < size_x -1; j++)
param->sendbuf_bottom[j-1] = param->u[(size_y-2)*size_x + j];
MPI_Isend(param->sendbuf_bottom, size_x - 2, MPI_DOUBLE, rank_bottom, 10, comm2d, &sendrq_bottom);
}
if (k == interleaving_count-1)
{
// Receive border values from bottom block
if (rank_bottom != -1)
{
MPI_Recv(&(param->u[(size_y-1)*size_x+ j]), size_y - 2, MPI_DOUBLE, rank_right, 10, comm2d, &status);
}
}
}
MPI_Reduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_SUM, 0, comm2d);
MPI_Bcast(&global_residual, 1, MPI_DOUBLE, 0, comm2d);
return global_residual;
}
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
!_TAG_PROGRAM_NAME Exuberant Ctags //
!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
BUFSIZE input.c 10;" d file:
CC Makefile /^CC = mpicc$/;" m
CFLAGS Makefile /^CFLAGS =-O3$/;" m
FALSE heat.c 11;" d file:
INPUT_H_INCLUDED input.h 6;" d
INTERLEAVING_COUNT heat.c 13;" d file:
JACOBI_H_INCLUDED heat.h 8;" d
MPICC Makefile /^MPICC = mpicc$/;" m
TIMING_H_INCLUDED timing.h 6;" d
TRUE heat.c 12;" d file:
act_res heat.h /^ unsigned act_res;$/;" m struct:__anon2
algoparam_t heat.h /^algoparam_t;$/;" t typeref:struct:__anon2
algorithm heat.h /^ int algorithm; \/\/ 0=>Jacobi, 1=>Gauss$/;" m struct:__anon2
blocksize_x heat.h /^ unsigned blocksize_x;$/;" m struct:__anon2
blocksize_y heat.h /^ unsigned blocksize_y;$/;" m struct:__anon2
finalize misc.c /^int finalize( algoparam_t *param )$/;" f
get_rank_from_2dcoords relax_gauss.c /^int get_rank_from_2dcoords(int coord_x, int coord_y, int *thread_dims, MPI_Comm comm2d)$/;" f
heatsrc_t heat.h /^heatsrc_t;$/;" t typeref:struct:__anon1
heatsrcs heat.h /^ heatsrc_t *heatsrcs;$/;" m struct:__anon2
initial_res heat.h /^ unsigned initial_res;$/;" m struct:__anon2
initialize misc.c /^int initialize( algoparam_t *param , int *coords)$/;" f
main heat.c /^int main( int argc, char *argv[] )$/;" f
max_res heat.h /^ unsigned max_res; \/\/ spatial resolution$/;" m struct:__anon2
maxiter heat.h /^ unsigned maxiter; \/\/ maximum number of iterations$/;" m struct:__anon2
numsrcs heat.h /^ unsigned numsrcs; \/\/ number of heat sources$/;" m struct:__anon2
posx heat.h /^ float posx;$/;" m struct:__anon1
posy heat.h /^ float posy;$/;" m struct:__anon1
print_params input.c /^void print_params( algoparam_t *param )$/;" f
range heat.h /^ float range;$/;" m struct:__anon1
read_input input.c /^int read_input( FILE *infile, algoparam_t *param )$/;" f
recbuf_bottom heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
recbuf_left heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
recbuf_right heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
recbuf_top heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
relax_gauss_return_residual relax_gauss.c /^double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d)$/;" f
relax_jacobi_return_residual relax_jacobi.c /^double relax_jacobi_return_residual( double *u, double *utmp,$/;" f
res_step_size heat.h /^ unsigned res_step_size;$/;" m struct:__anon2
sendbuf_bottom heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
sendbuf_left heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
sendbuf_right heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
sendbuf_top heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
temp heat.h /^ float temp;$/;" m struct:__anon1
thread_dims heat.h /^ int thread_dims[2]; \/\/x*y dimensions of thread$/;" m struct:__anon2
u heat.h /^ double *u, *uhelp;$/;" m struct:__anon2
uhelp heat.h /^ double *u, *uhelp;$/;" m struct:__anon2
usage heat.c /^void usage( char *s )$/;" f
wtime timing.c /^double wtime()$/;" f
1 16 # x and y dimensions for threads
1024 # iterations
256 # initial resolution
4096 # max resolution (spatial resolution)
3840 # resolution step size
1 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
# Intel compiler
CC = mpicc
CFLAGS =-O3
MPICC = mpicc
all: heat
heat : heat.o input.o misc.o timing.o relax_gauss.o relax_jacobi.o
$(CC) $(CFLAGS) -o $@ $+ -lm
%.o : %.c %.h
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f *.o heat *~ *.ppm
remake : clean all
This diff is collapsed.
/*
* heat.h
*
* Iterative solver for heat distribution
*/
#include <stdio.h>
#include <stdlib.h>
#define FALSE 0
#define TRUE 1
#define INTERLEAVING_COUNT 1024
#include "input.h"
#include "heat.h"
#include "timing.h"
void usage( char *s )
{
fprintf(stderr,
"Usage: %s <input file> [result file]\n\n", s);
}
int main( int argc, char *argv[] )
{
unsigned iter;
FILE *infile, *resfile;
char *resfilename;
int periods[2]={FALSE, FALSE};
MPI_Comm comm2d;
int nthreads, rank;
int thdx, thdy;
double *tmp;
// algorithmic parameters
algoparam_t param;
int np;
double runtime, flop;
double residual;
int coords[2];
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nthreads);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// check arguments
if( argc < 2 )
{
usage( argv[0] );
MPI_Finalize();
return 1;
}
// check input file
if( !(infile=fopen(argv[1], "r")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for reading.\n\n", argv[1]);
usage(argv[0]);
MPI_Finalize();
return 1;
}
// check input
if( !read_input(infile, &param) )
{
fprintf(stderr, "\nError: Error parsing input file.\n\n");
usage(argv[0]);
MPI_Finalize();
return 1;
}
MPI_Cart_create(MPI_COMM_WORLD, 2, param.thread_dims, periods, FALSE, &comm2d);
MPI_Cart_coords(comm2d, rank, 2, coords);
if(rank==0)
print_params(&param);
param.u = 0;
param.uhelp = 0;
param.sendbuf_left =0;
param.sendbuf_right = 0;
param.recbuf_left = 0;
param.recbuf_right =0;
param.sendbuf_top =0;
param.sendbuf_bottom = 0;
param.recbuf_top = 0;
param.recbuf_bottom =0;
param.act_res = param.initial_res;
// loop over different resolutions
while(1) {
// free allocated memory of previous experiment
if (param.u != 0)
finalize(&param);
if( !initialize(&param, coords) )
{
fprintf(stderr, "Error in Jacobi initialization.\n\n");
usage(argv[0]);
}
fprintf(stderr, "Resolution: %5u\r", param.act_res);
// full size (param.act_res are only the inner points)
np = param.act_res + 2;
// starting time
runtime = MPI_Wtime();
residual = 999999999;
iter = 0;
while(1) {
switch( param.algorithm ) {
case 0: // JACOBI
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np);
tmp = param.u;
param.u = param.uhelp;
param.uhelp = tmp;
break;
case 1: // GAUSS
residual = relax_gauss_return_residual(&param, INTERLEAVING_COUNT, coords , comm2d);
break;
}
iter+= INTERLEAVING_COUNT;
// solution good enough ?
if (residual < 0.000005) break;
// max. iteration reached ? (no limit with maxiter=0)
if (param.maxiter>0 && iter>=param.maxiter) break;
if (iter % 100 == 0)
fprintf(stderr, "residual %f, %d iterations\n", residual, iter);
}
// Flop count after <i> iterations
flop = iter * 7.0 * param.act_res * param.act_res;
// stopping time
runtime = MPI_Wtime() - runtime;
if (rank == 0)
{
fprintf(stderr, "Resolution: %5u, ", param.act_res);
fprintf(stderr, "Time: %04.3f ", runtime);
fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ",
flop/1000000000.0,
flop/runtime/1000000);
fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);
// for plot...
printf("%5d %f\n", param.act_res, flop/runtime/1000000);
}
if (param.act_res + param.res_step_size > param.max_res) break;
param.act_res += param.res_step_size;
}
finalize( &param );
MPI_Finalize();
return 0;
}
/*
* heat.h
*
* Global definitions for the iterative solver
*/
#ifndef JACOBI_H_INCLUDED
#define JACOBI_H_INCLUDED
#include <stdio.h>
#include <mpi.h>
// configuration
typedef struct
{
float posx;
float posy;
float range;
float temp;
}
heatsrc_t;
typedef struct
{
unsigned maxiter; // maximum number of iterations
unsigned act_res;
unsigned max_res; // spatial resolution
unsigned initial_res;
unsigned res_step_size;
unsigned blocksize_x;
unsigned blocksize_y;
int algorithm; // 0=>Jacobi, 1=>Gauss
double *u, *uhelp;
double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;
double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;
unsigned numsrcs; // number of heat sources
heatsrc_t *heatsrcs;
int thread_dims[2]; //x*y dimensions of thread
}
algoparam_t;
// function declarations
// misc.c
int initialize( algoparam_t *param, int *coords );
int finalize( algoparam_t *param );
// Gauss-Seidel: relax_gauss.c
double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d);
// Jacobi: relax_jacobi.c
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey );
#endif // JACOBI_H_INCLUDED
//
// input.c
//
#include <stdio.h>
#include <stdlib.h>
#include "input.h"
#define BUFSIZE 100
int read_input( FILE *infile, algoparam_t *param )
{
int i, n;
char buf[BUFSIZE];
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%d %d", &(param->thread_dims[0]), &(param->thread_dims[1]) );
if( n!=2 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->maxiter) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->initial_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->max_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->res_step_size) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%d", &(param->algorithm) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%u", &(param->numsrcs) );
if( n!=1 )
return 0;
(param->heatsrcs) =
(heatsrc_t*) malloc( sizeof(heatsrc_t) * (param->numsrcs) );
for( i=0; i<param->numsrcs; i++ )
{
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%f %f %f %f",
&(param->heatsrcs[i].posx),
&(param->heatsrcs[i].posy),
&(param->heatsrcs[i].range),
&(param->heatsrcs[i].temp) );
if( n!=4 )
return 0;
}
return 1;
}
void print_params( algoparam_t *param )
{
int i;
fprintf(stderr, "Resolutions : (%u, %u, ... %u)\n",
param->initial_res,
param->initial_res + param->res_step_size,
param->max_res);
fprintf(stderr, "Iterations : %u\n", param->maxiter);
fprintf(stderr, "Algorithm : %d (%s)\n",
param->algorithm,
(param->algorithm == 0) ? "Jacobi":"Gauss-Jacobi" );
fprintf(stderr, "Num. Heat sources : %u\n", param->numsrcs);
for( i=0; i<param->numsrcs; i++ )
{
fprintf(stderr, " %2d: (%2.2f, %2.2f) %2.2f %2.2f \n",
i+1,
param->heatsrcs[i].posx,
param->heatsrcs[i].posy,
param->heatsrcs[i].range,
param->heatsrcs[i].temp );
}
}
//
// input.h
//
#ifndef INPUT_H_INCLUDED
#define INPUT_H_INCLUDED
#include <stdio.h>
#include "heat.h"
int read_input( FILE *infile, algoparam_t *param );
void print_params( algoparam_t *param );
#endif // INPUT_H_INCLUDED
/*
* misc.c
*
* Helper functions for
* - initialization
* - finalization,
* - writing out a picture
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include "heat.h"
/*
* Initialize the iterative solver
* - allocate memory for matrices
* - set boundary conditions according to configuration
*/
int initialize( algoparam_t *param , int *coords)
{
int i, j;
double dist;
// total number of points
const int np = param->act_res + 2;
param->blocksize_x=param->act_res / param->thread_dims[0];
param->blocksize_y=param->act_res / param->thread_dims[1];
//
// allocate memory
//
(param->sendbuf_left) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->recbuf_left) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->sendbuf_right) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->recbuf_right) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->sendbuf_top) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->recbuf_top) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->sendbuf_bottom) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->recbuf_bottom) = (double*)calloc( sizeof(double), param->blocksize_x );
(param->u) = (double*)calloc( sizeof(double),(param->blocksize_x + 2)*(param->blocksize_y + 2) );
(param->uhelp) = (double*)calloc( sizeof(double),(param->blocksize_x + 2)*(param->blocksize_y + 2) );
if( !(param->u) || !(param->uhelp) || !(param->sendbuf_left) || !(param->sendbuf_right) || !(param->recbuf_left) || !(param->recbuf_right) )
{
fprintf(stderr, "Error: Cannot allocate memory\n");
return 0;
}
for( i=0; i<param->numsrcs; i++ )
{
/* top row */
if(coords[1]==0)
{
for( j=0 ; j<param->blocksize_x + 1 ; j++ )
{
dist = sqrt( pow((double)(j + (param->blocksize_x * coords[0]) )/ (double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[j] = (param->u)[j];
}
}
}
/* bottom row */
if(coords[1]==param->thread_dims[1]-1)
{
for( j=0; j<param->blocksize_x + 1 ; j++ )
{
dist = sqrt( pow((double)(j + (param->blocksize_x * coords[0]) )/ (double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(1-param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[(param->blocksize_y+1)*(param->blocksize_x+2) + j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[(param->blocksize_y+1)*(param->blocksize_x+2)+j] = (param->u)[(param->blocksize_y+1)*(param->blocksize_x+2)+j];
}
}
}
if(coords[0]==0)
{
/* leftmost column */
for( j=0; j<param->blocksize_y+1; j++ )
{
dist = sqrt( pow(param->heatsrcs[i].posx, 2)+
pow((double)(j + (param->blocksize_y * coords[1]) ) / (double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*(param->blocksize_x+2) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*(param->blocksize_x+2) ] = (param->u)[ j*(param->blocksize_x+2) ];
}
}
}
if(coords[0]==param->thread_dims[0]-1)
{
/* rightmost column */
for( j=1; j<param->blocksize_y+1; j++ )
{
dist = sqrt( pow(1 - param->heatsrcs[i].posx, 2)+
pow((double)(j + (param->blocksize_y * coords[1]) ) / (double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ] = (param->u)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ];
}
}
}
}
return 1;
}
/*
* free used memory
*/
int finalize( algoparam_t *param )
{
if( param->u )
{
free(param->u);
param->u = 0;
}
if( param->uhelp )
{
free(param->uhelp);
param->uhelp = 0;
}
if ( param->sendbuf_left )
{
free(param->sendbuf_left);
param->sendbuf_left = 0;
}
if ( param->sendbuf_right )
{
free(param->sendbuf_right);
param->sendbuf_right = 0;
}
if ( param->recbuf_left )
{
free(param->recbuf_left);
param->recbuf_left = 0;
}
if ( param->recbuf_right )
{
free(param->recbuf_right);
param->recbuf_right = 0;
}
if ( param->sendbuf_top )
{
free(param->sendbuf_top);
param->sendbuf_top = 0;
}
if ( param->sendbuf_bottom )
{
free(param->sendbuf_bottom);
param->sendbuf_bottom = 0;
}
if ( param->recbuf_top )
{
free(param->recbuf_top);
param->recbuf_top = 0;
}
if ( param->recbuf_bottom )
{
free(param->recbuf_bottom);
param->recbuf_bottom = 0;
}
return 1;
}
#!/usr/bin/perl
#
# pfmon2cg, version 0.02
# Converter for pfmon output to Cachegrind profile format
#
# 4/2007, Josef Weidendorfer
#
#
# Read in pfmon data, e.g. written from the command
#
# pfmon --with-header --smpl-outfile=pfmon.out \
# -e l2_misses --short-smpl-periods=100000 -- <command>
# (sampling points every 100000 L2 Misses)
#
# This gives a list of sampled instruction addresses ($ipps)
while(<>) {
if (/^#/) {
if (/command line\s*:\s*(.*)$/) { $command = $1; }
if (/PMD\d+: (.*?)\s*(,|=)/) { $events .= "$1 "; }
next;
}
if (/^entry/) {
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
$entry = "";
}
chomp;
$entry .= $_;
}
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
($pid) = ($entry =~ /PID:(\S+)/);
open OUT, ">.p2cg";
print OUT $iips;
close OUT;
# Let addr2line convert the instruction addresses into
# source code references (needs debug info)
#
# This creates hashes with event sums
# Strip command line arguments
$onlycommand = $command;
$onlycommand =~ s/\s.*//;
$cmd = "addr2line -f -e $onlycommand < .p2cg";
#print "#Run Cmd $cmd\n";
open ATOL, "$cmd < .p2cg|";
while(<ATOL>) {
chomp;
$func = $_;
$fileline = <ATOL>;
chomp $fileline;
($file,$line) = ($fileline =~ /(.*?):(.*)/);
#print "Func $func, file $file, line $line\n";
if ($file{$func} eq "") {
push @funcs, $func;
$file{$func} = $file;
}
if ($count{$func,$line} eq "") {
$tmp = $lines->{$func};
$tmp = [] unless defined $tmp;
push @$tmp, $line;
$lines->{$func} = $tmp;
}
$count{$func,$line}++;
$totals++;
}
close ATOL;
#
# Dump out Cachegrind format
#
# First header info...
print "version: 1\n";
print "creator: pf2mon 0.02\n";
print "pid: $pid\n";
print "cmd: $command\n\n";
print "positions: line\n";
print "events: $events\n";
print "summary: $totals\n\n";
# Now profiling data.
#
# We only support static linked binaries for now, so all code
# is in the binary
print "ob=$onlycommand\n";
foreach $f (@funcs) {
print "\nfl=$file{$f}\n";
print "fn=$f\n";
$tmp = $lines->{$f};
foreach $l (@$tmp) {
print "$l $count{$f,$l}\n";
}
}
/*
* relax_gauss.c
*
* Gauss-Seidel Relaxation
*
*/
#include "heat.h"
/*
* One Gauss-Seidel iteration step
*
* Flop count in inner body is 7
*/
int get_rank_from_2dcoords(int coord_x, int coord_y, int *thread_dims, MPI_Comm comm2d)
{
// returns -1 if coordinates are invalid
int rank;
int coords[2] = {coord_x, coord_y};
if (coord_x < 0) return -1;
if (coord_y < 0) return -1;
if (coord_x >= thread_dims[0]) return -1;
if (coord_y >= thread_dims[1]) return -1;
MPI_Cart_rank(comm2d, coords, &rank);
return rank;
}
double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d)
{
unsigned i, j, k;
double tmp, diff, local_residual, global_residual;
int size_x, size_y;
int rank_left, rank_right, rank_top, rank_bottom;
MPI_Request sendrq_right, recrq_right, sendrq_left, recrq_left, sendrq_top, recrq_top, sendrq_bottom, recrq_bottom;
MPI_Status status;
rank_left = get_rank_from_2dcoords(coords[0]-1, coords[1], param->thread_dims, comm2d);
rank_right = get_rank_from_2dcoords(coords[0]+1, coords[1], param->thread_dims, comm2d);
rank_top = get_rank_from_2dcoords(coords[0], coords[1]-1, param->thread_dims, comm2d);
rank_bottom = get_rank_from_2dcoords(coords[0], coords[1]+1, param->thread_dims, comm2d);
size_x = param->blocksize_x + 2;
size_y = param->blocksize_y + 2;
for (k = 0; k < interleaving_count; k++)
{
local_residual = 0;
// Receive border values from left block
if (rank_left != -1)
{
if (k == 0)
MPI_Irecv(param->recbuf_left, size_y - 2, MPI_DOUBLE, rank_left, 10, comm2d, &recrq_left);
MPI_Wait(&recrq_left, &status);
for (i=1; i < size_y-1; i++)
param->u[i*size_x] = param->recbuf_left[i-1];
if (k < interleaving_count-1)
MPI_Irecv(param->recbuf_left, size_y - 2, MPI_DOUBLE, rank_left, 10, comm2d, &recrq_left);
}
// Receive border values from top block
if (rank_top != -1)
{
if (k==0)
MPI_Irecv(param->recbuf_top, size_x - 2, MPI_DOUBLE, rank_top, 10, comm2d, &recrq_top);
MPI_Wait(&recrq_top, &status);
for (j=1; j < size_x -1; j++)
param->u[j] = param->recbuf_top[j-1];
if (k< interleaving_count-1)
MPI_Irecv(param->recbuf_top, size_x - 2, MPI_DOUBLE, rank_top, 10, comm2d, &recrq_top);
}
// do the calculation for the upper left + inner part
for( i=1; i < size_y-2; i++ )
{
for( j=1; j<size_x-2; j++ )
{
tmp = param->u[i*size_x+j];
param->u[i*size_x+j]= 0.25 * (param->u[ i*size_x + (j-1) ]+
param->u[ i*size_x + (j+1) ]+
param->u[ (i-1)*size_x + j ]+
param->u[ (i+1)*size_x + j ]);
diff = param->u[i*size_x+j] - tmp;
local_residual += diff*diff;
}
}
// Receive border values from right block
if (rank_right != -1)
{
if (k>0)
{
MPI_Wait(&recrq_right, &status);
for (i=1; i < size_y-1; i++)
param->u[i*size_x+ size_x-1] = param->recbuf_right[i-1];
}
MPI_Irecv(param->recbuf_right, size_y - 2, MPI_DOUBLE, rank_right, 10, comm2d, &recrq_right);
}
// Receive border values from bottom block
if (rank_bottom != -1)
{
if (k>0)
{
MPI_Wait(&recrq_bottom, &status);
for (j=1; j < size_x-1; j++)
param->u[(size_y-1)*size_x+ j] = param->recbuf_bottom[j-1];
}
MPI_Irecv(param->recbuf_bottom, size_x - 2, MPI_DOUBLE, rank_bottom, 10, comm2d, &recrq_bottom);
}
// do the calculation for the lower right border
i = size_y-2;
for( j=1; j<size_x-2; j++ )
{
tmp = param->u[i*size_x+j];
param->u[i*size_x+j]= 0.25 * (param->u[ i*size_x + (j-1) ]+
param->u[ i*size_x + (j+1) ]+
param->u[ (i-1)*size_x + j ]+
param->u[ (i+1)*size_x + j ]);
diff = param->u[i*size_x+j] - tmp;
local_residual += diff*diff;
}
j = size_x-2;
for (i = 1; i<size_y-1; i++ )
{
tmp = param->u[i*size_x+j];
param->u[i*size_x+j]= 0.25 * (param->u[ i*size_x + (j-1) ]+
param->u[ i*size_x + (j+1) ]+
param->u[ (i-1)*size_x + j ]+
param->u[ (i+1)*size_x + j ]);
diff = param->u[i*size_x+j] - tmp;
local_residual += diff*diff;
}
// Send border values to left block
if (rank_left != -1)
{
if (k>0)
MPI_Wait(&sendrq_left, &status);
for (i=1; i < size_y-1; i++)
param->sendbuf_left[i-1] = param->u[i*size_x + 1];
MPI_Isend(param->sendbuf_left, size_y - 2, MPI_DOUBLE, rank_left, 10, comm2d, &sendrq_left);
}
// Send border values to top block
if (rank_top != -1)
{
if (k>0)
MPI_Wait(&sendrq_top, &status);
for (j=1; j < size_x-1; j++)
param->sendbuf_top[j-1] = param->u[1*size_x + j];
MPI_Isend(param->sendbuf_top, size_x - 2, MPI_DOUBLE, rank_top, 10, comm2d, &sendrq_top);
}
// Send border values to right block
if (rank_right != -1)
{
if (k>0)
MPI_Wait(&sendrq_right, &status);
for (i=1; i < size_y-1; i++)
param->sendbuf_right[i-1] = param->u[i*size_x + size_x-2];
MPI_Isend(param->sendbuf_right, size_y - 2, MPI_DOUBLE, rank_right, 10, comm2d, &sendrq_right);
}
// Send border values to bottom block
if (rank_bottom != -1)
{
if (k>0)
MPI_Wait(&sendrq_bottom, &status);
for (j=1; j < size_x -1; j++)
param->sendbuf_bottom[j-1] = param->u[(size_y-2)*size_x + j];
MPI_Isend(param->sendbuf_bottom, size_x - 2, MPI_DOUBLE, rank_bottom, 10, comm2d, &sendrq_bottom);
}
if (k == interleaving_count-1)
{
// Receive border values from right block
if (rank_right != -1)
{
MPI_Wait(&recrq_right, &status);
for (i=1; i < size_y-1; i++)
param->u[i*size_x+ size_x-1] = param->recbuf_right[i-1];
}
// Receive border values from bottom block
if (rank_bottom != -1)
{
MPI_Wait(&recrq_bottom, &status);
for (j=1; j < size_x-1; j++)
param->u[(size_y-1)*size_x+ j] = param->recbuf_bottom[j-1];
}
}
}
MPI_Reduce(&local_residual, &global_residual, 1, MPI_DOUBLE, MPI_SUM, 0, comm2d);
MPI_Bcast(&global_residual, 1, MPI_DOUBLE, 0, comm2d);
return global_residual;
}
/*
* relax_jacobi.c
*
* Jacobi Relaxation
*
*/
#include "heat.h"
#if 0
/*
* Residual (length of error vector)
* between current solution and next after a Jacobi step
*/
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (u[ i*sizex + (j-1) ]+ // left
u[ i*sizex + (j+1) ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
}
}
return sum;
}
#endif
/*
* One Jacobi iteration step
*/
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
int i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
{
utmp[i*sizex + j]= 0.25 * (u[ i*sizex+j -1 ]+ // left
u[ i*sizex+j +1 ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = utmp[i*sizex + j] - u[i*sizex +j];
sum += diff * diff;
}
}
}
return sum;
}
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
!_TAG_PROGRAM_NAME Exuberant Ctags //
!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
BUFSIZE input.c 10;" d file:
CC Makefile /^CC = mpicc$/;" m
CFLAGS Makefile /^CFLAGS =-O3$/;" m
FALSE heat.c 11;" d file:
INPUT_H_INCLUDED input.h 6;" d
INTERLEAVING_COUNT heat.c 13;" d file:
JACOBI_H_INCLUDED heat.h 8;" d
MPICC Makefile /^MPICC = mpicc$/;" m
TIMING_H_INCLUDED timing.h 6;" d
TRUE heat.c 12;" d file:
act_res heat.h /^ unsigned act_res;$/;" m struct:__anon2
algoparam_t heat.h /^algoparam_t;$/;" t typeref:struct:__anon2
algorithm heat.h /^ int algorithm; \/\/ 0=>Jacobi, 1=>Gauss$/;" m struct:__anon2
blocksize_x heat.h /^ unsigned blocksize_x;$/;" m struct:__anon2
blocksize_y heat.h /^ unsigned blocksize_y;$/;" m struct:__anon2
finalize misc.c /^int finalize( algoparam_t *param )$/;" f
get_rank_from_2dcoords relax_gauss.c /^int get_rank_from_2dcoords(int coord_x, int coord_y, int *thread_dims, MPI_Comm comm2d)$/;" f
heatsrc_t heat.h /^heatsrc_t;$/;" t typeref:struct:__anon1
heatsrcs heat.h /^ heatsrc_t *heatsrcs;$/;" m struct:__anon2
initial_res heat.h /^ unsigned initial_res;$/;" m struct:__anon2
initialize misc.c /^int initialize( algoparam_t *param , int *coords)$/;" f
main heat.c /^int main( int argc, char *argv[] )$/;" f
max_res heat.h /^ unsigned max_res; \/\/ spatial resolution$/;" m struct:__anon2
maxiter heat.h /^ unsigned maxiter; \/\/ maximum number of iterations$/;" m struct:__anon2
numsrcs heat.h /^ unsigned numsrcs; \/\/ number of heat sources$/;" m struct:__anon2
posx heat.h /^ float posx;$/;" m struct:__anon1
posy heat.h /^ float posy;$/;" m struct:__anon1
print_params input.c /^void print_params( algoparam_t *param )$/;" f
range heat.h /^ float range;$/;" m struct:__anon1
read_input input.c /^int read_input( FILE *infile, algoparam_t *param )$/;" f
recbuf_bottom heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
recbuf_left heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
recbuf_right heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
recbuf_top heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
relax_gauss_return_residual relax_gauss.c /^double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d)$/;" f
relax_jacobi_return_residual relax_jacobi.c /^double relax_jacobi_return_residual( double *u, double *utmp,$/;" f
res_step_size heat.h /^ unsigned res_step_size;$/;" m struct:__anon2
sendbuf_bottom heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
sendbuf_left heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
sendbuf_right heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
sendbuf_top heat.h /^ double *sendbuf_top, *recbuf_top, *sendbuf_bottom, *recbuf_bottom;$/;" m struct:__anon2
temp heat.h /^ float temp;$/;" m struct:__anon1
thread_dims heat.h /^ int thread_dims[2]; \/\/x*y dimensions of thread$/;" m struct:__anon2
u heat.h /^ double *u, *uhelp;$/;" m struct:__anon2
uhelp heat.h /^ double *u, *uhelp;$/;" m struct:__anon2
usage heat.c /^void usage( char *s )$/;" f
wtime timing.c /^double wtime()$/;" f
1 16 # x and y dimensions for threads
1024 # iterations
256 # initial resolution
4096 # max resolution (spatial resolution)
3840 # resolution step size
1 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
2 2 # x and y dimensions for threads
50 # iterations
0 # iterations
100 # initial resolution
2900 # max resolution (spatial resolution)
100 # max resolution (spatial resolution)
200 # resolution step size
0 # Algorithm 0=Jacobi 1=Gauss
1 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
//
// timing.c
//
#include <sys/time.h>
#include "timing.h"
double wtime()
{
struct timeval tv;
gettimeofday(&tv, 0);
return tv.tv_sec+1e-6*tv.tv_usec;
}
//
// timing.h
//
#ifndef TIMING_H_INCLUDED
#define TIMING_H_INCLUDED
double wtime();
#endif // TIMING_H_IINCLUDED
# Intel compiler
CC = mpicc
CFLAGS = -O3
MPICC = mpicc
all: heat
heat : heat.o input.o misc.o timing.o relax_gauss.o relax_jacobi.o
$(CC) $(CFLAGS) -o $@ $+ -lm
%.o : %.c %.h
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f *.o heat *~ *.ppm
remake : clean all
This diff is collapsed.
......@@ -8,6 +8,10 @@
#include <stdio.h>
#include <stdlib.h>
#define FALSE 0
#define TRUE 1
#define INTERLEAVING_COUNT 1024
#include "input.h"
#include "heat.h"
#include "timing.h"
......@@ -25,10 +29,10 @@ int main( int argc, char *argv[] )
unsigned iter;
FILE *infile, *resfile;
char *resfilename;
int periods[2]={false, false};
int periods[2]={FALSE, FALSE};
MPI_Comm comm2d;
int count, rank;
int nthreads, rank;
int thdx, thdy;
double *tmp;
......@@ -50,6 +54,7 @@ int main( int argc, char *argv[] )
if( argc < 2 )
{
usage( argv[0] );
MPI_Finalize();
return 1;
}
......@@ -60,47 +65,37 @@ int main( int argc, char *argv[] )
"\nError: Cannot open \"%s\" for reading.\n\n", argv[1]);
usage(argv[0]);
MPI_Finalize();
return 1;
}
#if 0
// check result file
resfilename= (argc>=3) ? argv[2]:"heat.ppm";
if( !(resfile=fopen(resfilename, "w")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for writing.\n\n",
resfilename);
usage(argv[0]);
return 1;
}
#endif
// check input
if( !read_input(infile, &param) )
{
fprintf(stderr, "\nError: Error parsing input file.\n\n");
usage(argv[0]);
MPI_Finalize();
return 1;
}
MPI_Cart_create(MPI_COMM_WORLD, 2, param.thread_dims, periods, false, &comm2d);
MPI_Cart_coords(comm2d, rank, 2, &coords);
if(myid==0)
print_params(&param);
MPI_Cart_create(MPI_COMM_WORLD, 2, param.thread_dims, periods, FALSE, &comm2d);
MPI_Cart_coords(comm2d, rank, 2, coords);
// set the visualization resolution
param.visres = param.max_res;
if(rank==0)
print_params(&param);
param.u = 0;
param.uhelp = 0;
param.sendbuf_left =0;
param.sendbuf_right = 0;
param.recbuf_left = 0;
param.recbuf_right =0;
param.act_res = param.initial_res;
// loop over different resolutions
while(1) {
......@@ -108,7 +103,7 @@ if(myid==0)
if (param.u != 0)
finalize(&param);
if( !initialize(&param) )
if( !initialize(&param, coords) )
{
fprintf(stderr, "Error in Jacobi initialization.\n\n");
......@@ -121,9 +116,9 @@ if(myid==0)
np = param.act_res + 2;
// starting time
runtime = wtime();
runtime = MPI_Wtime();
residual = 999999999;
iter = 0;
while(1) {
......@@ -138,13 +133,11 @@ if(myid==0)
break;
case 1: // GAUSS
relax_gauss(param.u, np, np);
residual = residual_gauss( param.u, param.uhelp, np, np);
residual = relax_gauss_return_residual(&param, INTERLEAVING_COUNT, coords , comm2d);
break;
}
iter++;
iter+= INTERLEAVING_COUNT;
// solution good enough ?
if (residual < 0.000005) break;
......@@ -155,27 +148,30 @@ if(myid==0)
if (iter % 100 == 0)
fprintf(stderr, "residual %f, %d iterations\n", residual, iter);
}
// Flop count after <i> iterations
flop = iter * 11.0 * param.act_res * param.act_res;
flop = iter * 7.0 * param.act_res * param.act_res;
// stopping time
runtime = wtime() - runtime;
runtime = MPI_Wtime() - runtime;
fprintf(stderr, "Resolution: %5u, ", param.act_res);
fprintf(stderr, "Time: %04.3f ", runtime);
fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ",
flop/1000000000.0,
flop/runtime/1000000);
fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);
// for plot...
printf("%5d %f\n", param.act_res, flop/runtime/1000000);
if (rank == 0)
{
fprintf(stderr, "Resolution: %5u, ", param.act_res);
fprintf(stderr, "Time: %04.3f ", runtime);
fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ",
flop/1000000000.0,
flop/runtime/1000000);
fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);
// for plot...
printf("%5d %f\n", param.act_res, flop/runtime/1000000);
}
if (param.act_res + param.res_step_size > param.max_res) break;
param.act_res += param.res_step_size;
}
finalize( &param );
MPI_Finalize();
return 0;
}
/*
* heat.h
*
* Global definitions for the iterative solver
*/
#ifndef JACOBI_H_INCLUDED
#define JACOBI_H_INCLUDED
#include <stdio.h>
#include <mpi.h>
// configuration
typedef struct
{
float posx;
float posy;
float range;
float temp;
}
heatsrc_t;
typedef struct
{
unsigned maxiter; // maximum number of iterations
unsigned act_res;
unsigned max_res; // spatial resolution
unsigned initial_res;
unsigned res_step_size;
unsigned blocksize_x;
unsigned blocksize_y;
int algorithm; // 0=>Jacobi, 1=>Gauss
double *u, *uhelp;
double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;
unsigned numsrcs; // number of heat sources
heatsrc_t *heatsrcs;
int thread_dims[2]; //x*y dimensions of thread
}
algoparam_t;
// function declarations
// misc.c
int initialize( algoparam_t *param, int *coords );
int finalize( algoparam_t *param );
// Gauss-Seidel: relax_gauss.c
double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d);
// Jacobi: relax_jacobi.c
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey );
#endif // JACOBI_H_INCLUDED
//
// input.c
//
#include <stdio.h>
#include <stdlib.h>
#include "input.h"
#define BUFSIZE 100
int read_input( FILE *infile, algoparam_t *param )
{
int i, n;
char buf[BUFSIZE];
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%d %d", &(param->thread_dims[0]), &(param->thread_dims[1]) );
if( n!=2 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->maxiter) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->initial_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->max_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->res_step_size) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%d", &(param->algorithm) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%u", &(param->numsrcs) );
if( n!=1 )
return 0;
(param->heatsrcs) =
(heatsrc_t*) malloc( sizeof(heatsrc_t) * (param->numsrcs) );
for( i=0; i<param->numsrcs; i++ )
{
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%f %f %f %f",
&(param->heatsrcs[i].posx),
&(param->heatsrcs[i].posy),
&(param->heatsrcs[i].range),
&(param->heatsrcs[i].temp) );
if( n!=4 )
return 0;
}
return 1;
}
void print_params( algoparam_t *param )
{
int i;
fprintf(stderr, "Resolutions : (%u, %u, ... %u)\n",
param->initial_res,
param->initial_res + param->res_step_size,
param->max_res);
fprintf(stderr, "Iterations : %u\n", param->maxiter);
fprintf(stderr, "Algorithm : %d (%s)\n",
param->algorithm,
(param->algorithm == 0) ? "Jacobi":"Gauss-Jacobi" );
fprintf(stderr, "Num. Heat sources : %u\n", param->numsrcs);
for( i=0; i<param->numsrcs; i++ )
{
fprintf(stderr, " %2d: (%2.2f, %2.2f) %2.2f %2.2f \n",
i+1,
param->heatsrcs[i].posx,
param->heatsrcs[i].posy,
param->heatsrcs[i].range,
param->heatsrcs[i].temp );
}
}
//
// input.h
//
#ifndef INPUT_H_INCLUDED
#define INPUT_H_INCLUDED
#include <stdio.h>
#include "heat.h"
int read_input( FILE *infile, algoparam_t *param );
void print_params( algoparam_t *param );
#endif // INPUT_H_INCLUDED
......@@ -19,7 +19,7 @@
* - allocate memory for matrices
* - set boundary conditions according to configuration
*/
int initialize( algoparam_t *param )
int initialize( algoparam_t *param , int *coords)
{
int i, j;
double dist;
......@@ -27,16 +27,21 @@ int initialize( algoparam_t *param )
// total number of points
const int np = param->act_res + 2;
//
param->blocksize_x=param->act_res / param->thread_dims[0];
param->blocksize_y=param->act_res / param->thread_dims[1];
//
// allocate memory
//
block_size_x=param->act_res / param->thread_dims[0];
block_size_y=param->act_res / param->thread_dims[1];
(param->u) = (double*)calloc( sizeof(double),(block_size_x + 2)*(block_size_y + 2) );
(param->uhelp) = (double*)calloc( sizeof(double),(block_size_x + 2)*(block_size_y + 2) );
if( !(param->u) || !(param->uhelp) )
(param->sendbuf_left) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->recbuf_left) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->sendbuf_right) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->recbuf_right) = (double*)calloc( sizeof(double), param->blocksize_y );
(param->u) = (double*)calloc( sizeof(double),(param->blocksize_x + 2)*(param->blocksize_y + 2) );
(param->uhelp) = (double*)calloc( sizeof(double),(param->blocksize_x + 2)*(param->blocksize_y + 2) );
if( !(param->u) || !(param->uhelp) || !(param->sendbuf_left) || !(param->sendbuf_right) || !(param->recbuf_left) || !(param->recbuf_right) )
{
fprintf(stderr, "Error: Cannot allocate memory\n");
return 0;
......@@ -47,9 +52,9 @@ int initialize( algoparam_t *param )
/* top row */
if(coords[1]==0)
{
for( j=0 ; j<block_size_x + 1 ; j++ )
for( j=0 ; j<param->blocksize_x + 1 ; j++ )
{
dist = sqrt( pow((double)(j + (block_size_x * coords[0]) )/ (double)(np-1) -
dist = sqrt( pow((double)(j + (param->blocksize_x * coords[0]) )/ (double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(param->heatsrcs[i].posy, 2));
......@@ -64,22 +69,22 @@ int initialize( algoparam_t *param )
}
}
/* bottom row */
if(coords[1]==thread_dims[1]-1)
if(coords[1]==param->thread_dims[1]-1)
{
for( j=0; j<block_size_x + 1 ; j++ )
for( j=0; j<param->blocksize_x + 1 ; j++ )
{
dist = sqrt( pow((double)(j + (block_size_x * coords[0]) )/ (double)(np-1) -
dist = sqrt( pow((double)(j + (param->blocksize_x * coords[0]) )/ (double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(1-param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[(block_size_y+1)*(block_size_x+2) + j] +=
(param->u)[(param->blocksize_y+1)*(param->blocksize_x+2) + j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[(block_size_y+1)*(block_size_x+2)+j] = (param->u)[(block_size_y+1)*(block_size_x+2)+j];
(param->uhelp)[(param->blocksize_y+1)*(param->blocksize_x+2)+j] = (param->u)[(param->blocksize_y+1)*(param->blocksize_x+2)+j];
}
}
}
......@@ -87,45 +92,44 @@ int initialize( algoparam_t *param )
if(coords[0]==0)
{
/* leftmost column */
for( j=0; j<block_size_y+1; j++ )
for( j=0; j<param->blocksize_y+1; j++ )
{
dist = sqrt( pow(param->heatsrcs[i].posx, 2)+
pow((double)(j + (block_size_y * coords[1]) ) / (double)(np-1) -
pow((double)(j + (param->blocksize_y * coords[1]) ) / (double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*(block_size_x+2) ]+=
(param->u)[ j*(param->blocksize_x+2) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*(block_size_x+2) ] = (param->u)[ j*(block_size_x+2) ];
(param->uhelp)[ j*(param->blocksize_x+2) ] = (param->u)[ j*(param->blocksize_x+2) ];
}
}
}
if(coords[0]==1)
if(coords[0]==param->thread_dims[0]-1)
{
/* rightmost column */
for( j=1; j<block_size_y+1; j++ )
for( j=1; j<param->blocksize_y+1; j++ )
{
dist = sqrt( pow(1 - param->heatsrcs[i].posx, 2)+
pow((double)(j + (block_size_y * coords[1]) ) / (double)(np-1) -
pow((double)(j + (param->blocksize_y * coords[1]) ) / (double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*(block_size_x+2)+(block_size_x+1) ]+=
(param->u)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*(block_size_x+2)+(block_size_x+1) ] = (param->u)[ j*(block_size_x+2)+(block_size_x+1) ];
(param->uhelp)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ] = (param->u)[ j*(param->blocksize_x+2)+(param->blocksize_x+1) ];
}
}
}
}
return 1;
}
......@@ -134,16 +138,42 @@ int initialize( algoparam_t *param )
*/
int finalize( algoparam_t *param )
{
if( param->u ) {
free(param->u);
param->u = 0;
if( param->u )
{
free(param->u);
param->u = 0;
}
if( param->uhelp ) {
free(param->uhelp);
param->uhelp = 0;
if( param->uhelp )
{
free(param->uhelp);
param->uhelp = 0;
}
if ( param->sendbuf_left )
{
free(param->sendbuf_left);
param->sendbuf_left = 0;
}
if ( param->sendbuf_right )
{
free(param->sendbuf_right);
param->sendbuf_right = 0;
}
if ( param->recbuf_left )
{
free(param->recbuf_left);
param->recbuf_left = 0;
}
if ( param->recbuf_right )
{
free(param->recbuf_right);
param->recbuf_right = 0;
}
return 1;
}
#!/usr/bin/perl
#
# pfmon2cg, version 0.02
# Converter for pfmon output to Cachegrind profile format
#
# 4/2007, Josef Weidendorfer
#
#
# Read in pfmon data, e.g. written from the command
#
# pfmon --with-header --smpl-outfile=pfmon.out \
# -e l2_misses --short-smpl-periods=100000 -- <command>
# (sampling points every 100000 L2 Misses)
#
# This gives a list of sampled instruction addresses ($ipps)
while(<>) {
if (/^#/) {
if (/command line\s*:\s*(.*)$/) { $command = $1; }
if (/PMD\d+: (.*?)\s*(,|=)/) { $events .= "$1 "; }
next;
}
if (/^entry/) {
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
$entry = "";
}
chomp;
$entry .= $_;
}
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
($pid) = ($entry =~ /PID:(\S+)/);
open OUT, ">.p2cg";
print OUT $iips;
close OUT;
# Let addr2line convert the instruction addresses into
# source code references (needs debug info)
#
# This creates hashes with event sums
# Strip command line arguments
$onlycommand = $command;
$onlycommand =~ s/\s.*//;
$cmd = "addr2line -f -e $onlycommand < .p2cg";
#print "#Run Cmd $cmd\n";
open ATOL, "$cmd < .p2cg|";
while(<ATOL>) {
chomp;
$func = $_;
$fileline = <ATOL>;
chomp $fileline;
($file,$line) = ($fileline =~ /(.*?):(.*)/);
#print "Func $func, file $file, line $line\n";
if ($file{$func} eq "") {
push @funcs, $func;
$file{$func} = $file;
}
if ($count{$func,$line} eq "") {
$tmp = $lines->{$func};
$tmp = [] unless defined $tmp;
push @$tmp, $line;
$lines->{$func} = $tmp;
}
$count{$func,$line}++;
$totals++;
}
close ATOL;
#
# Dump out Cachegrind format
#
# First header info...
print "version: 1\n";
print "creator: pf2mon 0.02\n";
print "pid: $pid\n";
print "cmd: $command\n\n";
print "positions: line\n";
print "events: $events\n";
print "summary: $totals\n\n";
# Now profiling data.
#
# We only support static linked binaries for now, so all code
# is in the binary
print "ob=$onlycommand\n";
foreach $f (@funcs) {
print "\nfl=$file{$f}\n";
print "fn=$f\n";
$tmp = $lines->{$f};
foreach $l (@$tmp) {
print "$l $count{$f,$l}\n";
}
}
This diff is collapsed.
/*
* relax_jacobi.c
*
* Jacobi Relaxation
*
*/
#include "heat.h"
#if 0
/*
* Residual (length of error vector)
* between current solution and next after a Jacobi step
*/
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (u[ i*sizex + (j-1) ]+ // left
u[ i*sizex + (j+1) ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
}
}
return sum;
}
#endif
/*
* One Jacobi iteration step
*/
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
int i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
{
utmp[i*sizex + j]= 0.25 * (u[ i*sizex+j -1 ]+ // left
u[ i*sizex+j +1 ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = utmp[i*sizex + j] - u[i*sizex +j];
sum += diff * diff;
}
}
}
return sum;
}
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
!_TAG_PROGRAM_NAME Exuberant Ctags //
!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
BUFSIZE input.c 10;" d file:
CC Makefile /^CC = mpicc$/;" m
CFLAGS Makefile /^CFLAGS = -O3$/;" m
FALSE heat.c 11;" d file:
INPUT_H_INCLUDED input.h 6;" d
INTERLEAVING_COUNT heat.c 13;" d file:
JACOBI_H_INCLUDED heat.h 8;" d
MPICC Makefile /^MPICC = mpicc$/;" m
TIMING_H_INCLUDED timing.h 6;" d
TRUE heat.c 12;" d file:
act_res heat.h /^ unsigned act_res;$/;" m struct:__anon2
algoparam_t heat.h /^algoparam_t;$/;" t typeref:struct:__anon2
algorithm heat.h /^ int algorithm; \/\/ 0=>Jacobi, 1=>Gauss$/;" m struct:__anon2
blocksize_x heat.h /^ unsigned blocksize_x;$/;" m struct:__anon2
blocksize_y heat.h /^ unsigned blocksize_y;$/;" m struct:__anon2
finalize misc.c /^int finalize( algoparam_t *param )$/;" f
get_rank_from_2dcoords relax_gauss.c /^int get_rank_from_2dcoords(int coord_x, int coord_y, int *thread_dims, MPI_Comm comm2d)$/;" f
heatsrc_t heat.h /^heatsrc_t;$/;" t typeref:struct:__anon1
heatsrcs heat.h /^ heatsrc_t *heatsrcs;$/;" m struct:__anon2
initial_res heat.h /^ unsigned initial_res;$/;" m struct:__anon2
initialize misc.c /^int initialize( algoparam_t *param , int *coords)$/;" f
main heat.c /^int main( int argc, char *argv[] )$/;" f
max_res heat.h /^ unsigned max_res; \/\/ spatial resolution$/;" m struct:__anon2
maxiter heat.h /^ unsigned maxiter; \/\/ maximum number of iterations$/;" m struct:__anon2
numsrcs heat.h /^ unsigned numsrcs; \/\/ number of heat sources$/;" m struct:__anon2
posx heat.h /^ float posx;$/;" m struct:__anon1
posy heat.h /^ float posy;$/;" m struct:__anon1
print_params input.c /^void print_params( algoparam_t *param )$/;" f
range heat.h /^ float range;$/;" m struct:__anon1
read_input input.c /^int read_input( FILE *infile, algoparam_t *param )$/;" f
recbuf_left heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
recbuf_right heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
relax_gauss_return_residual relax_gauss.c /^double relax_gauss_return_residual( algoparam_t *param, int interleaving_count, int *coords, MPI_Comm comm2d)$/;" f
relax_jacobi_return_residual relax_jacobi.c /^double relax_jacobi_return_residual( double *u, double *utmp,$/;" f
res_step_size heat.h /^ unsigned res_step_size;$/;" m struct:__anon2
sendbuf_left heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
sendbuf_right heat.h /^ double *sendbuf_left, *recbuf_left, *sendbuf_right, *recbuf_right;$/;" m struct:__anon2
temp heat.h /^ float temp;$/;" m struct:__anon1
thread_dims heat.h /^ int thread_dims[2]; \/\/x*y dimensions of thread$/;" m struct:__anon2
u heat.h /^ double *u, *uhelp;$/;" m struct:__anon2
uhelp heat.h /^ double *u, *uhelp;$/;" m struct:__anon2
usage heat.c /^void usage( char *s )$/;" f
wtime timing.c /^double wtime()$/;" f
16 1 # x and y dimensions for threads
1024 # iterations
256 # initial resolution
4096 # max resolution (spatial resolution)
3840 # resolution step size
1 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment