Commit b7b0f5df authored by Gaurav Kukreja's avatar Gaurav Kukreja

Created a copy of sequential code in head-parallel

Signed-off-by: 's avatarGaurav Kukreja <mailme.gaurav@gmail.com>
parent a2c0ce6e
# Intel compiler
CC = icc
CFLAGS = -g
MPICC = mpicc
all: heat
heat : heat.o input.o misc.o timing.o relax_gauss.o relax_jacobi.o
$(CC) $(CFLAGS) -o $@ $+ -lm
%.o : %.c %.h
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f *.o heat *~ *.ppm
remake : clean all
This diff is collapsed.
/*
* heat.h
*
* Iterative solver for heat distribution
*/
#include <stdio.h>
#include <stdlib.h>
#include "input.h"
#include "heat.h"
#include "timing.h"
void usage( char *s )
{
fprintf(stderr,
"Usage: %s <input file> [result file]\n\n", s);
}
int main( int argc, char *argv[] )
{
unsigned iter;
FILE *infile, *resfile;
char *resfilename;
double *tmp;
// algorithmic parameters
algoparam_t param;
int np;
double runtime, flop;
double residual;
// check arguments
if( argc < 2 )
{
usage( argv[0] );
return 1;
}
// check input file
if( !(infile=fopen(argv[1], "r")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for reading.\n\n", argv[1]);
usage(argv[0]);
return 1;
}
// check result file
resfilename= (argc>=3) ? argv[2]:"heat.ppm";
if( !(resfile=fopen(resfilename, "w")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for writing.\n\n",
resfilename);
usage(argv[0]);
return 1;
}
// check input
if( !read_input(infile, &param) )
{
fprintf(stderr, "\nError: Error parsing input file.\n\n");
usage(argv[0]);
return 1;
}
print_params(&param);
// set the visualization resolution
param.visres = param.max_res;
param.u = 0;
param.uhelp = 0;
param.uvis = 0;
param.act_res = param.initial_res;
// loop over different resolutions
while(1) {
// free allocated memory of previous experiment
if (param.u != 0)
finalize(&param);
if( !initialize(&param) )
{
fprintf(stderr, "Error in Jacobi initialization.\n\n");
usage(argv[0]);
}
fprintf(stderr, "Resolution: %5u\r", param.act_res);
// full size (param.act_res are only the inner points)
np = param.act_res + 2;
// starting time
runtime = wtime();
residual = 999999999;
iter = 0;
while(1) {
switch( param.algorithm ) {
case 0: // JACOBI
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np);
tmp = param.u;
param.u = param.uhelp;
param.uhelp = tmp;
break;
case 1: // GAUSS
relax_gauss(param.u, np, np);
residual = residual_gauss( param.u, param.uhelp, np, np);
break;
}
iter++;
// solution good enough ?
if (residual < 0.000005) break;
// max. iteration reached ? (no limit with maxiter=0)
if (param.maxiter>0 && iter>=param.maxiter) break;
if (iter % 100 == 0)
fprintf(stderr, "residual %f, %d iterations\n", residual, iter);
}
// Flop count after <i> iterations
flop = iter * 11.0 * param.act_res * param.act_res;
// stopping time
runtime = wtime() - runtime;
fprintf(stderr, "Resolution: %5u, ", param.act_res);
fprintf(stderr, "Time: %04.3f ", runtime);
fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ",
flop/1000000000.0,
flop/runtime/1000000);
fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);
// for plot...
printf("%5d %f\n", param.act_res, flop/runtime/1000000);
if (param.act_res + param.res_step_size > param.max_res) break;
param.act_res += param.res_step_size;
}
coarsen( param.u, np, np,
param.uvis, param.visres+2, param.visres+2 );
write_image( resfile, param.uvis,
param.visres+2,
param.visres+2 );
finalize( &param );
return 0;
}
/*
* heat.h
*
* Global definitions for the iterative solver
*/
#ifndef JACOBI_H_INCLUDED
#define JACOBI_H_INCLUDED
#include <stdio.h>
// configuration
typedef struct
{
float posx;
float posy;
float range;
float temp;
}
heatsrc_t;
typedef struct
{
unsigned maxiter; // maximum number of iterations
unsigned act_res;
unsigned max_res; // spatial resolution
unsigned initial_res;
unsigned res_step_size;
int algorithm; // 0=>Jacobi, 1=>Gauss
unsigned visres; // visualization resolution
double *u, *uhelp;
double *uvis;
unsigned numsrcs; // number of heat sources
heatsrc_t *heatsrcs;
}
algoparam_t;
// function declarations
// misc.c
int initialize( algoparam_t *param );
int finalize( algoparam_t *param );
void write_image( FILE * f, double *u,
unsigned sizex, unsigned sizey );
int coarsen(double *uold, unsigned oldx, unsigned oldy ,
double *unew, unsigned newx, unsigned newy );
// Gauss-Seidel: relax_gauss.c
double residual_gauss( double *u, double *utmp,
unsigned sizex, unsigned sizey );
void relax_gauss( double *u,
unsigned sizex, unsigned sizey );
// Jacobi: relax_jacobi.c
#if 0
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey );
#endif
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey );
#endif // JACOBI_H_INCLUDED
//
// input.c
//
#include <stdio.h>
#include <stdlib.h>
#include "input.h"
#define BUFSIZE 100
int read_input( FILE *infile, algoparam_t *param )
{
int i, n;
char buf[BUFSIZE];
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->maxiter) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->initial_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->max_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->res_step_size) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%d", &(param->algorithm) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%u", &(param->numsrcs) );
if( n!=1 )
return 0;
(param->heatsrcs) =
(heatsrc_t*) malloc( sizeof(heatsrc_t) * (param->numsrcs) );
for( i=0; i<param->numsrcs; i++ )
{
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%f %f %f %f",
&(param->heatsrcs[i].posx),
&(param->heatsrcs[i].posy),
&(param->heatsrcs[i].range),
&(param->heatsrcs[i].temp) );
if( n!=4 )
return 0;
}
return 1;
}
void print_params( algoparam_t *param )
{
int i;
fprintf(stderr, "Resolutions : (%u, %u, ... %u)\n",
param->initial_res,
param->initial_res + param->res_step_size,
param->max_res);
fprintf(stderr, "Iterations : %u\n", param->maxiter);
fprintf(stderr, "Algorithm : %d (%s)\n",
param->algorithm,
(param->algorithm == 0) ? "Jacobi":"Gauss-Jacobi" );
fprintf(stderr, "Num. Heat sources : %u\n", param->numsrcs);
for( i=0; i<param->numsrcs; i++ )
{
fprintf(stderr, " %2d: (%2.2f, %2.2f) %2.2f %2.2f \n",
i+1,
param->heatsrcs[i].posx,
param->heatsrcs[i].posy,
param->heatsrcs[i].range,
param->heatsrcs[i].temp );
}
}
//
// input.h
//
#ifndef INPUT_H_INCLUDED
#define INPUT_H_INCLUDED
#include <stdio.h>
#include "heat.h"
int read_input( FILE *infile, algoparam_t *param );
void print_params( algoparam_t *param );
#endif // INPUT_H_INCLUDED
/*
* misc.c
*
* Helper functions for
* - initialization
* - finalization,
* - writing out a picture
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include "heat.h"
/*
* Initialize the iterative solver
* - allocate memory for matrices
* - set boundary conditions according to configuration
*/
int initialize( algoparam_t *param )
{
int i, j;
double dist;
// total number of points (including border)
const int np = param->act_res + 2;
//
// allocate memory
//
(param->u) = (double*)calloc( sizeof(double),np*np );
(param->uhelp) = (double*)calloc( sizeof(double),np*np );
(param->uvis) = (double*)calloc( sizeof(double),
(param->visres+2) *
(param->visres+2) );
if( !(param->u) || !(param->uhelp) || !(param->uvis) )
{
fprintf(stderr, "Error: Cannot allocate memory\n");
return 0;
}
for( i=0; i<param->numsrcs; i++ )
{
/* top row */
for( j=0; j<np; j++ )
{
dist = sqrt( pow((double)j/(double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[j] = (param->u)[j];
}
}
/* bottom row */
for( j=0; j<np; j++ )
{
dist = sqrt( pow((double)j/(double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(1-param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[(np-1)*np+j]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[(np-1)*np+j] = (param->u)[(np-1)*np+j];
}
}
/* leftmost column */
for( j=1; j<np-1; j++ )
{
dist = sqrt( pow(param->heatsrcs[i].posx, 2)+
pow((double)j/(double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*np ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*np ] = (param->u)[ j*np ];
}
}
/* rightmost column */
for( j=1; j<np-1; j++ )
{
dist = sqrt( pow(1-param->heatsrcs[i].posx, 2)+
pow((double)j/(double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*np+(np-1) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*np+(np-1) ] = (param->u)[ j*np+(np-1) ];
}
}
}
return 1;
}
/*
* free used memory
*/
int finalize( algoparam_t *param )
{
if( param->u ) {
free(param->u);
param->u = 0;
}
if( param->uhelp ) {
free(param->uhelp);
param->uhelp = 0;
}
if( param->uvis ) {
free(param->uvis);
param->uvis = 0;
}
return 1;
}
/*
* write the given temperature u matrix to rgb values
* and write the resulting image to file f
*/
void write_image( FILE * f, double *u,
unsigned sizex, unsigned sizey )
{
// RGB table
unsigned char r[1024], g[1024], b[1024];
int i, j, k;
double min, max;
j=1023;
// prepare RGB table
for( i=0; i<256; i++ )
{
r[j]=255; g[j]=i; b[j]=0;
j--;
}
for( i=0; i<256; i++ )
{
r[j]=255-i; g[j]=255; b[j]=0;
j--;
}
for( i=0; i<256; i++ )
{
r[j]=0; g[j]=255; b[j]=i;
j--;
}
for( i=0; i<256; i++ )
{
r[j]=0; g[j]=255-i; b[j]=255;
j--;
}
min=DBL_MAX;
max=-DBL_MAX;
// find minimum and maximum
for( i=0; i<sizey; i++ )
{
for( j=0; j<sizex; j++ )
{
if( u[i*sizex+j]>max )
max=u[i*sizex+j];
if( u[i*sizex+j]<min )
min=u[i*sizex+j];
}
}
fprintf(f, "P3\n");
fprintf(f, "%u %u\n", sizex, sizey);
fprintf(f, "%u\n", 255);
for( i=0; i<sizey; i++ )
{
for( j=0; j<sizex; j++ )
{
k=(int)(1024.0*(u[i*sizex+j]-min)/(max-min));
fprintf(f, "%d %d %d ", r[k], g[k], b[k]);
}
fprintf(f, "\n");
}
}
int coarsen( double *uold, unsigned oldx, unsigned oldy ,
double *unew, unsigned newx, unsigned newy )
{
int i, j;
int stepx;
int stepy;
int stopx = newx;
int stopy = newy;
if (oldx>newx)
stepx=oldx/newx;
else {
stepx=1;
stopx=oldx;
}
if (oldy>newy)
stepy=oldy/newy;
else {
stepy=1;
stopy=oldy;
}
// NOTE: this only takes the top-left corner,
// and doesnt' do any real coarsening
for( i=0; i<stopy-1; i++ )
{
for( j=0; j<stopx-1; j++ )
{
unew[i*newx+j]=uold[i*oldx*stepy+j*stepx];
}
}
return 1;
}
#!/usr/bin/perl
#
# pfmon2cg, version 0.02
# Converter for pfmon output to Cachegrind profile format
#
# 4/2007, Josef Weidendorfer
#
#
# Read in pfmon data, e.g. written from the command
#
# pfmon --with-header --smpl-outfile=pfmon.out \
# -e l2_misses --short-smpl-periods=100000 -- <command>
# (sampling points every 100000 L2 Misses)
#
# This gives a list of sampled instruction addresses ($ipps)
while(<>) {
if (/^#/) {
if (/command line\s*:\s*(.*)$/) { $command = $1; }
if (/PMD\d+: (.*?)\s*(,|=)/) { $events .= "$1 "; }
next;
}
if (/^entry/) {
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
$entry = "";
}
chomp;
$entry .= $_;
}
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
($pid) = ($entry =~ /PID:(\S+)/);
open OUT, ">.p2cg";
print OUT $iips;
close OUT;
# Let addr2line convert the instruction addresses into
# source code references (needs debug info)
#
# This creates hashes with event sums
# Strip command line arguments
$onlycommand = $command;
$onlycommand =~ s/\s.*//;
$cmd = "addr2line -f -e $onlycommand < .p2cg";
#print "#Run Cmd $cmd\n";
open ATOL, "$cmd < .p2cg|";
while(<ATOL>) {
chomp;
$func = $_;
$fileline = <ATOL>;
chomp $fileline;
($file,$line) = ($fileline =~ /(.*?):(.*)/);
#print "Func $func, file $file, line $line\n";
if ($file{$func} eq "") {
push @funcs, $func;
$file{$func} = $file;
}
if ($count{$func,$line} eq "") {
$tmp = $lines->{$func};
$tmp = [] unless defined $tmp;
push @$tmp, $line;
$lines->{$func} = $tmp;
}
$count{$func,$line}++;
$totals++;
}
close ATOL;
#
# Dump out Cachegrind format
#
# First header info...
print "version: 1\n";
print "creator: pf2mon 0.02\n";
print "pid: $pid\n";
print "cmd: $command\n\n";
print "positions: line\n";
print "events: $events\n";
print "summary: $totals\n\n";
# Now profiling data.
#
# We only support static linked binaries for now, so all code
# is in the binary
print "ob=$onlycommand\n";
foreach $f (@funcs) {
print "\nfl=$file{$f}\n";
print "fn=$f\n";
$tmp = $lines->{$f};
foreach $l (@$tmp) {
print "$l $count{$f,$l}\n";
}
}
/*
* relax_gauss.c
*
* Gauss-Seidel Relaxation
*
*/
#include "heat.h"
/*
* Residual (length of error vector)
* between current solution and next after a Gauss-Seidel step
*
* Temporary array utmp needed to not change current solution
*
* Flop count in inner body is 7
*/
double residual_gauss( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
// first row (boundary condition) into utmp
for( j=1; j<sizex-1; j++ )
utmp[0*sizex + j] = u[0*sizex + j];
// first column (boundary condition) into utmp
for( i=1; i<sizey-1; i++ )
utmp[i*sizex + 0] = u[i*sizex + 0];
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (utmp[ i*sizex + (j-1) ]+ // new left
u [ i*sizex + (j+1) ]+ // right
utmp[ (i-1)*sizex + j ]+ // new top
u [ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
utmp[i*sizex + j] = unew;
}
}
return sum;
}
/*
* One Gauss-Seidel iteration step
*
* Flop count in inner body is 4
*/
void relax_gauss( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
u[i*sizex+j]= 0.25 * (u[ i*sizex + (j-1) ]+
u[ i*sizex + (j+1) ]+
u[ (i-1)*sizex + j ]+
u[ (i+1)*sizex + j ]);
}
}
}
/*
* relax_jacobi.c
*
* Jacobi Relaxation
*
*/
#include "heat.h"
#if 0
/*
* Residual (length of error vector)
* between current solution and next after a Jacobi step
*/
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (u[ i*sizex + (j-1) ]+ // left
u[ i*sizex + (j+1) ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
}
}
return sum;
}
#endif
/*
* One Jacobi iteration step
*/
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
int i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
{
utmp[i*sizex + j]= 0.25 * (u[ i*sizex+j -1 ]+ // left
u[ i*sizex+j +1 ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = utmp[i*sizex + j] - u[i*sizex +j];
sum += diff * diff;
}
}
}
return sum;
}
50 # iterations
100 # initial resolution
2900 # max resolution (spatial resolution)
200 # resolution step size
0 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
0 # iterations
100 # initial resolution
100 # max resolution (spatial resolution)
200 # resolution step size
1 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
//
// timing.c
//
#include <sys/time.h>
#include "timing.h"
double wtime()
{
struct timeval tv;
gettimeofday(&tv, 0);
return tv.tv_sec+1e-6*tv.tv_usec;
}
//
// timing.h
//
#ifndef TIMING_H_INCLUDED
#define TIMING_H_INCLUDED
double wtime();
#endif // TIMING_H_IINCLUDED
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment