Commit b7b0f5df authored by Gaurav Kukreja's avatar Gaurav Kukreja

Created a copy of sequential code in head-parallel

Signed-off-by: 's avatarGaurav Kukreja <mailme.gaurav@gmail.com>
parent a2c0ce6e
# Intel compiler
CC = icc
CFLAGS = -g
MPICC = mpicc
all: heat
heat : heat.o input.o misc.o timing.o relax_gauss.o relax_jacobi.o
$(CC) $(CFLAGS) -o $@ $+ -lm
%.o : %.c %.h
$(CC) $(CFLAGS) -c -o $@ $<
clean:
rm -f *.o heat *~ *.ppm
remake : clean all
#! /usr/bin/perl -w
##--------------------------------------------------------------------##
##--- The cache simulation framework: instrumentation, recording ---##
##--- and results printing. ---##
##--- callgrind_annotate ---##
##--------------------------------------------------------------------##
# This file is part of Callgrind, a cache-simulator and call graph
# tracer built on Valgrind.
#
# Copyright (C) 2003 Josef Weidendorfer
# Josef.Weidendorfer@gmx.de
#
# This file is based heavily on vg_annotate, part of Valgrind.
# Copyright (C) 2002 Nicholas Nethercote
# njn25@cam.ac.uk
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# 02111-1307, USA.
#
# The GNU General Public License is contained in the file COPYING.
#----------------------------------------------------------------------------
# Annotator for cachegrind/callgrind.
#
# File format is described in /docs/techdocs.html.
#
# Performance improvements record, using cachegrind.out for cacheprof, doing no
# source annotation (irrelevant ones removed):
# user time
# 1. turned off warnings in add_hash_a_to_b() 3.81 --> 3.48s
# [now add_array_a_to_b()]
# 6. make line_to_CC() return a ref instead of a hash 3.01 --> 2.77s
#
#10. changed file format to avoid file/fn name repetition 2.40s
# (not sure why higher; maybe due to new '.' entries?)
#11. changed file format to drop unnecessary end-line "."s 2.36s
# (shrunk file by about 37%)
#12. switched from hash CCs to array CCs 1.61s
#13. only adding b[i] to a[i] if b[i] defined (was doing it if
# either a[i] or b[i] was defined, but if b[i] was undefined
# it just added 0) 1.48s
#14. Stopped converting "." entries to undef and then back 1.16s
#15. Using foreach $i (x..y) instead of for ($i = 0...) in
# add_array_a_to_b() 1.11s
#
# Auto-annotating primes:
#16. Finding count lengths by int((length-1)/3), not by
# commifying (halves the number of commify calls) 1.68s --> 1.47s
use strict;
#----------------------------------------------------------------------------
# Overview: the running example in the comments is for:
# - events = A,B,C,D
# - --show=C,A,D
# - --sort=D,C
#----------------------------------------------------------------------------
#----------------------------------------------------------------------------
# Global variables, main data structures
#----------------------------------------------------------------------------
# CCs are arrays, the counts corresponding to @events, with 'undef'
# representing '.'. This makes things fast (faster than using hashes for CCs)
# but we have to use @sort_order and @show_order below to handle the --sort and
# --show options, which is a bit tricky.
#----------------------------------------------------------------------------
# Total counts for summary (an array reference).
my $summary_CC;
# Totals for each function, for overall summary.
# hash(filename:fn_name => CC array)
my %fn_totals;
# Individual CCs, organised by filename and line_num for easy annotation.
# hash(filename => hash(line_num => CC array))
my %all_ind_CCs;
# Files chosen for annotation on the command line.
# key = basename (trimmed of any directory), value = full filename
my %user_ann_files;
# Generic description string.
my $desc = "";
# Command line of profiled program.
my $cmd = "";
# Info on the profiled process.
my $creator = "";
my $pid = "";
my $part = "";
my $thread = "";
# Positions used for cost lines; default: line numbers
my $has_line = 1;
my $has_addr = 0;
# Events in input file, eg. (A,B,C,D)
my @events;
my $events;
# Events to show, from command line, eg. (C,A,D)
my @show_events;
# Map from @show_events indices to @events indices, eg. (2,0,3). Gives the
# order in which we must traverse @events in order to show the @show_events,
# eg. (@events[$show_order[1]], @events[$show_order[2]]...) = @show_events.
# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).)
my @show_order;
# Print out the function totals sorted by these events, eg. (D,C).
my @sort_events;
# Map from @sort_events indices to @events indices, eg. (3,2). Same idea as
# for @show_order.
my @sort_order;
# Thresholds, one for each sort event (or default to 1 if no sort events
# specified). We print out functions and do auto-annotations until we've
# handled this proportion of all the events thresholded.
my @thresholds;
my $default_threshold = 99;
my $single_threshold = $default_threshold;
# If on, automatically annotates all files that are involved in getting over
# all the threshold counts.
my $auto_annotate = 0;
# Number of lines to show around each annotated line.
my $context = 8;
# Directories in which to look for annotation files.
my @include_dirs = ("");
# Verbose mode
my $verbose = "1";
# Inclusive statistics (with subroutine events)
my $inclusive = 0;
# Inclusive totals for each function, for overall summary.
# hash(filename:fn_name => CC array)
my %cfn_totals;
# hash( file:func => [ called file:func ])
my $called_funcs;
# hash( file:func => [ calling file:func ])
my $calling_funcs;
# hash( file:func,line => [called file:func ])
my $called_from_line;
# hash( file:func,line => file:func
my %func_of_line;
# hash (file:func => object name)
my %obj_name;
# Print out the callers of a function
my $tree_caller = 0;
# Print out the called functions
my $tree_calling = 0;
# hash( file:func,cfile:cfunc => call CC[])
my %call_CCs;
# hash( file:func,cfile:cfunc => call counter)
my %call_counter;
# hash(context, index) => realname for compressed traces
my %compressed;
# Input file name, will be set in process_cmd_line
my $input_file = "";
# Version number
my $version = "3.3.0.SVN";
# Usage message.
my $usage = <<END
usage: callgrind_annotate [options] [data-file [source-files]]
options for the user, with defaults in [ ], are:
-h --help show this message
-v --version show version
--show=A,B,C only show figures for events A,B,C [all]
--sort=A,B,C sort columns by events A,B,C [event column order]
--threshold=<0--100> percentage of counts (of primary sort event) we
are interested in [$default_threshold%]
--auto=yes|no annotate all source files containing functions
that helped reach the event count threshold [no]
--context=N print N lines of context before and after
annotated lines [8]
--inclusive=yes|no add subroutine costs to functions calls [no]
--tree=none|caller| print for each function their callers,
calling|both the called functions or both [none]
-I --include=<dir> add <dir> to list of directories to search for
source files
END
;
# Used in various places of output.
my $fancy = '-' x 80 . "\n";
#-----------------------------------------------------------------------------
# Argument and option handling
#-----------------------------------------------------------------------------
sub process_cmd_line()
{
for my $arg (@ARGV) {
# Option handling
if ($arg =~ /^-/) {
# --version
if ($arg =~ /^-v$|^--version$/) {
die("callgrind_annotate-$version\n");
# --show=A,B,C
} elsif ($arg =~ /^--show=(.*)$/) {
@show_events = split(/,/, $1);
# --sort=A,B,C
} elsif ($arg =~ /^--sort=(.*)$/) {
@sort_events = split(/,/, $1);
foreach my $i (0 .. scalar @sort_events - 1) {
if ($sort_events[$i] =~#/.*:(\d+)$/) {
/.*:([\d\.]+)%?$/) {
my $th = $1;
($th >= 0 && $th <= 100) or die($usage);
$sort_events[$i] =~ s/:.*//;
$thresholds[$i] = $th;
} else {
$thresholds[$i] = 0;
}
}
# --threshold=X (tolerates a trailing '%')
} elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
$single_threshold = $1;
($1 >= 0 && $1 <= 100) or die($usage);
# --auto=yes|no
} elsif ($arg =~ /^--auto=(yes|no)$/) {
$auto_annotate = 1 if ($1 eq "yes");
$auto_annotate = 0 if ($1 eq "no");
# --context=N
} elsif ($arg =~ /^--context=([\d\.]+)$/) {
$context = $1;
if ($context < 0) {
die($usage);
}
# --inclusive=yes|no
} elsif ($arg =~ /^--inclusive=(yes|no)$/) {
$inclusive = 1 if ($1 eq "yes");
$inclusive = 0 if ($1 eq "no");
# --tree=none|caller|calling|both
} elsif ($arg =~ /^--tree=(none|caller|calling|both)$/) {
$tree_caller = 1 if ($1 eq "caller" || $1 eq "both");
$tree_calling = 1 if ($1 eq "calling" || $1 eq "both");
# --include=A,B,C
} elsif ($arg =~ /^(-I|--include)=(.*)$/) {
my $inc = $2;
$inc =~ s|/$||; # trim trailing '/'
push(@include_dirs, "$inc/");
} else { # -h and --help fall under this case
die($usage);
}
# Argument handling -- annotation file checking and selection.
# Stick filenames into a hash for quick 'n easy lookup throughout
} else {
if ($input_file eq "") {
$input_file = $arg;
}
else {
my $readable = 0;
foreach my $include_dir (@include_dirs) {
if (-r $include_dir . $arg) {
$readable = 1;
}
}
$readable or die("File $arg not found in any of: @include_dirs\n");
$user_ann_files{$arg} = 1;
}
}
}
if ($input_file eq "") {
$input_file = (<callgrind.out*>)[0];
if (!defined $input_file) {
$input_file = (<cachegrind.out*>)[0];
}
(defined $input_file) or die($usage);
print "Reading data from '$input_file'...\n";
}
}
#-----------------------------------------------------------------------------
# Reading of input file
#-----------------------------------------------------------------------------
sub max ($$)
{
my ($x, $y) = @_;
return ($x > $y ? $x : $y);
}
# Add the two arrays; any '.' entries are ignored. Two tricky things:
# 1. If $a2->[$i] is undefined, it defaults to 0 which is what we want; we turn
# off warnings to allow this. This makes things about 10% faster than
# checking for definedness ourselves.
# 2. We don't add an undefined count or a ".", even though it's value is 0,
# because we don't want to make an $a2->[$i] that is undef become 0
# unnecessarily.
sub add_array_a_to_b ($$)
{
my ($a1, $a2) = @_;
my $n = max(scalar @$a1, scalar @$a2);
$^W = 0;
foreach my $i (0 .. $n-1) {
$a2->[$i] += $a1->[$i] if (defined $a1->[$i] && "." ne $a1->[$i]);
}
$^W = 1;
}
# Add each event count to the CC array. '.' counts become undef, as do
# missing entries (implicitly).
sub line_to_CC ($)
{
my @CC = (split /\s+/, $_[0]);
(@CC <= @events) or die("Line $.: too many event counts\n");
return \@CC;
}
sub uncompressed_name($$)
{
my ($context, $name) = @_;
if ($name =~ /^\((\d+)\)\s*(.*)$/) {
my $index = $1;
my $realname = $2;
if ($realname eq "") {
$realname = $compressed{$context,$index};
}
else {
$compressed{$context,$index} = $realname;
}
return $realname;
}
return $name;
}
sub read_input_file()
{
open(INPUTFILE, "< $input_file") || die "File $input_file not opened\n";
my $line;
# Read header
while(<INPUTFILE>) {
# remove comments
s/#.*$//;
if (/^$/) { ; }
elsif (/^version:\s*(\d+)/) {
# Can't read format with major version > 1
($1<2) or die("Can't read format with major version $1.\n");
}
elsif (/^pid:\s+(.*)$/) { $pid = $1; }
elsif (/^thread:\s+(.*)$/) { $thread = $1; }
elsif (/^part:\s+(.*)$/) { $part = $1; }
elsif (/^desc:\s+(.*)$/) {
my $dline = $1;
# suppress profile options in description output
if ($dline =~ /^Option:/) {;}
else { $desc .= "$dline\n"; }
}
elsif (/^cmd:\s+(.*)$/) { $cmd = $1; }
elsif (/^creator:\s+(.*)$/) { $creator = $1; }
elsif (/^positions:\s+(.*)$/) {
my $positions = $1;
$has_line = ($positions =~ /line/);
$has_addr = ($positions =~ /(addr|instr)/);
}
elsif (/^events:\s+(.*)$/) {
$events = $1;
# events line is last in header
last;
}
else {
warn("WARNING: header line $. malformed, ignoring\n");
if ($verbose) { chomp; warn(" line: '$_'\n"); }
}
}
# Check for needed header entries
($cmd ne "") or die("Line $.: missing command line\n");
# Read "events:" line. We make a temporary hash in which the Nth event's
# value is N, which is useful for handling --show/--sort options below.
($events ne "") or die("Line $.: missing events line\n");
@events = split(/\s+/, $events);
my %events;
my $n = 0;
foreach my $event (@events) {
$events{$event} = $n;
$n++
}
# If no --show arg give, default to showing all events in the file.
# If --show option is used, check all specified events appeared in the
# "events:" line. Then initialise @show_order.
if (@show_events) {
foreach my $show_event (@show_events) {
(defined $events{$show_event}) or
die("--show event `$show_event' did not appear in input\n");
}
} else {
@show_events = @events;
}
foreach my $show_event (@show_events) {
push(@show_order, $events{$show_event});
}
# Do as for --show, but if no --sort arg given, default to sorting by
# column order (ie. first column event is primary sort key, 2nd column is
# 2ndary key, etc).
if (@sort_events) {
foreach my $sort_event (@sort_events) {
(defined $events{$sort_event}) or
die("--sort event `$sort_event' did not appear in input\n");
}
} else {
@sort_events = @events;
}
foreach my $sort_event (@sort_events) {
push(@sort_order, $events{$sort_event});
}
# If multiple threshold args weren't given via --sort, stick in the single
# threshold (either from --threshold if used, or the default otherwise) for
# the primary sort event, and 0% for the rest.
if (not @thresholds) {
foreach my $e (@sort_order) {
push(@thresholds, 0);
}
$thresholds[0] = $single_threshold;
}
my $curr_obj = "";
my $curr_file;
my $curr_fn;
my $curr_name;
my $curr_line_num = 0;
my $prev_line_num = 0;
my $curr_cobj = "";
my $curr_cfile = "";
my $curr_cfunc = "";
my $curr_cname;
my $curr_call_counter = 0;
my $curr_cfn_CC = [];
my $curr_fn_CC = [];
my $curr_file_ind_CCs = {}; # hash(line_num => CC)
# Read body of input file.
while (<INPUTFILE>) {
$prev_line_num = $curr_line_num;
s/#.*$//; # remove comments
s/^\+(\d+)/$prev_line_num+$1/e;
s/^\-(\d+)/$prev_line_num-$1/e;
s/^\*/$prev_line_num/e;
if (s/^(-?\d+|0x\w+)\s+//) {
$curr_line_num = $1;
if ($has_addr) {
if ($has_line) {
s/^\+(\d+)/$prev_line_num+$1/e;
s/^\-(\d+)/$prev_line_num-$1/e;
s/^\*/$prev_line_num/e;
if (s/^(\d+)\s+//) { $curr_line_num = $1; }
}
else { $curr_line_num = 0; }
}
my $CC = line_to_CC($_);
if ($curr_call_counter>0) {
# print "Read ($curr_name => $curr_cname) $curr_call_counter\n";
if (defined $call_CCs{$curr_name,$curr_cname}) {
add_array_a_to_b($CC, $call_CCs{$curr_name,$curr_cname});
$call_counter{$curr_name,$curr_cname} += $curr_call_counter;
}
else {
$call_CCs{$curr_name,$curr_cname} = $CC;
$call_counter{$curr_name,$curr_cname} = $curr_call_counter;
}
my $tmp = $called_from_line->{$curr_file,$curr_line_num};
if (!defined $tmp) {
$func_of_line{$curr_file,$curr_line_num} = $curr_name;
}
$tmp = {} unless defined $tmp;
$$tmp{$curr_cname} = 1;
$called_from_line->{$curr_file,$curr_line_num} = $tmp;
$call_CCs{$curr_name,$curr_cname,$curr_line_num} = $CC;
$call_counter{$curr_name,$curr_cname,$curr_line_num} = $curr_call_counter;
$curr_call_counter = 0;
# inclusive costs
$curr_cfn_CC = $cfn_totals{$curr_cname};
$curr_cfn_CC = [] unless (defined $curr_cfn_CC);
add_array_a_to_b($CC, $curr_cfn_CC);
$cfn_totals{$curr_cname} = $curr_cfn_CC;
if ($inclusive) {
add_array_a_to_b($CC, $curr_fn_CC);
}
next;
}
add_array_a_to_b($CC, $curr_fn_CC);
# If curr_file is selected, add CC to curr_file list. We look for
# full filename matches; or, if auto-annotating, we have to
# remember everything -- we won't know until the end what's needed.
if ($auto_annotate || defined $user_ann_files{$curr_file}) {
my $tmp = $curr_file_ind_CCs->{$curr_line_num};
$tmp = [] unless defined $tmp;
add_array_a_to_b($CC, $tmp);
$curr_file_ind_CCs->{$curr_line_num} = $tmp;
}
} elsif (s/^fn=(.*)$//) {
# Commit result from previous function
$fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name);
# Setup new one
$curr_fn = uncompressed_name("fn",$1);
$curr_name = "$curr_file:$curr_fn";
$obj_name{$curr_name} = $curr_obj;
$curr_fn_CC = $fn_totals{$curr_name};
$curr_fn_CC = [] unless (defined $curr_fn_CC);
} elsif (s/^ob=(.*)$//) {
$curr_obj = uncompressed_name("ob",$1);
} elsif (s/^fl=(.*)$//) {
$all_ind_CCs{$curr_file} = $curr_file_ind_CCs
if (defined $curr_file);
$curr_file = uncompressed_name("fl",$1);
$curr_file_ind_CCs = $all_ind_CCs{$curr_file};
$curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
} elsif (s/^(fi|fe)=(.*)$//) {
(defined $curr_name) or die("Line $.: Unexpected fi/fe line\n");
$fn_totals{$curr_name} = $curr_fn_CC;
$all_ind_CCs{$curr_file} = $curr_file_ind_CCs;
$curr_file = uncompressed_name("fl",$2);
$curr_name = "$curr_file:$curr_fn";
$curr_file_ind_CCs = $all_ind_CCs{$curr_file};
$curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
$curr_fn_CC = $fn_totals{$curr_name};
$curr_fn_CC = [] unless (defined $curr_fn_CC);
} elsif (s/^\s*$//) {
# blank, do nothing
} elsif (s/^cob=(.*)$//) {
$curr_cobj = uncompressed_name("ob",$1);
} elsif (s/^cfi=(.*)$//) {
$curr_cfile = uncompressed_name("fl",$1);
} elsif (s/^cfn=(.*)$//) {
$curr_cfunc = uncompressed_name("fn",$1);
if ($curr_cfile eq "") {
$curr_cname = "$curr_file:$curr_cfunc";
}
else {
$curr_cname = "$curr_cfile:$curr_cfunc";
$curr_cfile = "";
}
my $tmp = $calling_funcs->{$curr_cname};
$tmp = {} unless defined $tmp;
$$tmp{$curr_name} = 1;
$calling_funcs->{$curr_cname} = $tmp;
my $tmp2 = $called_funcs->{$curr_name};
$tmp2 = {} unless defined $tmp2;
$$tmp2{$curr_cname} = 1;
$called_funcs->{$curr_name} = $tmp2;
} elsif (s/^calls=(\d+)//) {
$curr_call_counter = $1;
} elsif (s/^(jump|jcnd)=//) {
#ignore jump information
} elsif (s/^jfi=(.*)$//) {
# side effect needed: possibly add compression mapping
uncompressed_name("fl",$1);
# ignore jump information
} elsif (s/^jfn=(.*)$//) {
# side effect needed: possibly add compression mapping
uncompressed_name("fn",$1);
# ignore jump information
} elsif (s/^totals:\s+//) {
#ignore
} elsif (s/^summary:\s+//) {
$summary_CC = line_to_CC($_);
} else {
warn("WARNING: line $. malformed, ignoring\n");
if ($verbose) { chomp; warn(" line: '$_'\n"); }
}
}
# Check if summary line was present
if (not defined $summary_CC) {
warn("WARNING: missing final summary line, no summary will be printed\n");
}
else {
# Finish up handling final filename/fn_name counts
$fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC
if (defined $curr_file && defined $curr_fn);
$all_ind_CCs{$curr_file} =
$curr_file_ind_CCs if (defined $curr_file);
(scalar(@$summary_CC) == @events)
or die("Line $.: summary event and total event mismatch\n");
}
# Correct inclusive totals
if ($inclusive) {
foreach my $name (keys %cfn_totals) {
$fn_totals{$name} = $cfn_totals{$name};
}
}
close(INPUTFILE);
}
#-----------------------------------------------------------------------------
# Print options used
#-----------------------------------------------------------------------------
sub print_options ()
{
print($fancy);
print "Profile data file '$input_file'";
if ($creator ne "") { print " (creator: $creator)"; }
print "\n";
print($fancy);
print($desc);
my $target = $cmd;
if ($pid ne "") {
$target .= " (PID $pid";
if ($part ne "") { $target .= ", part $part"; }
if ($thread ne "") { $target .= ", thread $thread"; }
$target .= ")";
}
print("Profiled target: $target\n");
print("Events recorded: @events\n");
print("Events shown: @show_events\n");
print("Event sort order: @sort_events\n");
print("Thresholds: @thresholds\n");
my @include_dirs2 = @include_dirs; # copy @include_dirs
shift(@include_dirs2); # remove "" entry, which is always the first
unshift(@include_dirs2, "") if (0 == @include_dirs2);
my $include_dir = shift(@include_dirs2);
print("Include dirs: $include_dir\n");
foreach my $include_dir (@include_dirs2) {
print(" $include_dir\n");
}
my @user_ann_files = keys %user_ann_files;
unshift(@user_ann_files, "") if (0 == @user_ann_files);
my $user_ann_file = shift(@user_ann_files);
print("User annotated: $user_ann_file\n");
foreach $user_ann_file (@user_ann_files) {
print(" $user_ann_file\n");
}
my $is_on = ($auto_annotate ? "on" : "off");
print("Auto-annotation: $is_on\n");
print("\n");
}
#-----------------------------------------------------------------------------
# Print summary and sorted function totals
#-----------------------------------------------------------------------------
sub mycmp ($$)
{
my ($c, $d) = @_;
# Iterate through sort events (eg. 3,2); return result if two are different
foreach my $i (@sort_order) {
my ($x, $y);
$x = $c->[$i];
$y = $d->[$i];
$x = -1 unless defined $x;
$y = -1 unless defined $y;
my $cmp = $y <=> $x; # reverse sort
if (0 != $cmp) {
return $cmp;
}
}
# Exhausted events, equal
return 0;
}
sub commify ($) {
my ($val) = @_;
1 while ($val =~ s/^(\d+)(\d{3})/$1,$2/);
return $val;
}
# Because the counts can get very big, and we don't want to waste screen space
# and make lines too long, we compute exactly how wide each column needs to be
# by finding the widest entry for each one.
sub compute_CC_col_widths (@)
{
my @CCs = @_;
my $CC_col_widths = [];
# Initialise with minimum widths (from event names)
foreach my $event (@events) {
push(@$CC_col_widths, length($event));
}
# Find maximum width count for each column. @CC_col_width positions
# correspond to @CC positions.
foreach my $CC (@CCs) {
foreach my $i (0 .. scalar(@$CC)-1) {
if (defined $CC->[$i]) {
# Find length, accounting for commas that will be added
my $length = length $CC->[$i];
my $clength = $length + int(($length - 1) / 3);
$CC_col_widths->[$i] = max($CC_col_widths->[$i], $clength);
}
}
}
return $CC_col_widths;
}
# Print the CC with each column's size dictated by $CC_col_widths.
sub print_CC ($$)
{
my ($CC, $CC_col_widths) = @_;
foreach my $i (@show_order) {
my $count = (defined $CC->[$i] ? commify($CC->[$i]) : ".");
my $space = ' ' x ($CC_col_widths->[$i] - length($count));
print("$space$count ");
}
}
sub print_events ($)
{
my ($CC_col_widths) = @_;
foreach my $i (@show_order) {
my $event = $events[$i];
my $event_width = length($event);
my $col_width = $CC_col_widths->[$i];
my $space = ' ' x ($col_width - $event_width);
print("$space$event ");
}
}
# Prints summary and function totals (with separate column widths, so that
# function names aren't pushed over unnecessarily by huge summary figures).
# Also returns a hash containing all the files that are involved in getting the
# events count above the thresholds (ie. all the interesting ones).
sub print_summary_and_fn_totals ()
{
my @fn_fullnames = keys %fn_totals;
# Work out the size of each column for printing (summary and functions
# separately).
my $summary_CC_col_widths = compute_CC_col_widths($summary_CC);
my $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals);
# Header and counts for summary
print($fancy);
print_events($summary_CC_col_widths);
print("\n");
print($fancy);
print_CC($summary_CC, $summary_CC_col_widths);
print(" PROGRAM TOTALS\n");
print("\n");
# Header for functions
print($fancy);
print_events($fn_CC_col_widths);
print(" file:function\n");
print($fancy);
# Sort function names into order dictated by --sort option.
@fn_fullnames = sort {
mycmp($fn_totals{$a}, $fn_totals{$b})
} @fn_fullnames;
# Assertion
(scalar @sort_order == scalar @thresholds) or
die("sort_order length != thresholds length:\n",
" @sort_order\n @thresholds\n");
my $threshold_files = {};
# @curr_totals has the same shape as @sort_order and @thresholds
my @curr_totals = ();
foreach my $e (@thresholds) {
push(@curr_totals, 0);
}
# Print functions, stopping when the threshold has been reached.
foreach my $fn_name (@fn_fullnames) {
# Stop when we've reached all the thresholds
my $reached_all_thresholds = 1;
foreach my $i (0 .. scalar @thresholds - 1) {
my $prop = $curr_totals[$i] * 100;
if ($summary_CC->[$sort_order[$i]] >0) {
$prop = $prop / $summary_CC->[$sort_order[$i]];
}
$reached_all_thresholds &= ($prop >= $thresholds[$i]);
}
last if $reached_all_thresholds;
if ($tree_caller || $tree_calling) { print "\n"; }
if ($tree_caller && ($fn_name ne "???:???")) {
# Print function callers
my $tmp1 = $calling_funcs->{$fn_name};
if (defined $tmp1) {
foreach my $calling (keys %$tmp1) {
if (defined $call_counter{$calling,$fn_name}) {
print_CC($call_CCs{$calling,$fn_name}, $fn_CC_col_widths);
print" < $calling (";
print $call_counter{$calling,$fn_name} . "x)";
if (defined $obj_name{$calling}) {
print " [$obj_name{$calling}]";
}
print "\n";
}
}
}
}
# Print function results
my $fn_CC = $fn_totals{$fn_name};
print_CC($fn_CC, $fn_CC_col_widths);
if ($tree_caller || $tree_calling) { print " * "; }
print(" $fn_name");
if (defined $obj_name{$fn_name}) {
print " [$obj_name{$fn_name}]";
}
print "\n";
if ($tree_calling && ($fn_name ne "???:???")) {
# Print called functions
my $tmp2 = $called_funcs->{$fn_name};
if (defined $tmp2) {
foreach my $called (keys %$tmp2) {
if (defined $call_counter{$fn_name,$called}) {
print_CC($call_CCs{$fn_name,$called}, $fn_CC_col_widths);
print" > $called (";
print $call_counter{$fn_name,$called} . "x)";
if (defined $obj_name{$called}) {
print " [$obj_name{$called}]";
}
print "\n";
}
}
}
}
# Update the threshold counts
my $filename = $fn_name;
$filename =~ s/:.+$//; # remove function name
$threshold_files->{$filename} = 1;
foreach my $i (0 .. scalar @sort_order - 1) {
if ($inclusive) {
$curr_totals[$i] = $summary_CC->[$sort_order[$i]] -
$fn_CC->[$sort_order[$i]]
if (defined $fn_CC->[$sort_order[$i]]);
} else {
$curr_totals[$i] += $fn_CC->[$sort_order[$i]]
if (defined $fn_CC->[$sort_order[$i]]);
}
}
}
print("\n");
return $threshold_files;
}
#-----------------------------------------------------------------------------
# Annotate selected files
#-----------------------------------------------------------------------------
# Issue a warning that the source file is more recent than the input file.
sub warning_on_src_more_recent_than_inputfile ($)
{
my $src_file = $_[0];
my $warning = <<END
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@ Source file '$src_file' is more recent than input file '$input_file'.
@ Annotations may not be correct.
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
END
;
print($warning);
}
# If there is information about lines not in the file, issue a warning
# explaining possible causes.
sub warning_on_nonexistent_lines ($$$)
{
my ($src_more_recent_than_inputfile, $src_file, $excess_line_nums) = @_;
my $cause_and_solution;
if ($src_more_recent_than_inputfile) {
$cause_and_solution = <<END
@@ cause: '$src_file' has changed since information was gathered.
@@ If so, a warning will have already been issued about this.
@@ solution: Recompile program and rerun under "valgrind --cachesim=yes" to
@@ gather new information.
END
# We suppress warnings about .h files
} elsif ($src_file =~ /\.h$/) {
$cause_and_solution = <<END
@@ cause: bug in the Valgrind's debug info reader that screws up with .h
@@ files sometimes
@@ solution: none, sorry
END
} else {
$cause_and_solution = <<END
@@ cause: not sure, sorry
END
}
my $warning = <<END
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@
@@ Information recorded about lines past the end of '$src_file'.
@@
@@ Probable cause and solution:
$cause_and_solution@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
END
;
print($warning);
}
sub annotate_ann_files($)
{
my ($threshold_files) = @_;
my %all_ann_files;
my @unfound_auto_annotate_files;
my $printed_totals_CC = [];
# If auto-annotating, add interesting files (but not "???")
if ($auto_annotate) {
delete $threshold_files->{"???"};
%all_ann_files = (%user_ann_files, %$threshold_files)
} else {
%all_ann_files = %user_ann_files;
}
# Track if we did any annotations.
my $did_annotations = 0;
LOOP:
foreach my $src_file (keys %all_ann_files) {
my $opened_file = "";
my $full_file_name = "";
foreach my $include_dir (@include_dirs) {
my $try_name = $include_dir . $src_file;
if (open(INPUTFILE, "< $try_name")) {
$opened_file = $try_name;
$full_file_name = ($include_dir eq ""
? $src_file
: "$include_dir + $src_file");
last;
}
}
if (not $opened_file) {
# Failed to open the file. If chosen on the command line, die.
# If arose from auto-annotation, print a little message.
if (defined $user_ann_files{$src_file}) {
die("File $src_file not opened in any of: @include_dirs\n");
} else {
push(@unfound_auto_annotate_files, $src_file);
}
} else {
# File header (distinguish between user- and auto-selected files).
print("$fancy");
my $ann_type =
(defined $user_ann_files{$src_file} ? "User" : "Auto");
print("-- $ann_type-annotated source: $full_file_name\n");
print("$fancy");
# Get file's CCs
my $src_file_CCs = $all_ind_CCs{$src_file};
if (!defined $src_file_CCs) {
print(" No information has been collected for $src_file\n\n");
next LOOP;
}
$did_annotations = 1;
# Numeric, not lexicographic sort!
my @line_nums = sort {$a <=> $b} keys %$src_file_CCs;
# If $src_file more recent than cachegrind.out, issue warning
my $src_more_recent_than_inputfile = 0;
if ((stat $opened_file)[9] > (stat $input_file)[9]) {
$src_more_recent_than_inputfile = 1;
warning_on_src_more_recent_than_inputfile($src_file);
}
# Work out the size of each column for printing
my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs);
# Events header
print_events($CC_col_widths);
print("\n\n");
# Shift out 0 if it's in the line numbers (from unknown entries,
# likely due to bugs in Valgrind's stabs debug info reader)
shift(@line_nums) if (0 == $line_nums[0]);
# Finds interesting line ranges -- all lines with a CC, and all
# lines within $context lines of a line with a CC.
my $n = @line_nums;
my @pairs;
for (my $i = 0; $i < $n; $i++) {
push(@pairs, $line_nums[$i] - $context); # lower marker
while ($i < $n-1 &&
$line_nums[$i] + 2*$context >= $line_nums[$i+1]) {
$i++;
}
push(@pairs, $line_nums[$i] + $context); # upper marker
}
# Annotate chosen lines, tracking total counts of lines printed
$pairs[0] = 1 if ($pairs[0] < 1);
while (@pairs) {
my $low = shift @pairs;
my $high = shift @pairs;
while ($. < $low-1) {
my $tmp = <INPUTFILE>;
last unless (defined $tmp); # hack to detect EOF
}
my $src_line;
# Print line number, unless start of file
print("-- line $low " . '-' x 40 . "\n") if ($low != 1);
while (($. < $high) && ($src_line = <INPUTFILE>)) {
if (defined $line_nums[0] && $. == $line_nums[0]) {
print_CC($src_file_CCs->{$.}, $CC_col_widths);
add_array_a_to_b($src_file_CCs->{$.},
$printed_totals_CC);
shift(@line_nums);
} else {
print_CC( [], $CC_col_widths);
}
print(" $src_line");
my $tmp = $called_from_line->{$src_file,$.};
my $func = $func_of_line{$src_file,$.};
if (defined $tmp) {
foreach my $called (keys %$tmp) {
if (defined $call_CCs{$func,$called,$.}) {
print_CC($call_CCs{$func,$called,$.}, $CC_col_widths);
print " => $called (";
print $call_counter{$func,$called,$.} . "x)\n";
}
}
}
}
# Print line number, unless EOF
if ($src_line) {
print("-- line $high " . '-' x 40 . "\n");
} else {
last;
}
}
# If there was info on lines past the end of the file...
if (@line_nums) {
foreach my $line_num (@line_nums) {
print_CC($src_file_CCs->{$line_num}, $CC_col_widths);
print(" <bogus line $line_num>\n");
}
print("\n");
warning_on_nonexistent_lines($src_more_recent_than_inputfile,
$src_file, \@line_nums);
}
print("\n");
# Print summary of counts attributed to file but not to any
# particular line (due to incomplete debug info).
if ($src_file_CCs->{0}) {
print_CC($src_file_CCs->{0}, $CC_col_widths);
print(" <counts for unidentified lines in $src_file>\n\n");
}
close(INPUTFILE);
}
}
# Print list of unfound auto-annotate selected files.
if (@unfound_auto_annotate_files) {
print("$fancy");
print("The following files chosen for auto-annotation could not be found:\n");
print($fancy);
foreach my $f (@unfound_auto_annotate_files) {
print(" $f\n");
}
print("\n");
}
# If we did any annotating, print what proportion of events were covered by
# annotated lines above.
if ($did_annotations) {
my $percent_printed_CC;
foreach (my $i = 0; $i < @$summary_CC; $i++) {
$percent_printed_CC->[$i] =
sprintf("%.0f",
$printed_totals_CC->[$i] / $summary_CC->[$i] * 100);
}
my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC);
print($fancy);
print_events($pp_CC_col_widths);
print("\n");
print($fancy);
print_CC($percent_printed_CC, $pp_CC_col_widths);
print(" percentage of events annotated\n\n");
}
}
#----------------------------------------------------------------------------
# "main()"
#----------------------------------------------------------------------------
process_cmd_line();
read_input_file();
print_options();
my $threshold_files = print_summary_and_fn_totals();
annotate_ann_files($threshold_files);
##--------------------------------------------------------------------##
##--- end vg_annotate.in ---##
##--------------------------------------------------------------------##
/*
* heat.h
*
* Iterative solver for heat distribution
*/
#include <stdio.h>
#include <stdlib.h>
#include "input.h"
#include "heat.h"
#include "timing.h"
void usage( char *s )
{
fprintf(stderr,
"Usage: %s <input file> [result file]\n\n", s);
}
int main( int argc, char *argv[] )
{
unsigned iter;
FILE *infile, *resfile;
char *resfilename;
double *tmp;
// algorithmic parameters
algoparam_t param;
int np;
double runtime, flop;
double residual;
// check arguments
if( argc < 2 )
{
usage( argv[0] );
return 1;
}
// check input file
if( !(infile=fopen(argv[1], "r")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for reading.\n\n", argv[1]);
usage(argv[0]);
return 1;
}
// check result file
resfilename= (argc>=3) ? argv[2]:"heat.ppm";
if( !(resfile=fopen(resfilename, "w")) )
{
fprintf(stderr,
"\nError: Cannot open \"%s\" for writing.\n\n",
resfilename);
usage(argv[0]);
return 1;
}
// check input
if( !read_input(infile, &param) )
{
fprintf(stderr, "\nError: Error parsing input file.\n\n");
usage(argv[0]);
return 1;
}
print_params(&param);
// set the visualization resolution
param.visres = param.max_res;
param.u = 0;
param.uhelp = 0;
param.uvis = 0;
param.act_res = param.initial_res;
// loop over different resolutions
while(1) {
// free allocated memory of previous experiment
if (param.u != 0)
finalize(&param);
if( !initialize(&param) )
{
fprintf(stderr, "Error in Jacobi initialization.\n\n");
usage(argv[0]);
}
fprintf(stderr, "Resolution: %5u\r", param.act_res);
// full size (param.act_res are only the inner points)
np = param.act_res + 2;
// starting time
runtime = wtime();
residual = 999999999;
iter = 0;
while(1) {
switch( param.algorithm ) {
case 0: // JACOBI
residual = relax_jacobi_return_residual(param.u, param.uhelp, np, np);
tmp = param.u;
param.u = param.uhelp;
param.uhelp = tmp;
break;
case 1: // GAUSS
relax_gauss(param.u, np, np);
residual = residual_gauss( param.u, param.uhelp, np, np);
break;
}
iter++;
// solution good enough ?
if (residual < 0.000005) break;
// max. iteration reached ? (no limit with maxiter=0)
if (param.maxiter>0 && iter>=param.maxiter) break;
if (iter % 100 == 0)
fprintf(stderr, "residual %f, %d iterations\n", residual, iter);
}
// Flop count after <i> iterations
flop = iter * 11.0 * param.act_res * param.act_res;
// stopping time
runtime = wtime() - runtime;
fprintf(stderr, "Resolution: %5u, ", param.act_res);
fprintf(stderr, "Time: %04.3f ", runtime);
fprintf(stderr, "(%3.3f GFlop => %6.2f MFlop/s, ",
flop/1000000000.0,
flop/runtime/1000000);
fprintf(stderr, "residual %f, %d iterations)\n", residual, iter);
// for plot...
printf("%5d %f\n", param.act_res, flop/runtime/1000000);
if (param.act_res + param.res_step_size > param.max_res) break;
param.act_res += param.res_step_size;
}
coarsen( param.u, np, np,
param.uvis, param.visres+2, param.visres+2 );
write_image( resfile, param.uvis,
param.visres+2,
param.visres+2 );
finalize( &param );
return 0;
}
/*
* heat.h
*
* Global definitions for the iterative solver
*/
#ifndef JACOBI_H_INCLUDED
#define JACOBI_H_INCLUDED
#include <stdio.h>
// configuration
typedef struct
{
float posx;
float posy;
float range;
float temp;
}
heatsrc_t;
typedef struct
{
unsigned maxiter; // maximum number of iterations
unsigned act_res;
unsigned max_res; // spatial resolution
unsigned initial_res;
unsigned res_step_size;
int algorithm; // 0=>Jacobi, 1=>Gauss
unsigned visres; // visualization resolution
double *u, *uhelp;
double *uvis;
unsigned numsrcs; // number of heat sources
heatsrc_t *heatsrcs;
}
algoparam_t;
// function declarations
// misc.c
int initialize( algoparam_t *param );
int finalize( algoparam_t *param );
void write_image( FILE * f, double *u,
unsigned sizex, unsigned sizey );
int coarsen(double *uold, unsigned oldx, unsigned oldy ,
double *unew, unsigned newx, unsigned newy );
// Gauss-Seidel: relax_gauss.c
double residual_gauss( double *u, double *utmp,
unsigned sizex, unsigned sizey );
void relax_gauss( double *u,
unsigned sizex, unsigned sizey );
// Jacobi: relax_jacobi.c
#if 0
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey );
#endif
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey );
#endif // JACOBI_H_INCLUDED
//
// input.c
//
#include <stdio.h>
#include <stdlib.h>
#include "input.h"
#define BUFSIZE 100
int read_input( FILE *infile, algoparam_t *param )
{
int i, n;
char buf[BUFSIZE];
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->maxiter) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->initial_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->max_res) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%u", &(param->res_step_size) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%d", &(param->algorithm) );
if( n!=1 )
return 0;
fgets(buf, BUFSIZE, infile);
n = sscanf(buf, "%u", &(param->numsrcs) );
if( n!=1 )
return 0;
(param->heatsrcs) =
(heatsrc_t*) malloc( sizeof(heatsrc_t) * (param->numsrcs) );
for( i=0; i<param->numsrcs; i++ )
{
fgets(buf, BUFSIZE, infile);
n = sscanf( buf, "%f %f %f %f",
&(param->heatsrcs[i].posx),
&(param->heatsrcs[i].posy),
&(param->heatsrcs[i].range),
&(param->heatsrcs[i].temp) );
if( n!=4 )
return 0;
}
return 1;
}
void print_params( algoparam_t *param )
{
int i;
fprintf(stderr, "Resolutions : (%u, %u, ... %u)\n",
param->initial_res,
param->initial_res + param->res_step_size,
param->max_res);
fprintf(stderr, "Iterations : %u\n", param->maxiter);
fprintf(stderr, "Algorithm : %d (%s)\n",
param->algorithm,
(param->algorithm == 0) ? "Jacobi":"Gauss-Jacobi" );
fprintf(stderr, "Num. Heat sources : %u\n", param->numsrcs);
for( i=0; i<param->numsrcs; i++ )
{
fprintf(stderr, " %2d: (%2.2f, %2.2f) %2.2f %2.2f \n",
i+1,
param->heatsrcs[i].posx,
param->heatsrcs[i].posy,
param->heatsrcs[i].range,
param->heatsrcs[i].temp );
}
}
//
// input.h
//
#ifndef INPUT_H_INCLUDED
#define INPUT_H_INCLUDED
#include <stdio.h>
#include "heat.h"
int read_input( FILE *infile, algoparam_t *param );
void print_params( algoparam_t *param );
#endif // INPUT_H_INCLUDED
/*
* misc.c
*
* Helper functions for
* - initialization
* - finalization,
* - writing out a picture
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include "heat.h"
/*
* Initialize the iterative solver
* - allocate memory for matrices
* - set boundary conditions according to configuration
*/
int initialize( algoparam_t *param )
{
int i, j;
double dist;
// total number of points (including border)
const int np = param->act_res + 2;
//
// allocate memory
//
(param->u) = (double*)calloc( sizeof(double),np*np );
(param->uhelp) = (double*)calloc( sizeof(double),np*np );
(param->uvis) = (double*)calloc( sizeof(double),
(param->visres+2) *
(param->visres+2) );
if( !(param->u) || !(param->uhelp) || !(param->uvis) )
{
fprintf(stderr, "Error: Cannot allocate memory\n");
return 0;
}
for( i=0; i<param->numsrcs; i++ )
{
/* top row */
for( j=0; j<np; j++ )
{
dist = sqrt( pow((double)j/(double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[j] +=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[j] = (param->u)[j];
}
}
/* bottom row */
for( j=0; j<np; j++ )
{
dist = sqrt( pow((double)j/(double)(np-1) -
param->heatsrcs[i].posx, 2)+
pow(1-param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[(np-1)*np+j]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[(np-1)*np+j] = (param->u)[(np-1)*np+j];
}
}
/* leftmost column */
for( j=1; j<np-1; j++ )
{
dist = sqrt( pow(param->heatsrcs[i].posx, 2)+
pow((double)j/(double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*np ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*np ] = (param->u)[ j*np ];
}
}
/* rightmost column */
for( j=1; j<np-1; j++ )
{
dist = sqrt( pow(1-param->heatsrcs[i].posx, 2)+
pow((double)j/(double)(np-1) -
param->heatsrcs[i].posy, 2));
if( dist <= param->heatsrcs[i].range )
{
(param->u)[ j*np+(np-1) ]+=
(param->heatsrcs[i].range-dist) /
param->heatsrcs[i].range *
param->heatsrcs[i].temp;
(param->uhelp)[ j*np+(np-1) ] = (param->u)[ j*np+(np-1) ];
}
}
}
return 1;
}
/*
* free used memory
*/
int finalize( algoparam_t *param )
{
if( param->u ) {
free(param->u);
param->u = 0;
}
if( param->uhelp ) {
free(param->uhelp);
param->uhelp = 0;
}
if( param->uvis ) {
free(param->uvis);
param->uvis = 0;
}
return 1;
}
/*
* write the given temperature u matrix to rgb values
* and write the resulting image to file f
*/
void write_image( FILE * f, double *u,
unsigned sizex, unsigned sizey )
{
// RGB table
unsigned char r[1024], g[1024], b[1024];
int i, j, k;
double min, max;
j=1023;
// prepare RGB table
for( i=0; i<256; i++ )
{
r[j]=255; g[j]=i; b[j]=0;
j--;
}
for( i=0; i<256; i++ )
{
r[j]=255-i; g[j]=255; b[j]=0;
j--;
}
for( i=0; i<256; i++ )
{
r[j]=0; g[j]=255; b[j]=i;
j--;
}
for( i=0; i<256; i++ )
{
r[j]=0; g[j]=255-i; b[j]=255;
j--;
}
min=DBL_MAX;
max=-DBL_MAX;
// find minimum and maximum
for( i=0; i<sizey; i++ )
{
for( j=0; j<sizex; j++ )
{
if( u[i*sizex+j]>max )
max=u[i*sizex+j];
if( u[i*sizex+j]<min )
min=u[i*sizex+j];
}
}
fprintf(f, "P3\n");
fprintf(f, "%u %u\n", sizex, sizey);
fprintf(f, "%u\n", 255);
for( i=0; i<sizey; i++ )
{
for( j=0; j<sizex; j++ )
{
k=(int)(1024.0*(u[i*sizex+j]-min)/(max-min));
fprintf(f, "%d %d %d ", r[k], g[k], b[k]);
}
fprintf(f, "\n");
}
}
int coarsen( double *uold, unsigned oldx, unsigned oldy ,
double *unew, unsigned newx, unsigned newy )
{
int i, j;
int stepx;
int stepy;
int stopx = newx;
int stopy = newy;
if (oldx>newx)
stepx=oldx/newx;
else {
stepx=1;
stopx=oldx;
}
if (oldy>newy)
stepy=oldy/newy;
else {
stepy=1;
stopy=oldy;
}
// NOTE: this only takes the top-left corner,
// and doesnt' do any real coarsening
for( i=0; i<stopy-1; i++ )
{
for( j=0; j<stopx-1; j++ )
{
unew[i*newx+j]=uold[i*oldx*stepy+j*stepx];
}
}
return 1;
}
#!/usr/bin/perl
#
# pfmon2cg, version 0.02
# Converter for pfmon output to Cachegrind profile format
#
# 4/2007, Josef Weidendorfer
#
#
# Read in pfmon data, e.g. written from the command
#
# pfmon --with-header --smpl-outfile=pfmon.out \
# -e l2_misses --short-smpl-periods=100000 -- <command>
# (sampling points every 100000 L2 Misses)
#
# This gives a list of sampled instruction addresses ($ipps)
while(<>) {
if (/^#/) {
if (/command line\s*:\s*(.*)$/) { $command = $1; }
if (/PMD\d+: (.*?)\s*(,|=)/) { $events .= "$1 "; }
next;
}
if (/^entry/) {
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
$entry = "";
}
chomp;
$entry .= $_;
}
($iip) = ($entry =~ /IIP:(\S+)/);
if ($iip ne "") { $iips .= "$iip\n"; }
($pid) = ($entry =~ /PID:(\S+)/);
open OUT, ">.p2cg";
print OUT $iips;
close OUT;
# Let addr2line convert the instruction addresses into
# source code references (needs debug info)
#
# This creates hashes with event sums
# Strip command line arguments
$onlycommand = $command;
$onlycommand =~ s/\s.*//;
$cmd = "addr2line -f -e $onlycommand < .p2cg";
#print "#Run Cmd $cmd\n";
open ATOL, "$cmd < .p2cg|";
while(<ATOL>) {
chomp;
$func = $_;
$fileline = <ATOL>;
chomp $fileline;
($file,$line) = ($fileline =~ /(.*?):(.*)/);
#print "Func $func, file $file, line $line\n";
if ($file{$func} eq "") {
push @funcs, $func;
$file{$func} = $file;
}
if ($count{$func,$line} eq "") {
$tmp = $lines->{$func};
$tmp = [] unless defined $tmp;
push @$tmp, $line;
$lines->{$func} = $tmp;
}
$count{$func,$line}++;
$totals++;
}
close ATOL;
#
# Dump out Cachegrind format
#
# First header info...
print "version: 1\n";
print "creator: pf2mon 0.02\n";
print "pid: $pid\n";
print "cmd: $command\n\n";
print "positions: line\n";
print "events: $events\n";
print "summary: $totals\n\n";
# Now profiling data.
#
# We only support static linked binaries for now, so all code
# is in the binary
print "ob=$onlycommand\n";
foreach $f (@funcs) {
print "\nfl=$file{$f}\n";
print "fn=$f\n";
$tmp = $lines->{$f};
foreach $l (@$tmp) {
print "$l $count{$f,$l}\n";
}
}
/*
* relax_gauss.c
*
* Gauss-Seidel Relaxation
*
*/
#include "heat.h"
/*
* Residual (length of error vector)
* between current solution and next after a Gauss-Seidel step
*
* Temporary array utmp needed to not change current solution
*
* Flop count in inner body is 7
*/
double residual_gauss( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
// first row (boundary condition) into utmp
for( j=1; j<sizex-1; j++ )
utmp[0*sizex + j] = u[0*sizex + j];
// first column (boundary condition) into utmp
for( i=1; i<sizey-1; i++ )
utmp[i*sizex + 0] = u[i*sizex + 0];
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (utmp[ i*sizex + (j-1) ]+ // new left
u [ i*sizex + (j+1) ]+ // right
utmp[ (i-1)*sizex + j ]+ // new top
u [ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
utmp[i*sizex + j] = unew;
}
}
return sum;
}
/*
* One Gauss-Seidel iteration step
*
* Flop count in inner body is 4
*/
void relax_gauss( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
u[i*sizex+j]= 0.25 * (u[ i*sizex + (j-1) ]+
u[ i*sizex + (j+1) ]+
u[ (i-1)*sizex + j ]+
u[ (i+1)*sizex + j ]);
}
}
}
/*
* relax_jacobi.c
*
* Jacobi Relaxation
*
*/
#include "heat.h"
#if 0
/*
* Residual (length of error vector)
* between current solution and next after a Jacobi step
*/
double residual_jacobi( double *u,
unsigned sizex, unsigned sizey )
{
unsigned i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
unew = 0.25 * (u[ i*sizex + (j-1) ]+ // left
u[ i*sizex + (j+1) ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = unew - u[i*sizex + j];
sum += diff * diff;
}
}
return sum;
}
#endif
/*
* One Jacobi iteration step
*/
double relax_jacobi_return_residual( double *u, double *utmp,
unsigned sizex, unsigned sizey )
{
int i, j;
double unew, diff, sum=0.0;
for( i=1; i<sizey-1; i++ )
{
for( j=1; j<sizex-1; j++ )
{
{
utmp[i*sizex + j]= 0.25 * (u[ i*sizex+j -1 ]+ // left
u[ i*sizex+j +1 ]+ // right
u[ (i-1)*sizex + j ]+ // top
u[ (i+1)*sizex + j ]); // bottom
diff = utmp[i*sizex + j] - u[i*sizex +j];
sum += diff * diff;
}
}
}
return sum;
}
50 # iterations
100 # initial resolution
2900 # max resolution (spatial resolution)
200 # resolution step size
0 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
0 # iterations
100 # initial resolution
100 # max resolution (spatial resolution)
200 # resolution step size
1 # Algorithm 0=Jacobi 1=Gauss
2 # number of heat sources
0.0 0.0 1.0 1.0 # (x,y), size temperature
1.0 1.0 1.0 0.5
//
// timing.c
//
#include <sys/time.h>
#include "timing.h"
double wtime()
{
struct timeval tv;
gettimeofday(&tv, 0);
return tv.tv_sec+1e-6*tv.tv_usec;
}
//
// timing.h
//
#ifndef TIMING_H_INCLUDED
#define TIMING_H_INCLUDED
double wtime();
#endif // TIMING_H_IINCLUDED
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment