Commit 0a5d21e5 authored by Gaurav Kukreja

Power Estimator code pushed. Many changes needed. Untested.

Modified Cache Simulator, filled with bugs!
Signed-off-by: Gaurav Kukreja <gaurav@gauravk.in>
parent 28213702
......@@ -11,3 +11,4 @@ examples/crc32/app_dir/in_large.h
*.gdbo
*.gdbx
*.gch
in_large.h
......@@ -9,15 +9,20 @@
#ifndef CACHE_SIM_H
#define CACHE_SIM_H
#include "cacheSimResult.h"
/**
* Simulates Instruction Cache access by benchmark
*
* @param address Starting address of instructions in the basic block
* @param nBytes Number of bytes of instructions accessed in the basic block
* @param result Caller-provided result structure, forwarded to the selected hardware model
*
* @return number of clock cycles spent
*/
extern unsigned long long simICache(unsigned long address, unsigned int nBytes);
extern unsigned long long simICache(unsigned long address,
unsigned int nBytes,
struct csim_result_t *result);
/**
* Simulates Data Cache access by benchmark
......@@ -27,17 +32,19 @@ extern unsigned long long simICache(unsigned long address, unsigned int nBytes);
*
* @return number of clock cycles spent
*/
extern unsigned long long simDCache(unsigned long address, unsigned int isReadAccess);
extern unsigned long long simDCache(unsigned long address,
unsigned int isReadAccess,
struct csim_result_t *result);
/**
* Initialize the cache data structures
*/
extern void cacheSimInit();
extern void cacheSimInit(struct csim_result_t *result);
/**
* Frees data structures and cleans up
*
*/
extern void cacheSimFini();
extern void cacheSimFini(struct csim_result_t *result);
#endif // CACHE_SIM_H
#ifndef CACHE_SIM_RESULT_H
#define CACHE_SIM_RESULT_H
/**
 * Per-run cache simulation result counters.
 *
 * A pointer to this structure is passed to cacheSimInit(), simICache(),
 * simDCache() and cacheSimFini(), which forward it to the selected hardware
 * model. The Cortex-A5 model zeroes every field in its init routine, updates
 * them in its data-cache routine and prints the hit/miss/prefetch counters in
 * its fini routine.
 * NOTE(review): the generic model accepts the pointer but no update is visible
 * in the code shown here - confirm before relying on these values with it.
 */
struct csim_result_t
{
/* Running total of cycles: the Cortex-A5 data-cache routine adds to it the
 * same latencies it adds to its own return value. */
unsigned long long cyclesConsumed;
/* Number of read accesses whose tag matched in the L1 data cache. */
unsigned long long L1Hits;
/* Number of read accesses that missed L1 and whose tag matched in L2. */
unsigned long long L2Hits;
/* Number of read accesses found in neither L1 nor L2. */
unsigned long long L2Misses;
/* Number of read accesses whose address equals a recorded previous access
 * plus one L2 line length (treated as prefetched). */
unsigned long long prefetches;
};
#endif // CACHE_SIM_RESULT_H
......@@ -5,6 +5,8 @@ CFLAGS = -O3
HEADERS = $(CACHESIM_HEADERS)
CFLAGS += -I$(HEADERS)
# Cache Simulation Source Code
SOURCES = cacheSim.c cacheSimStat.c cacheSimStat.h cacheSimHwMod.h $(HEADERS)/cacheSim.h
OBJECTS = cacheSim.o cacheSimStat.o
......
......@@ -47,14 +47,16 @@
*
* @return number of clock cycles spent
*/
unsigned long long simICache(unsigned long address, unsigned int nBytes)
unsigned long long simICache(unsigned long address,
unsigned int nBytes,
struct csim_result_t *result)
{
unsigned long long nCycles;
cacheSimStat.access_type = ACCESS_TYPE_INVALID;
cacheSimStat.nCycles = 0;
cacheSimStat.powerMicroJ = 0;
nCycles = hwMod.simICache(address, nBytes);
nCycles = hwMod.simICache(address, nBytes, result);
#ifdef COLLECT_STAT
cacheSimStatCollect();
......@@ -70,14 +72,16 @@ unsigned long long simICache(unsigned long address, unsigned int nBytes)
*
* @return number of clock cycles spent
*/
unsigned long long simDCache(unsigned long address, unsigned int isReadAccess)
unsigned long long simDCache(unsigned long address,
unsigned int isReadAccess,
struct csim_result_t *result)
{
unsigned long long nCycles;
cacheSimStat.access_type = ACCESS_TYPE_INVALID;
cacheSimStat.nCycles = 0;
cacheSimStat.powerMicroJ = 0;
nCycles = hwMod.simDCache(address, isReadAccess);
nCycles = hwMod.simDCache(address, isReadAccess, result);
#ifdef COLLECT_STAT
cacheSimStatCollect();
......@@ -89,17 +93,17 @@ unsigned long long simDCache(unsigned long address, unsigned int isReadAccess)
/**
* Initialize the cache data structures
*/
void cacheSimInit()
void cacheSimInit(struct csim_result_t *result)
{
hwMod.cacheSimInit();
hwMod.cacheSimInit(result);
}
/**
* Frees data structures and cleans up
*/
void cacheSimFini()
void cacheSimFini(struct csim_result_t *result)
{
hwMod.cacheSimFini();
hwMod.cacheSimFini(result);
#ifdef COLLECT_STAT
cacheSimStatPrint();
......
......@@ -9,6 +9,7 @@
#define CACHE_SIM_HW_MOD_H
#include "cacheSimStat.h"
#include "cacheSimResult.h"
/**
* Cache Simulation Hardware Model Descriptor
......@@ -23,7 +24,9 @@ struct cacheSimHwMod_t
*
* @return number of clock cycles spent
*/
unsigned long long (*simICache) (unsigned long address, unsigned int nBytes);
unsigned long long (*simICache) (unsigned long address,
unsigned int nBytes,
struct csim_result_t *result);
/**
* Simulates Data Cache access by benchmark
......@@ -33,7 +36,9 @@ struct cacheSimHwMod_t
*
* @return number of clock cycles spent
*/
unsigned long long (*simDCache) (unsigned long address, unsigned int isReadAccess);
unsigned long long (*simDCache) (unsigned long address,
unsigned int isReadAccess,
struct csim_result_t *result);
/**
* Initialize the cache data structures
......@@ -41,12 +46,12 @@ struct cacheSimHwMod_t
* @param result Caller-provided result structure; the Cortex-A5 model zeroes
* its counters during initialization
*/
void (*cacheSimInit) ();
void (*cacheSimInit) (struct csim_result_t *result);
/**
* Frees data structures and cleans up
*/
void (*cacheSimFini) ();
void (*cacheSimFini) (struct csim_result_t *result);
};
......
......@@ -303,7 +303,8 @@ inline unsigned long getIndexFromAddress(unsigned long address,
/**** HWMOD FUNCTIONS *********************************************************/
unsigned long long cortexA5_simICache(unsigned long address,
unsigned int nBytes)
unsigned int nBytes,
struct csim_result_t *result)
{
unsigned int latency = 0;
unsigned long tag;
......@@ -397,25 +398,41 @@ unsigned long long cortexA5_simICache(unsigned long address,
unsigned long long cortexA5_simDCache(unsigned long address,
unsigned int isReadAccess)
unsigned int isReadAccess,
struct csim_result_t *result)
{
unsigned int latency = 0;
unsigned long long latency = 0;
unsigned long tag;
unsigned long index;
int setIndex = 0;
int replaceIndex;
int i;
if (isReadAccess == 0 && L1DCacheConf.isWriteThrough == 1) // Write Access
if (isReadAccess)
{
// Simply increment latency by time to write to memory
latency += memWriteLatency;
L1D_Hit_Writethrough++;
return latency;
// Check for prefetch!
prevAccess_t *access = prevAccessList_tail;
for (i = 0; i < prefetch_table_entries && access != NULL; i++)
{
if (address == access->address + L2CacheConf.lineLenBytes)
{
//printf("0x%lx - 0x%lx\n", access->address, address);
if (access->sequentialAccess > 5)
{
result->cyclesConsumed += memReadPrefetchLatency;
latency += memReadPrefetchLatency;
}
insertAccess(&prevAccessList_head, &prevAccessList_tail, address, access->sequentialAccess+1);
result->prefetches++;
return latency;
}
access = access->prev;
}
// If here, data was not prefetched!
insertAccess(&prevAccessList_head, &prevAccessList_tail, address, 0);
}
// For writeback, there is no latency. We can safely take this assumption,
// as we are only using a Single Core System.
// Lookup in L1
tag = getTagFromAddress(address, L1DCacheConf.tagLenBits, L1DCacheConf.tagMask);
index = getIndexFromAddress(address, L1DCacheConf.subIndexLenBits, L1DCacheConf.indexMask);
......@@ -426,11 +443,17 @@ unsigned long long cortexA5_simDCache(unsigned long address,
{
if (L1DCache[setIndex][index].tag == tag)
{
result->cyclesConsumed += L1DCacheConf.hitLatency;
latency += L1DCacheConf.hitLatency;
if (isReadAccess)
{
L1D_Hit_Read++;
else
result->L1Hits++;
}
else // Write Access
{
L1D_Hit_Writeback++;
}
return latency;
}
}
......@@ -439,16 +462,33 @@ unsigned long long cortexA5_simDCache(unsigned long address,
replaceIndex = setIndex;
}
}
// L1 Miss has occurred!
L1D_Miss++;
latency += L1DCacheConf.missLatency;
// If here, data was not found in L1 Cache
// Data will be present for next access!
// This is executed for both read and write instructions
if (replaceIndex == -1)
replaceIndex = random() % L1DCacheConf.numSets;
L1DCache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L1DCache[replaceIndex][index].flags);
// if (isReadAccess)
// {
// L1 Miss has occurred!
L1D_Miss++;
result->cyclesConsumed += L1DCacheConf.missLatency;
latency += L1DCacheConf.missLatency;
// }
// else // Write Access, do nothing, data has been put in L1, return!
if (!isReadAccess)
{
result->cyclesConsumed += L1DCacheConf.missLatency;
latency += L1DCacheConf.missLatency;
return latency;
}
// No write instructions will come here!
// Lookup in L2
tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask);
......@@ -459,11 +499,10 @@ unsigned long long cortexA5_simDCache(unsigned long address,
{
if (L2Cache[setIndex][index].tag == tag)
{
result->cyclesConsumed += L2CacheConf.hitLatency;
latency += L2CacheConf.hitLatency;
if (isReadAccess)
L2_Hit_Read++;
else
L2_Hit_Writeback++;
L2_Hit_Read++;
result->L2Hits++;
return latency;
}
}
......@@ -472,10 +511,12 @@ unsigned long long cortexA5_simDCache(unsigned long address,
replaceIndex = setIndex;
}
}
// If here, data was not found in L2.
// L2 Miss has occurred!
L1D_Miss--;
L2D_Miss++;
result->L2Misses++;
result->cyclesConsumed += L2CacheConf.missLatency;
latency += L2CacheConf.missLatency;
// Data will be present for next access!
......@@ -484,33 +525,23 @@ unsigned long long cortexA5_simDCache(unsigned long address,
L2Cache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L2Cache[replaceIndex][index].flags);
prevAccess_t *access = prevAccessList_tail;
for (i = 0; i < prefetch_table_entries && access != NULL; i++)
{
if (address == access->address + L2CacheConf.lineLenBytes)
{
//printf("0x%lx - 0x%lx\n", access->address, address);
if (access->sequentialAccess > 5)
{
latency += memReadPrefetchLatency;
}
insertAccess(&prevAccessList_head, &prevAccessList_tail, address, access->sequentialAccess+1);
return latency;
}
access = access->prev;
}
// Fetch Data from the memory
result->cyclesConsumed += memReadLatency;
latency += memReadLatency;
insertAccess(&prevAccessList_head, &prevAccessList_tail, address, 0);
return latency;
}
void cortexA5_cacheSimInit()
void cortexA5_cacheSimInit(struct csim_result_t *result)
{
// Allocate space for caches
initCacheParams();
result->L1Hits = 0;
result->L2Hits = 0;
result->L2Misses = 0;
result->prefetches = 0;
result->cyclesConsumed = 0;
L1DCache = (cacheLine_t **) alloc2D(L1DCacheConf.numSets,
L1DCacheConf.numLines, sizeof(cacheLine_t));
L1ICache = (cacheLine_t **) alloc2D(L1ICacheConf.numSets,
......@@ -525,10 +556,15 @@ void cortexA5_cacheSimInit()
return;
}
void cortexA5_cacheSimFini()
void cortexA5_cacheSimFini(struct csim_result_t *result)
{
printf("Statistics : \n");
printf ("\nTotal L1 Hits = %llu\n", result->L1Hits);
printf ("Total L2 Hits = %llu\n", result->L2Hits);
printf ("Total L2 Misses = %llu\n", result->L2Misses);
printf ("Total Prefetches = %llu\n", result->prefetches);
printf("\nL1 Data Cache\n");
printf("\t Hit Read = %ld\n", L1D_Hit_Read);
printf("\t Hit Writeback = %ld\n", L1D_Hit_Writeback);
......
......@@ -588,7 +588,8 @@ unsigned int generic_simL1DCache(unsigned long address,
*
* @return number of clock cycles spent
*/
unsigned long long generic_simICache(unsigned long address, unsigned int nBytes)
unsigned long long generic_simICache(unsigned long address, unsigned int nBytes,
struct csim_result_t *result)
{
unsigned long long nCycles = 0;
unsigned int ret;
......@@ -615,7 +616,8 @@ unsigned long long generic_simICache(unsigned long address, unsigned int nBytes)
*
* @return number of clock cycles spent
*/
unsigned long long generic_simDCache(unsigned long address, unsigned int isReadAccess)
unsigned long long generic_simDCache(unsigned long address, unsigned int isReadAccess,
struct csim_result_t *result)
{
unsigned long long nCycles = 0;
unsigned int ret;
......@@ -638,7 +640,7 @@ unsigned long long generic_simDCache(unsigned long address, unsigned int isReadA
/**
* Initialize the cache data structures
*/
void generic_cacheSimInit()
void generic_cacheSimInit(struct csim_result_t *result)
{
readConfigFile();
......@@ -673,7 +675,7 @@ void generic_cacheSimInit()
* Frees data structures and cleans up
*
*/
void generic_cacheSimFini()
void generic_cacheSimFini(struct csim_result_t *result)
{
free(L1DCache);
}
......
......@@ -13,6 +13,7 @@
extern unsigned long SP;
extern unsigned long long memAccessCycles;
extern unsigned long long pipelineCycles;
extern struct csim_result_t csim_result;
/***********************************************************
Copyright 1992 by Stichting Mathematisch Centrum, Amsterdam, The
......@@ -100,7 +101,6 @@ void adpcm_coder (short indata[], unsigned long indata_addr, char outdata[], un
uintptr_t ivtmp_28;
int bufferstep;
int outputbuffer;
unsigned long outputbuffer_addr = 0x8;
int index;
int vpdiff;
int valpred;
......@@ -109,25 +109,24 @@ void adpcm_coder (short indata[], unsigned long indata_addr, char outdata[], un
int delta;
int sign;
signed char * outp;
unsigned long outp_addr = 0x0;
adpcm_coderbb_2:
// # PRED: ENTRY [100.0%] (fallthru,exec)
SP = SP + 0x30;
memAccessCycles += simDCache((SP + 0x4), 1); // Spilling Register
memAccessCycles += simDCache((SP + 0xc), 1); // Spilling Register
memAccessCycles += simDCache((SP + 0xc), 1); // Reading Spilt Register
memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load
memAccessCycles += simDCache((SP + 0x4), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache((SP + 0xc), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache((SP + 0xc), 1, &csim_result); // Reading Spilt Register
memAccessCycles += simDCache(0x4a8, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x36c, 44);
memAccessCycles += simICache(0x36c, 44, &csim_result);
// TODO: UnmappedLS: Load GlobalVar coder_1_state at line 247
// TODO: UnmappedLS: Load GlobalVar coder_1_state at line 249
pipelineCycles += 23 - (enterBlock(0xf3, 0xfd) ? 7 : 0);
valpred = state->valprev;
memAccessCycles += simDCache(state_addr, 1);
memAccessCycles += simDCache(state_addr, 1, &csim_result);
index = state->index;
memAccessCycles += simDCache(state_addr, 1);
memAccessCycles += simDCache(stepsizeTable_addr + (4 * (index)), 1);
memAccessCycles += simDCache(state_addr, 1, &csim_result);
memAccessCycles += simDCache(stepsizeTable_addr + (4 * (index)), 1, &csim_result);
step = stepsizeTable[index];
if (len > 0)
goto adpcm_coderbb_3;
......@@ -137,23 +136,27 @@ pipelineCycles += 23 - (enterBlock(0xf3, 0xfd) ? 7 : 0);
adpcm_coderbb_3:
// # PRED: 2 [91.0%] (true,exec)
memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load
memAccessCycles += simDCache((SP + outp_addr), 0);
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache(0x4a8, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 1
memAccessCycles += simICache(0x398, 32);
memAccessCycles += simICache(0x398, 32, &csim_result);
pipelineCycles += 15 - (enterBlock(0xfe, 0x105) ? 7 : 0);
outp = outdata;
memAccessCycles += simDCache(outdata_addr, 1);
memAccessCycles += simDCache(outdata_addr, 1, &csim_result);
ivtmp_28 = 0;
bufferstep = 1;
// # SUCC: 4 [100.0%] (fallthru,exec)
adpcm_coderbb_4:
// # PRED: 18 [91.0%] (true,exec) 3 [100.0%] (fallthru,exec)
memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilt Register
memAccessCycles += simDCache((SP + 0x4), 1, &csim_result); // Reading Spilt Register
memAccessCycles += simDCache((SP + 0x8), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache((SP + 0x8), 1, &csim_result); // Reading Spilt Register
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Reading Spilt Register
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Spilling Register
pipelineCycles += 48 - (enterBlock(0x106, 0x137) ? 7 : 0);
diff = (int) *(short int *)((uintptr_t)indata + (uintptr_t)ivtmp_28) - valpred;
memAccessCycles += simDCache(indata_addr + (+ivtmp_28), 1);
memAccessCycles += simDCache(indata_addr + (+ivtmp_28), 1, &csim_result);
if (diff < 0)
goto adpcm_coderbb_5;
else
......@@ -249,11 +252,11 @@ adpcm_coderbb_15:
valpred_41 = (valpred_34>-32768)?valpred_34:-32768;
valpred = (valpred_41<32767)?valpred_41:32767;
delta_37 = delta | sign;
memAccessCycles += simDCache(indexTable_addr + (4 * (delta_37)), 1);
memAccessCycles += simDCache(indexTable_addr + (4 * (delta_37)), 1, &csim_result);
index_38 = indexTable[delta_37] + index;
index_40 = (index_38>0)?index_38:0;
index = (index_40<88)?index_40:88;
memAccessCycles += simDCache(stepsizeTable_addr + (4 * (index)), 1);
memAccessCycles += simDCache(stepsizeTable_addr + (4 * (index)), 1, &csim_result);
step = stepsizeTable[index];
if (bufferstep != 0)
goto adpcm_coderbb_16;
......@@ -263,15 +266,12 @@ adpcm_coderbb_15:
adpcm_coderbb_16:
// # PRED: 15 [50.0%] (true,exec)
memAccessCycles += simDCache((SP + outputbuffer_addr), 0);
outputbuffer = delta_37 << 4 & 255;
goto adpcm_coderbb_18;
// # SUCC: 18 [100.0%] (fallthru,exec)
adpcm_coderbb_17:
// # PRED: 15 [50.0%] (false,exec)
memAccessCycles += simDCache((SP + outputbuffer_addr), 1);
memAccessCycles += simDCache((SP + outp_addr), 0);
*outp = (signed char) delta_37 & 15 | (signed char) outputbuffer;
outp = (uintptr_t)outp + 1;
// # SUCC: 18 [100.0%] (fallthru,exec)
......@@ -279,10 +279,9 @@ memAccessCycles += simDCache((SP + outp_addr), 0);
adpcm_coderbb_18:
// # PRED: 16 [100.0%] (fallthru,exec) 17 [100.0%] (fallthru,exec)
// Simulating I Cache for obj block 2
memAccessCycles += simICache(0x3b8, 200);
memAccessCycles += simICache(0x3b8, 200, &csim_result);
// TODO: UnmappedLS: Store GlobalVar adpcmdata at line 306
// TODO: UnmappedLS: Load GlobalVar pcmdata at line 263
// TODO: UnmappedLS: Load LocalVar outp at line 305
// TODO: UnmappedLS: Store GlobalVar pcmdata at line 306
bufferstep = bufferstep == 0;
len = len + -1;
ivtmp_28 = ivtmp_28 + 2;
......@@ -294,6 +293,8 @@ memAccessCycles += simICache(0x3b8, 200);
adpcm_coderbb_19:
// # PRED: 18 [9.0%] (false,exec)
memAccessCycles += simDCache((SP + 0x8), 1, &csim_result); // Reading Spilt Register
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Reading Spilt Register
pipelineCycles += 10 - (enterBlock(0x138, 0x13b) ? 7 : 0);
if (bufferstep == 0)
goto adpcm_coderbb_20;
......@@ -303,26 +304,24 @@ pipelineCycles += 10 - (enterBlock(0x138, 0x13b) ? 7 : 0);
adpcm_coderbb_20:
// # PRED: 19 [67.0%] (true,exec)
memAccessCycles += simDCache((SP + outputbuffer_addr), 1);
*outp = (signed char) (signed char) outputbuffer;
// # SUCC: 21 [100.0%] (fallthru,exec)
adpcm_coderbb_21:
// # PRED: 19 [33.0%] (false,exec) 20 [100.0%] (fallthru,exec) 2 [9.0%] (false,exec)
// Simulating I Cache for obj block 3
memAccessCycles += simICache(0x480, 16);
// TODO: UnmappedLS: Load LocalVar outp at line 314
// TODO: UnmappedLS: Store GlobalVar stepsizeTable at line 315
memAccessCycles += simDCache((SP + 0xc), 1); // Reading Spilt Register
memAccessCycles += simICache(0x480, 16, &csim_result);
// TODO: UnmappedLS: Store GlobalVar adpcmdata at line 315
memAccessCycles += simDCache((SP + 0xc), 1, &csim_result); // Reading Spilt Register
// Simulating I Cache for obj block 4
memAccessCycles += simICache(0x490, 24);
memAccessCycles += simICache(0x490, 24, &csim_result);
// TODO: UnmappedLS: Store GlobalVar coder_1_state at line 317
// TODO: UnmappedLS: Store GlobalVar coder_1_state at line 318
pipelineCycles += 19 - (enterBlock(0x13c, 0x141) ? 7 : 0);
state->valprev = (short int) (short int) valpred;
memAccessCycles += simDCache(state_addr, 0);
memAccessCycles += simDCache(state_addr, 0, &csim_result);
state->index = (char) (char) index;
memAccessCycles += simDCache(state_addr, 0);
memAccessCycles += simDCache(state_addr, 0, &csim_result);
return;
// # SUCC: EXIT [100.0%]
......
......@@ -13,6 +13,7 @@
unsigned long SP = 0x1234;
unsigned long long memAccessCycles = 0;
unsigned long long pipelineCycles = 0;
struct csim_result_t csim_result;
/*
** Timing - Test timing on adpcm coder and decoder.
......@@ -59,22 +60,18 @@ int main() {
long unsigned int j;
int i;
unsigned int ARR_SIZE_0;
unsigned long ARR_SIZE_0_addr = 0x0;
unsigned long ivtmp_34_addr = 0; // MANUAL
unsigned long ivtmp_28_addr = 0; //MANUAL
mainbb_2:
// # PRED: ENTRY [100.0%] (fallthru,exec)
cacheSimInit();
cacheSimInit(&csim_result);
branchPred_init();
SP = SP + 0x30;
memAccessCycles += simDCache(0x354, 1); // PC Relative Load
memAccessCycles += simDCache(0x358, 1); // PC Relative Load
memAccessCycles += simDCache(ARR_SIZE_addr, 1);
memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 0);
memAccessCycles += simDCache(0x354, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache(0x358, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(ARR_SIZE_addr, 1, &csim_result);
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x200, 36);
memAccessCycles += simICache(0x200, 36, &csim_result);
pipelineCycles += 27 - (enterBlock(0x96, 0x9e) ? 7 : 0);
ARR_SIZE_0 = ARR_SIZE;
j = ARR_SIZE_0 / 10240;
......@@ -86,13 +83,13 @@ pipelineCycles += 27 - (enterBlock(0x96, 0x9e) ? 7 : 0);
mainbb_14:
// # PRED: 2 [91.0%] (true,exec)
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache((SP + 0x4), 1); // Spilling Register
memAccessCycles += simDCache(0x360, 1); // PC Relative Load
memAccessCycles += simDCache(0x364, 1); // PC Relative Load
memAccessCycles += simDCache(0x368, 1); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache((SP + 0x4), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache(0x360, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(0x364, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(0x368, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 1
memAccessCycles += simICache(0x224, 40);
memAccessCycles += simICache(0x224, 40, &csim_result);
pipelineCycles += 21 - (enterBlock(0x9f, 0xa8) ? 7 : 0);
end_43 = 0;
count = 0;
......@@ -110,30 +107,26 @@ pipelineCycles += 9 - (enterBlock(0xa9, 0xab) ? 7 : 0);
mainbb_4:
// # PRED: 3 [99.0%] (true,exec)
memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilt Register
memAccessCycles += simDCache((SP + 0x4), 1, &csim_result); // Reading Spilt Register
// Simulating I Cache for obj block 3
memAccessCycles += simICache(0x258, 20);
memAccessCycles += simICache(0x258, 20, &csim_result);
pipelineCycles += 13 - (enterBlock(0xac, 0xb0) ? 7 : 0);
i_45 = (int) end_43;
ivtmp_34 = (uintptr_t)&in_Data[i_45];
ivtmp_34_addr = in_Data_addr + (2 * i_45);
end_44 = end_43;
// # SUCC: 5 [100.0%] (fallthru,exec)
mainbb_5:
// # PRED: 5 [99.0%] (true,exec) 4 [100.0%] (fallthru,exec)
memAccessCycles += simDCache(pcmdata_addr + (2 * (end_44-end_43)), 0);
memAccessCycles += simDCache(pcmdata_addr + (2 * (end_44-end_43)), 0, &csim_result);
// Simulating I Cache for obj block 4
memAccessCycles += simICache(0x26c, 36);
memAccessCycles += simICache(0x26c, 36, &csim_result);
// TODO: UnmappedLS: Load GlobalVar in_Data at line 179
pipelineCycles += 16 - (enterBlock(0xb1, 0xb9) ? 7 : 0);
pcmdata[end_44 - end_43] = *(short int*)((uintptr_t)ivtmp_34);
//Manual Annotation
memAccessCycles += simDCache(ivtmp_34_addr, 1);
i_45 = i_45 + 1;
end_44 = (long unsigned int) i_45;
ivtmp_34 = ivtmp_34 + 2;
ivtmp_34_addr = ivtmp_34_addr + 2;
if (end_44 < end_46)
goto mainbb_5;
else
......@@ -143,7 +136,7 @@ pipelineCycles += 16 - (enterBlock(0xb1, 0xb9) ? 7 : 0);
mainbb_6:
// # PRED: 5 [1.0%] (false,exec) 3 [1.0%] (false,exec)
// Simulating I Cache for obj block 5
memAccessCycles += simICache(0x290, 40);
memAccessCycles += simICache(0x290, 40, &csim_result);
pipelineCycles += 14 - (enterBlock(0xba, 0xc3) ? 7 : 0);
adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, 10240, &coder_1_state, coder_1_state_addr);
count = count + 1;
......@@ -156,17 +149,17 @@ pipelineCycles += 14 - (enterBlock(0xba, 0xc3) ? 7 : 0);
mainbb_13:
// # PRED: 6 [91.0%] (true,exec)
// Simulating I Cache for obj block 2
memAccessCycles += simICache(0x24c, 12);
memAccessCycles += simICache(0x24c, 12, &csim_result);
end_43 = end_46;
goto mainbb_3;
// # SUCC: 3 [100.0%] (fallthru)
mainbb_7:
// # PRED: 6 [9.0%] (false,exec) 2 [9.0%] (false,exec)
memAccessCycles += simDCache(0x358, 1); // PC Relative Load
memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 1);
memAccessCycles += simDCache(0x358, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Reading Spilt Register
// Simulating I Cache for obj block 6
memAccessCycles += simICache(0x2b8, 32);
memAccessCycles += simICache(0x2b8, 32, &csim_result);
pipelineCycles += 19 - (enterBlock(0xc4, 0xcb) ? 7 : 0);
if (ARR_SIZE_0 % 10240 != 0)
goto mainbb_8;
......@@ -176,12 +169,12 @@ pipelineCycles += 19 - (enterBlock(0xc4, 0xcb) ? 7 : 0);
mainbb_8:
// # PRED: 7 [61.0%] (true,exec)
memAccessCycles += simDCache(0x354, 1); // PC Relative Load
memAccessCycles += simDCache(0x354, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 7
memAccessCycles += simICache(0x2d8, 24);
memAccessCycles += simICache(0x2d8, 24, &csim_result);
pipelineCycles += 14 - (enterBlock(0xcc, 0xd1) ? 7 : 0);
start_40 = j * 10240;
memAccessCycles += simDCache(ARR_SIZE_addr, 1);
memAccessCycles += simDCache(ARR_SIZE_addr, 1, &csim_result);
end = ARR_SIZE;
if (start_40 < end)
goto mainbb_9;
......@@ -191,31 +184,28 @@ pipelineCycles += 14 - (enterBlock(0xcc, 0xd1) ? 7 : 0);
mainbb_9:
// # PRED: 8 [99.0%] (true,exec)
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(0x360, 1); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(0x360, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 8
memAccessCycles += simICache(0x2f0, 28);
memAccessCycles += simICache(0x2f0, 28, &csim_result);
pipelineCycles += 13 - (enterBlock(0xd2, 0xd8) ? 7 : 0);
i = (int) start_40;
ivtmp_28 = (uintptr_t)&in_Data[i];
ivtmp_28_addr = in_Data_addr + (2 * i);
D_2229 = (int) end;
start = start_40;
// # SUCC: 10 [100.0%] (fallthru,exec)
mainbb_10:
// # PRED: 10 [99.0%] (true,exec) 9 [100.0%] (fallthru,exec)
memAccessCycles += simDCache(pcmdata_addr + (2 * (start-start_40)), 0);
memAccessCycles += simDCache(pcmdata_addr + (2 * (start-start_40)), 0, &csim_result);
// Simulating I Cache for obj block 9
memAccessCycles += simICache(0x30c, 36);
memAccessCycles += simICache(0x30c, 36, &csim_result);
// TODO: UnmappedLS: Inaccurately Matched Load at line 219
pipelineCycles += 16 - (enterBlock(0xd9, 0xe1) ? 7 : 0);
pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28);
memAccessCycles += simDCache(ivtmp_28_addr, 1);
i = i + 1;
start = (long unsigned int) i;
ivtmp_28 = ivtmp_28 + 2;
ivtmp_28_addr = ivtmp_28_addr + 2;
if (i != D_2229)
goto mainbb_10;
else
......@@ -224,11 +214,11 @@ pipelineCycles += 16 - (enterBlock(0xd9, 0xe1) ? 7 : 0);
mainbb_11:
// # PRED: 10 [1.0%] (false,exec) 8 [1.0%] (false,exec)
memAccessCycles += simDCache(0x360, 1); // PC Relative Load
memAccessCycles += simDCache(0x364, 1); // PC Relative Load
memAccessCycles += simDCache(0x368, 1); // PC Relative Load
memAccessCycles += simDCache(0x360, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(0x364, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(0x368, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 10
memAccessCycles += simICache(0x330, 20);
memAccessCycles += simICache(0x330, 20, &csim_result);
pipelineCycles += 11 - (enterBlock(0xe2, 0xe6) ? 7 : 0);
adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, (int) (end - start_40), &coder_1_state, coder_1_state_addr);
// # SUCC: 12 [100.0%] (fallthru,exec)
......@@ -236,10 +226,10 @@ pipelineCycles += 11 - (enterBlock(0xe2, 0xe6) ? 7 : 0);
mainbb_12:
// # PRED: 7 [39.0%] (false,exec) 11 [100.0%] (fallthru,exec)
// Simulating I Cache for obj block 11
memAccessCycles += simICache(0x344, 16);
memAccessCycles += simICache(0x344, 16, &csim_result);
printf("memAccessCycles = \%llu\n", memAccessCycles);
printf("pipelineCycles = \%llu\n", pipelineCycles);
cacheSimFini();
cacheSimFini(&csim_result);
pipelineCycles += 18 - (enterBlock(0xe7, 0xea) ? 7 : 0);
return 0;
// # SUCC: EXIT [100.0%]
......
......@@ -13,6 +13,7 @@
unsigned long SP = 0x1234;
unsigned long long memAccessCycles = 0;
unsigned long long pipelineCycles = 0;
struct csim_result_t csim_result;
#include <stdio.h>
......@@ -39,29 +40,29 @@ void sieve_func() {
uintptr_t D_2240;
uintptr_t D_2230;
uintptr_t ivtmp_36;
unsigned long ivtmp_36_addr;
int j;
int i;
unsigned int sieve[500000];
unsigned long sieve_addr = 0x0;
sieve_funcbb_2:
// # PRED: ENTRY [100.0%] (fallthru,exec)
SP = SP + 0x1e84a0;
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x200, 40);
memAccessCycles += simICache(0x200, 40, &csim_result);
pipelineCycles += 24 - (enterBlock(0x96, 0x9f) ? 7 : 0);
ivtmp_68 = 0;
// # SUCC: 3 [100.0%] (fallthru,exec)
sieve_funcbb_3:
// # PRED: 3 [99.0%] (true,exec) 2 [100.0%] (fallthru,exec)
memAccessCycles += simDCache(results_addr + (+ivtmp_68), 0);
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Spilling Register
memAccessCycles += simDCache(results_addr + (+ivtmp_68), 0, &csim_result);
// Simulating I Cache for obj block 1
memAccessCycles += simICache(0x228, 28);
memAccessCycles += simICache(0x228, 28, &csim_result);
pipelineCycles += 13 - (enterBlock(0xa0, 0xa6) ? 7 : 0);
*(unsigned int*)((uintptr_t)&results + (uintptr_t)ivtmp_68) = 0;
memAccessCycles += simDCache((SP + sieve_addr + (+ivtmp_68)), 0);
*(unsigned int*)((uintptr_t)&sieve + (uintptr_t)ivtmp_68) = 1;
ivtmp_68 = ivtmp_68 + 4;
if (ivtmp_68 != 2000000)
......@@ -73,7 +74,7 @@ pipelineCycles += 13 - (enterBlock(0xa0, 0xa6) ? 7 : 0);
sieve_funcbb_17:
// # PRED: 3 [1.0%] (false,exec)
// Simulating I Cache for obj block 2
memAccessCycles += simICache(0x244, 52);
memAccessCycles += simICache(0x244, 52, &csim_result);
pipelineCycles += 21 - (enterBlock(0xa7, 0xb3) ? 7 : 0);
ivtmp_49 = 6;
ivtmp_58 = 4;
......@@ -82,11 +83,11 @@ pipelineCycles += 21 - (enterBlock(0xa7, 0xb3) ? 7 : 0);
sieve_funcbb_4:
// # PRED: 7 [99.0%] (true,exec) 17 [100.0%] (fallthru)
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Reading Spilt Register
// Simulating I Cache for obj block 3
memAccessCycles += simICache(0x278, 16);
memAccessCycles += simICache(0x278, 16, &csim_result);
pipelineCycles += 9 - (enterBlock(0xb4, 0xb7) ? 7 : 0);
D_2263 = (unsigned int) i_72;
memAccessCycles += simDCache((SP + sieve_addr + (+D_2263*4)), 1);
if (*(unsigned int*)((uintptr_t)&sieve + (uintptr_t)D_2263 * 4) != 0)
goto sieve_funcbb_5;
else
......@@ -96,7 +97,7 @@ pipelineCycles += 9 - (enterBlock(0xb4, 0xb7) ? 7 : 0);
sieve_funcbb_5:
// # PRED: 4 [50.0%] (true,exec)
// Simulating I Cache for obj block 4
memAccessCycles += simICache(0x288, 12);
memAccessCycles += simICache(0x288, 12, &csim_result);
pipelineCycles += 8 - (enterBlock(0xb8, 0xba) ? 7 : 0);
j_76 = (int) ivtmp_58;
if (j_76 <= 499999)
......@@ -108,16 +109,16 @@ pipelineCycles += 8 - (enterBlock(0xb8, 0xba) ? 7 : 0);
sieve_funcbb_18:
// # PRED: 5 [91.0%] (true,exec)
// Simulating I Cache for obj block 5
memAccessCycles += simICache(0x294, 4);
memAccessCycles += simICache(0x294, 4, &csim_result);
pipelineCycles += 8 - (enterBlock(0xbb, 0xbb) ? 7 : 0);
ivtmp_74 = ivtmp_49;
// # SUCC: 6 [100.0%] (fallthru)
sieve_funcbb_6:
// # PRED: 6 [91.0%] (true,exec) 18 [100.0%] (fallthru)
memAccessCycles += simDCache((SP + sieve_addr + (4 * (j_76))), 0);
memAccessCycles += simDCache((SP + 0x4), 1, &csim_result); // Spilling Register
// Simulating I Cache for obj block 6
memAccessCycles += simICache(0x298, 40);
memAccessCycles += simICache(0x298, 40, &csim_result);
pipelineCycles += 17 - (enterBlock(0xbc, 0xc5) ? 7 : 0);
sieve[j_76] = 0;
D_2252 = (unsigned int) j_76 + D_2263;
......@@ -132,7 +133,7 @@ pipelineCycles += 17 - (enterBlock(0xbc, 0xc5) ? 7 : 0);
sieve_funcbb_7:
// # PRED: 4 [50.0%] (false,exec) 6 [9.0%] (false,exec) 5 [9.0%] (false,exec)
// Simulating I Cache for obj block 7
memAccessCycles += simICache(0x2c0, 24);
memAccessCycles += simICache(0x2c0, 24, &csim_result);
pipelineCycles += 19 - (enterBlock(0xc6, 0xcb) ? 7 : 0);
i_72 = i_72 + 1;
ivtmp_58 = ivtmp_58 + 2;
......@@ -145,9 +146,9 @@ pipelineCycles += 19 - (enterBlock(0xc6, 0xcb) ? 7 : 0);
sieve_funcbb_8:
// # PRED: 7 [1.0%] (false,exec)
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1, &csim_result); // PC Relative Load
// Simulating I Cache for obj block 8
memAccessCycles += simICache(0x2d8, 24);
memAccessCycles += simICache(0x2d8, 24, &csim_result);
pipelineCycles += 13 - (enterBlock(0xcc, 0xd1) ? 7 : 0);
j = 2;
i = 0;
......@@ -155,9 +156,9 @@ pipelineCycles += 13 - (enterBlock(0xcc, 0xd1) ? 7 : 0);
sieve_funcbb_9:
// # PRED: 11 [99.0%] (true,exec) 8 [100.0%] (fallthru,exec)
memAccessCycles += simDCache((SP + 0x0), 1, &csim_result); // Reading Spilt Register
pipelineCycles += 12 - (enterBlock(0xd2, 0xd8) ? 7 : 0);
D_2240 = (unsigned int) j;
memAccessCycles += simDCache((SP + sieve_addr + (+D_2240*4)), 1);
if (*(unsigned int*)((uintptr_t)&sieve + (uintptr_t)D_2240 * 4) != 0)
goto sieve_funcbb_10;
else
......@@ -166,7 +167,7 @@ pipelineCycles += 12 - (enterBlock(0xd2, 0xd8) ? 7 : 0);
sieve_funcbb_10:
// # PRED: 9 [50.0%] (true,exec)
memAccessCycles += simDCache(results_addr + (4 * (i)), 0);
memAccessCycles += simDCache(results_addr + (4 * (i)), 0, &csim_result);
results[i] = D_2240;
i = i + 1;
// # SUCC: 11 [100.0%] (fallthru,exec)
......@@ -174,7 +175,7 @@ memAccessCycles += simDCache(results_addr + (4 * (i)), 0);
sieve_funcbb_11:
// # PRED: 9 [50.0%] (false,exec) 10 [100.0%] (fallthru,exec)
// Simulating I Cache for obj block 9
memAccessCycles += simICache(0x2f0, 28);
memAccessCycles += simICache(0x2f0, 28, &csim_result);
j = j + 1;
if (j != 500000)
goto sieve_funcbb_9;
......@@ -184,10 +185,10 @@ memAccessCycles += simICache(0x2f0, 28);
sieve_funcbb_12:
// # PRED: 11 [1.0%] (false,exec)
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(results_addr + (4 * (0)), 1);
memAccessCycles += simDCache(0x35c, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(results_addr + (4 * (0)), 1, &csim_result);
// Simulating I Cache for obj block 10
memAccessCycles += simICache(0x30c, 16);
memAccessCycles += simICache(0x30c, 16, &csim_result);
pipelineCycles += 9 - (enterBlock(0xd9, 0xdc) ? 7 : 0);
if (results[0] == 0)
goto sieve_funcbb_16;
......@@ -198,18 +199,20 @@ pipelineCycles += 9 - (enterBlock(0xd9, 0xdc) ? 7 : 0);
sieve_funcbb_13:
// # PRED: 12 [95.5%] (false,exec)
// Simulating I Cache for obj block 11
memAccessCycles += simICache(0x31c, 12);
memAccessCycles += simICache(0x31c, 12, &csim_result);
pipelineCycles += 10 - (enterBlock(0xdd, 0xdf) ? 7 : 0);
ivtmp_36 = (uintptr_t)&results;
ivtmp_36_addr = results_addr;
D_2230 = ivtmp_36 + 1999996;
// # SUCC: 14 [100.0%] (fallthru,exec)
sieve_funcbb_14:
// # PRED: 15 [98.9%] (true,exec) 13 [100.0%] (fallthru,exec)
// Simulating I Cache for obj block 12
memAccessCycles += simICache(0x328, 12);
memAccessCycles += simICache(0x328, 12, &csim_result);
// TODO: UnmappedLS: Load GlobalVar results at line 224
pipelineCycles += 8 - (enterBlock(0xe0, 0xe2) ? 7 : 0);
memAccessCycles += simDCache(ivtmp_36_addr + 4, 1, &csim_result);
if (*(unsigned int*)((uintptr_t)ivtmp_36 + 4) == 0)
goto sieve_funcbb_16;
else
......@@ -219,9 +222,10 @@ pipelineCycles += 8 - (enterBlock(0xe0, 0xe2) ? 7 : 0);
sieve_funcbb_15:
// # PRED: 14 [95.5%] (false,exec)
// Simulating I Cache for obj block 13
memAccessCycles += simICache(0x334, 12);
memAccessCycles += simICache(0x334, 12, &csim_result);
pipelineCycles += 9 - (enterBlock(0xe3, 0xe5) ? 7 : 0);
ivtmp_36 = ivtmp_36 + 4;
ivtmp_36_addr = ivtmp_36_addr + 4;
if (ivtmp_36 != D_2230)
goto sieve_funcbb_14;
else
......@@ -230,10 +234,10 @@ pipelineCycles += 9 - (enterBlock(0xe3, 0xe5) ? 7 : 0);
sieve_funcbb_16:
// # PRED: 14 [4.5%] (true,exec) 15 [1.1%] (false,exec) 12 [4.5%] (true,exec)
memAccessCycles += simDCache(0x360, 1); // PC Relative Load
memAccessCycles += simDCache(m_addr, 0);
memAccessCycles += simDCache(0x360, 1, &csim_result); // PC Relative Load
memAccessCycles += simDCache(m_addr, 0, &csim_result);
// Simulating I Cache for obj block 14
memAccessCycles += simICache(0x340, 28);
memAccessCycles += simICache(0x340, 28, &csim_result);
pipelineCycles += 20 - (enterBlock(0xe6, 0xec) ? 7 : 0);
m.v = 0;
return;
......@@ -246,16 +250,16 @@ pipelineCycles += 20 - (enterBlock(0xe6, 0xec) ? 7 : 0);
int main (void) {
mainbb_2:
// # PRED: ENTRY [100.0%] (fallthru,exec)
cacheSimInit();
cacheSimInit(&csim_result);
branchPred_init();
SP = SP + 0x8;
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x364, 20);
memAccessCycles += simICache(0x364, 20, &csim_result);
pipelineCycles += 12 - (enterBlock(0xf1, 0xf5) ? 7 : 0);
sieve_func ();
printf("memAccessCycles = \%llu\n", memAccessCycles);
printf("pipelineCycles = \%llu\n", pipelineCycles);
cacheSimFini();
cacheSimFini(&csim_result);
return 0;
// # SUCC: EXIT [100.0%]
......
......@@ -82,6 +82,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
annot_str = "struct csim_result_t csim_result;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
else:
annot_str = "extern unsigned long SP;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
......@@ -98,6 +103,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
annot_str = "extern struct csim_result_t csim_result;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
if inMultiLineVarInit == 1:
m = re_VarDeclInitMultiLineEnd.match(line)
......@@ -326,7 +336,7 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
currFuncListParamsToAnnot.append(param.name)
if funcISC.functionName == "main":
annot_str = "cacheSimInit();"
annot_str = "cacheSimInit(&csim_result);"
annot = Annotation(annot_str, funcISC.fileName, funcISC.cfg.listBlocks[0].startLine-1, False)
addAnnotationToDict(dictAnnotLoadStore, funcISC.cfg.listBlocks[0].startLine-1, annot)
annot_str = "branchPred_init();"
......@@ -353,7 +363,7 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
for i in range(len(blockLSInfo)):
lsInfo = blockLSInfo.pop(0)
if lsInfo.isLoad and lsInfo.isPCRelLoad:
annot_str = "memAccessCycles += simDCache(0x%x, 1); // PC Relative Load" % (lsInfo.PCRelAdd)
annot_str = "memAccessCycles += simDCache(0x%x, 1, &csim_result); // PC Relative Load" % (lsInfo.PCRelAdd)
annot = Annotation(annot_str, funcISC.fileName, blockISC.startLine-1, False)
addAnnotationToDict(dictAnnotLoadStore,
blockISC.startLine-1,
......@@ -361,14 +371,14 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
continue
elif lsInfo.isSpiltRegister:
if lsInfo.isLoad:
annot_str = "memAccessCycles += simDCache((SP + 0x%x), 1); // Reading Spilt Register" % (lsInfo.spiltRegAdd)
annot_str = "memAccessCycles += simDCache((SP + 0x%x), 1, &csim_result); // Reading Spilt Register" % (lsInfo.spiltRegAdd)
annot = Annotation(annot_str, funcISC.fileName, blockISC.startLine-1, False)
addAnnotationToDict(dictAnnotLoadStore,
blockISC.startLine-1,
annot)
continue
else:
annot_str = "memAccessCycles += simDCache((SP + 0x%x), 1); // Spilling Register" % (lsInfo.spiltRegAdd)
annot_str = "memAccessCycles += simDCache((SP + 0x%x), 1, &csim_result); // Spilling Register" % (lsInfo.spiltRegAdd)
annot = Annotation(annot_str, funcISC.fileName, blockISC.startLine-1, False)
addAnnotationToDict(dictAnnotLoadStore,
blockISC.startLine-1,
......@@ -389,12 +399,12 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
if access.isIndexed:
if access.ifIndexedIsArray:
param = find(lambda p: p.name == access.varName, funcISC.listParams)
annot_str = "memAccessCycles += simDCache(%s_addr + (sizeof(%s) * (%s)), %d);" % (access.varName, param.type, access.index, access.isRead)
annot_str = "memAccessCycles += simDCache(%s_addr + (sizeof(%s) * (%s)), %d, &csim_result);" % (access.varName, param.type, access.index, access.isRead)
else:
param = find(lambda p: p.name == access.varName, funcISC.listParams)
annot_str = "memAccessCycles += simDCache(%s_addr + (%s), %d);" % (access.varName, access.index, access.isRead)
annot_str = "memAccessCycles += simDCache(%s_addr + (%s), %d, &csim_result);" % (access.varName, access.index, access.isRead)
else:
annot_str = "memAccessCycles += simDCache(%s_addr, %d);" % (access.varName, access.isRead)
annot_str = "memAccessCycles += simDCache(%s_addr, %d, &csim_result);" % (access.varName, access.isRead)
annot = Annotation(annot_str, funcISC.fileName, lineNumISC, False)
annot.debug()
addAnnotationToDict(dictAnnotLoadStore,
......@@ -410,19 +420,19 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
if access.isIndexed:
if access.ifIndexedIsArray:
if var.isLocal:
annot_str = "memAccessCycles += simDCache((SP + %s_addr + (%d * (%s))), %d);" % (access.varName, var.size/var.length, access.index, access.isRead)
annot_str = "memAccessCycles += simDCache((SP + %s_addr + (%d * (%s))), %d, &csim_result);" % (access.varName, var.size/var.length, access.index, access.isRead)
else:
annot_str = "memAccessCycles += simDCache(%s_addr + (%d * (%s)), %d);" % (access.varName, var.size/var.length, access.index, access.isRead)
annot_str = "memAccessCycles += simDCache(%s_addr + (%d * (%s)), %d, &csim_result);" % (access.varName, var.size/var.length, access.index, access.isRead)
else:
if var.isLocal:
annot_str = "memAccessCycles += simDCache((SP + %s_addr + (%s)), %d);" % (access.varName, access.index, access.isRead)
annot_str = "memAccessCycles += simDCache((SP + %s_addr + (%s)), %d, &csim_result);" % (access.varName, access.index, access.isRead)
else:
annot_str = "memAccessCycles += simDCache(%s_addr + (%s), %d);" % (access.varName, access.index, access.isRead)
annot_str = "memAccessCycles += simDCache(%s_addr + (%s), %d, &csim_result);" % (access.varName, access.index, access.isRead)
else:
if var.isLocal:
annot_str = "memAccessCycles += simDCache((SP + %s_addr), %d);" % (access.varName, access.isRead)
annot_str = "memAccessCycles += simDCache((SP + %s_addr), %d, &csim_result);" % (access.varName, access.isRead)
else:
annot_str = "memAccessCycles += simDCache(%s_addr, %d);" % (access.varName, access.isRead)
annot_str = "memAccessCycles += simDCache(%s_addr, %d, &csim_result);" % (access.varName, access.isRead)
annot = Annotation(annot_str, funcISC.fileName, lineNumISC-1, False)
annot.debug()
addAnnotationToDict(dictAnnotLoadStore,
......@@ -449,7 +459,7 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
addAnnotationToDict(dictAnnotLoadStore,
blockISC.startLine-1,
annot)
annot_str = "memAccessCycles += simICache(0x%x, %d);" % (blockStartAddress, blockSizeRounded)
annot_str = "memAccessCycles += simICache(0x%x, %d, &csim_result);" % (blockStartAddress, blockSizeRounded)
annot = Annotation(annot_str, funcISC.fileName, blockISC.startLine-1, False)
addAnnotationToDict(dictAnnotLoadStore,
blockISC.startLine-1,
......@@ -478,7 +488,7 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
annot_str = 'printf("pipelineCycles = \%llu\\n", pipelineCycles);'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
annot_str = 'cacheSimFini();'
annot_str = 'cacheSimFini(&csim_result);'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
break
......
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>power_estimator</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>
/**
* Power Estimator:
*
* Objective: API to call power estimation.
*
* Description: The power estimator takes trace statistics gathered over a
* period of time as input and computes the amount of energy used over that period.
*/
/**
 * Declaration of the energy-estimation entry point.
 *
 * The parameter names suggest an execution-cycle count plus L1-hit, L2-hit
 * and L2-miss counts for the period being estimated (inferred from the names
 * only -- TODO confirm).
 *
 * NOTE(review): no definition of estimate_energy() is visible in this chunk.
 * The accompanying implementation defines estimate_power(), which returns
 * double and takes (blockName, execCycles, memAccessCycles, L2_Hits,
 * memAccesses) -- confirm which interface is intended.
 */
unsigned long long estimate_energy(unsigned long long execCycles,
unsigned long L1_Hits,
unsigned long L2_Hits,
unsigned long L2_Misses);
/**
* Power Estimator:
*
* Objective: API to call power estimation.
*
* Description: The power estimator takes trace statistics gathered over a
* period of time as input and computes the amount of energy used over that period.
*/
#include <stdio.h>
// Path of the trace file opened by power_estimator_init(); estimate_power()
// appends one line per call to it.
#define POWER_OUTPUT_FILE "power_trace.dat"
// Input Data from Target Hardware
//
// For every component, power_estimator_init() derives
//     <X>_power = <X>_ABV * volt * volt * freq
// NOTE(review): the units of the *_ABV, *_volt and *_freq values are not
// stated in this file (freq is presumably MHz) -- confirm.

// CPU: the _A_ figure is applied to cycles spent executing, the _C_ figure
// to cycles spent on memory accesses (see estimate_power()).
double CPU_A_ABV = 131;
double CPU_C_ABV = 22.39;
double CPU_freq = 256;
double CPU_volt = 0.905;
double CPU_A_power;
double CPU_C_power;
// L2 cache: the _A_ figure is applied to the time spent servicing L2 hits,
// the _C_ figure to the remainder of the block's duration.
double L2_A_ABV = 24.23925;
double L2_C_ABV = 8.652627;
double L2_freq = 128;
double L2_volt = 0.905;
double L2_A_power;
double L2_C_power;
// Cycles (at L2_freq) charged for each L2 hit.
unsigned int L2_Hit_Cycles = 8;
// Main memory: the _A_ figure is applied to the time spent on memory
// accesses, the _C_ figure to the remainder of the block's duration.
// The meaning of the four summed terms is not documented here.
double MEM_A_ABV = 42.82 + 91.73 + 33.32 + 3.4;
double MEM_C_ABV = 26.21 + 64.38 + 16.39 + 3.4;
double MEM_freq = 400;
double MEM_volt = 1.2;
double MEM_A_power;
double MEM_C_power;
// Cycles (at MEM_freq) charged for each memory access.
unsigned int MEM_Access_Cycles = 50;
// Trace file handle; opened in power_estimator_init().
FILE *output_fp;
// Sum of execCycles + memAccessCycles over all estimate_power() calls so far.
unsigned long long totalCycles = 0;
// Baselines that estimate_power() compares the incoming L2_Hits and
// memAccesses counters against to obtain per-call counts.
unsigned long long prev_L2_Hits = 0;
unsigned long long prev_memAccesses = 0;
// Sum of the energy computed by all estimate_power() calls so far.
double totalEnergy = 0.0;
/**
 * @brief Estimate the average power drawn during one block of execution.
 *
 * Adds the block's cycles to the global totalCycles, computes the energy
 * spent by the CPU, the L2 cache and main memory during the block, adds it
 * to totalEnergy, and appends one comma-separated line to the trace file
 * (when it is open).
 *
 * L2_Hits and memAccesses are treated as running totals: the per-block
 * counts are the difference from the values passed on the previous call,
 * which are remembered in prev_L2_Hits / prev_memAccesses.
 *
 * @param blockName       Label written as the first field of the trace line
 * @param execCycles      Number of cycles spent in active state of CPU
 * @param memAccessCycles Number of cycles spent in fetching data from memory
 * @param L2_Hits         Running total of L2 hits so far
 * @param memAccesses     Running total of main-memory accesses so far
 *
 * @return Average power over the block (block energy divided by block
 *         duration), or 0.0 when the block spans zero cycles. The unit
 *         follows from the *_ABV, *_volt and *_freq inputs.
 */
double estimate_power(char *blockName,
                      unsigned long long execCycles,
                      unsigned long long memAccessCycles,
                      unsigned long long L2_Hits,
                      unsigned long long memAccesses)
{
    unsigned long long startCycle;
    unsigned long long currBlock_L2_Hits;
    unsigned long long currBlock_memAccesses;
    double blockTime;
    double energy = 0.0;
    double power = 0.0;

    startCycle = totalCycles;
    totalCycles += execCycles + memAccessCycles;
    blockTime = (totalCycles - startCycle) / CPU_freq;

    // CPU
    energy = CPU_A_power * execCycles / CPU_freq;
    energy += CPU_C_power * memAccessCycles / CPU_freq;

    // L2: hits that occurred since the previous call (current minus previous;
    // the reverse order would wrap around on these unsigned counters).
    currBlock_L2_Hits = L2_Hits - prev_L2_Hits;
    prev_L2_Hits = L2_Hits;
    energy += L2_A_power * currBlock_L2_Hits * L2_Hit_Cycles / L2_freq;
    energy += L2_C_power * (blockTime - (currBlock_L2_Hits * L2_Hit_Cycles / L2_freq));

    // MEM: accesses that occurred since the previous call.
    currBlock_memAccesses = memAccesses - prev_memAccesses;
    prev_memAccesses = memAccesses;
    energy += MEM_A_power * currBlock_memAccesses * MEM_Access_Cycles / MEM_freq;
    energy += MEM_C_power * (blockTime - (currBlock_memAccesses * MEM_Access_Cycles / MEM_freq));

    totalEnergy += energy;

    // An empty block has no duration; report zero power instead of 0/0.
    if (blockTime > 0.0)
        power = energy / blockTime;

    // The trace file may be missing if power_estimator_init() was not called
    // or could not open it; the estimate is still returned in that case.
    if (output_fp != NULL) {
        fprintf(output_fp, "%s, %llu, %f, %llu, %llu, %llu, %llu\n",
                blockName, startCycle, power, execCycles, memAccessCycles,
                currBlock_L2_Hits, currBlock_memAccesses);
    }

    return power;
}
/**
 * Initialize the power estimator.
 *
 * Opens POWER_OUTPUT_FILE for writing (truncating any previous trace) and
 * derives each component's power figures from its hardware inputs as
 * ABV * volt^2 * freq. If the trace file cannot be opened, the reason is
 * reported on stderr and output_fp is left NULL.
 */
void power_estimator_init()
{
    output_fp = fopen(POWER_OUTPUT_FILE, "w");
    if (output_fp == NULL)
        perror(POWER_OUTPUT_FILE);

    CPU_A_power = CPU_A_ABV * CPU_volt * CPU_volt * CPU_freq;
    CPU_C_power = CPU_C_ABV * CPU_volt * CPU_volt * CPU_freq;

    L2_A_power = L2_A_ABV * L2_volt * L2_volt * L2_freq;
    L2_C_power = L2_C_ABV * L2_volt * L2_volt * L2_freq;

    MEM_A_power = MEM_A_ABV * MEM_volt * MEM_volt * MEM_freq;
    MEM_C_power = MEM_C_ABV * MEM_volt * MEM_volt * MEM_freq;
}
void power_estimator_fini()
{
fclose(fp);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment