Commit 679f6800 authored by Gaurav Kukreja

Fixed many small bugs in cache_sim, should be perfect now

Signed-off-by: Gaurav Kukreja <gaurav@gauravk.in>
parent 635059af
......@@ -11,6 +11,7 @@ CSIM_DIR = $(current_dir)
# Hardware Model to use
CACHESIM_HWMOD = cortexA5
#CACHESIM_HWMOD = generic
CACHESIM_SRC = $(CSIM_DIR)/src
CACHESIM_HEADERS = $(CSIM_DIR)/headers/
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "cacheSimHwMod.h"
......@@ -31,6 +32,7 @@ struct cacheConfig
unsigned int tagLenBits;
unsigned long indexMask;
unsigned int indexLenBits;
unsigned int subIndexLenBits;
// Features
unsigned int isWriteThrough;
......@@ -91,7 +93,6 @@ int log_base2(int val)
void initCacheParams ()
{
int subIndexLen = 0;
int i;
/*** L1 DCache *****************/
......@@ -103,7 +104,7 @@ void initCacheParams ()
L1DCacheConf.numLines = L1DCacheConf.cacheSizeBytes /
(L1DCacheConf.lineLenBytes * L1DCacheConf.numSets);
subIndexLen = log_base2(L1DCacheConf.lineLenBytes);
L1DCacheConf.subIndexLenBits = log_base2(L1DCacheConf.lineLenBytes);
L1DCacheConf.indexLenBits = log_base2(L1DCacheConf.numLines);
L1DCacheConf.indexMask = 0;
for (i = 0; i < L1DCacheConf.indexLenBits; i++)
......@@ -111,16 +112,16 @@ void initCacheParams ()
L1DCacheConf.indexMask = L1DCacheConf.indexMask << 1;
L1DCacheConf.indexMask |= 0x00000001;
}
L1DCacheConf.indexMask = L1DCacheConf.indexMask << subIndexLen;
L1DCacheConf.indexMask = L1DCacheConf.indexMask << L1DCacheConf.subIndexLenBits;
L1DCacheConf.tagLenBits = ADDRESS_LEN_BITS - L1DCacheConf.indexMask - subIndexLen;
L1DCacheConf.tagLenBits = ADDRESS_LEN_BITS - L1DCacheConf.indexLenBits - L1DCacheConf.subIndexLenBits;
L1DCacheConf.tagMask = 0;
for (i = 0; i < L1DCacheConf.tagLenBits; i++)
{
L1DCacheConf.tagMask = L1DCacheConf.tagMask << 1;
L1DCacheConf.tagMask |= 0x00000001;
}
L1DCacheConf.tagMask = L1DCacheConf.tagMask << (L1DCacheConf.indexLenBits + subIndexLen);
L1DCacheConf.tagMask = L1DCacheConf.tagMask << (L1DCacheConf.indexLenBits + L1DCacheConf.subIndexLenBits);
L1DCacheConf.isWriteThrough = 0;
......@@ -137,7 +138,7 @@ void initCacheParams ()
L1ICacheConf.numLines = L1ICacheConf.cacheSizeBytes /
(L1ICacheConf.lineLenBytes * L1ICacheConf.numSets);
subIndexLen = log_base2(L1ICacheConf.lineLenBytes);
L1ICacheConf.subIndexLenBits = log_base2(L1ICacheConf.lineLenBytes);
L1ICacheConf.indexLenBits = log_base2(L1ICacheConf.numLines);
L1ICacheConf.indexMask = 0;
for (i = 0; i < L1ICacheConf.indexLenBits; i++)
......@@ -145,16 +146,16 @@ void initCacheParams ()
L1ICacheConf.indexMask = L1ICacheConf.indexMask << 1;
L1ICacheConf.indexMask |= 0x00000001;
}
L1ICacheConf.indexMask = L1ICacheConf.indexMask << subIndexLen;
L1ICacheConf.indexMask = L1ICacheConf.indexMask << L1ICacheConf.subIndexLenBits;
L1ICacheConf.tagLenBits = ADDRESS_LEN_BITS - L1ICacheConf.indexMask - subIndexLen;
L1ICacheConf.tagLenBits = ADDRESS_LEN_BITS - L1ICacheConf.indexLenBits - L1ICacheConf.subIndexLenBits;
L1ICacheConf.tagMask = 0;
for (i = 0; i < L1ICacheConf.tagLenBits; i++)
{
L1ICacheConf.tagMask = L1ICacheConf.tagMask << 1;
L1ICacheConf.tagMask |= 0x00000001;
}
L1ICacheConf.tagMask = L1ICacheConf.tagMask << (L1ICacheConf.indexLenBits + subIndexLen);
L1ICacheConf.tagMask = L1ICacheConf.tagMask << (L1ICacheConf.indexLenBits + L1ICacheConf.subIndexLenBits);
L1ICacheConf.isWriteThrough = 0;
......@@ -166,12 +167,13 @@ void initCacheParams ()
L2CacheConf.lineLenBytes = 32;
L2CacheConf.cacheSizeBytes = 32 * 1024; // 32 KB
L2CacheConf.numSets = 2;
L2CacheConf.numSets = 4;
L2CacheConf.numLines = L2CacheConf.cacheSizeBytes /
(L2CacheConf.lineLenBytes * L2CacheConf.numSets);
subIndexLen = log_base2(L2CacheConf.lineLenBytes);
L2CacheConf.subIndexLenBits = log_base2(L2CacheConf.lineLenBytes);
// assert(5 == subIndexLen);
L2CacheConf.indexLenBits = log_base2(L2CacheConf.numLines);
L2CacheConf.indexMask = 0;
for (i = 0; i < L2CacheConf.indexLenBits; i++)
......@@ -179,16 +181,16 @@ void initCacheParams ()
L2CacheConf.indexMask = L2CacheConf.indexMask << 1;
L2CacheConf.indexMask |= 0x00000001;
}
L2CacheConf.indexMask = L2CacheConf.indexMask << subIndexLen;
L2CacheConf.indexMask = L2CacheConf.indexMask << L2CacheConf.subIndexLenBits;
L2CacheConf.tagLenBits = ADDRESS_LEN_BITS - L2CacheConf.indexMask - subIndexLen;
L2CacheConf.tagLenBits = ADDRESS_LEN_BITS - L2CacheConf.indexLenBits - L2CacheConf.subIndexLenBits;
L2CacheConf.tagMask = 0;
for (i = 0; i < L2CacheConf.tagLenBits; i++)
{
L2CacheConf.tagMask = L2CacheConf.tagMask << 1;
L2CacheConf.tagMask |= 0x00000001;
}
L2CacheConf.tagMask = L2CacheConf.tagMask << (L2CacheConf.indexLenBits + subIndexLen);
L2CacheConf.tagMask = L2CacheConf.tagMask << (L2CacheConf.indexLenBits + L2CacheConf.subIndexLenBits);
L2CacheConf.isWriteThrough = 0;
......@@ -247,43 +249,75 @@ unsigned long long cortexA5_simICache(unsigned long address,
unsigned int latency = 0;
unsigned long tag;
unsigned long index;
unsigned long _address;
int setIndex = 0;
int replaceIndex;
tag = getTagFromAddress(address, L1ICacheConf.tagLenBits, L1ICacheConf.tagMask);
index = getIndexFromAddress(address, L1ICacheConf.indexLenBits, L1ICacheConf.indexMask);
for (_address = address; _address <= address + nBytes; _address += 4)
{
tag = getTagFromAddress(_address, L1ICacheConf.tagLenBits, L1ICacheConf.tagMask);
index = getIndexFromAddress(_address, L1ICacheConf.subIndexLenBits, L1ICacheConf.indexMask);
replaceIndex = -1;
for (setIndex = 0; setIndex < L1ICacheConf.numSets; setIndex++)
{
if (L1ICache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L1ICache[setIndex][index].flags))
if (IS_CACHELINE_VALID(L1ICache[setIndex][index].flags))
{
if (L1ICache[setIndex][index].tag == tag)
{
latency += L1ICacheConf.hitLatency;
L1I_Hit_Read++;
return latency;
}
}
else
{
replaceIndex = setIndex;
}
}
// L1 Miss has occurred!
L1I_Miss++;
latency += L1ICacheConf.missLatency;
tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(address, L2CacheConf.indexLenBits, L2CacheConf.indexMask);
// Data will be present for next access!
if (replaceIndex == -1)
replaceIndex = random() % L1ICacheConf.numSets;
L1ICache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L1ICache[replaceIndex][index].flags);
tag = getTagFromAddress(_address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(_address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask);
replaceIndex = -1;
for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{
if (L2Cache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L2Cache[setIndex][index].flags))
if (IS_CACHELINE_VALID(L2Cache[setIndex][index].flags))
{
if (L2Cache[setIndex][index].tag == tag)
{
latency += L2CacheConf.hitLatency;
L2_Hit_Read++;
return latency;
}
}
else
{
replaceIndex = setIndex;
}
}
// L2 Miss has occurred!
L2_Miss++;
latency += L2CacheConf.missLatency;
// Data will be present for next access!
if (replaceIndex == -1)
replaceIndex = random() % L2CacheConf.numSets;
L2Cache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L2Cache[replaceIndex][index].flags);
latency += memReadLatency;
}
return latency;
}
......@@ -296,23 +330,27 @@ unsigned long long cortexA5_simDCache(unsigned long address,
unsigned long tag;
unsigned long index;
int setIndex = 0;
int replaceIndex;
if (isReadAccess == 0 && L1DCacheConf.isWriteThrough == 1) // Write Access
{
// Simply increment latency by time to write to memory
latency += memWriteLatency;
L1D_Hit_Writethrough++;
return latency;
}
// For writeback, there is no latency. We can safely make this assumption,
// as we are only using a single-core system.
tag = getTagFromAddress(address, L1DCacheConf.tagLenBits, L1DCacheConf.tagMask);
index = getIndexFromAddress(address, L1DCacheConf.indexLenBits, L1DCacheConf.indexMask);
index = getIndexFromAddress(address, L1DCacheConf.subIndexLenBits, L1DCacheConf.indexMask);
replaceIndex = -1;
for (setIndex = 0; setIndex < L1DCacheConf.numSets; setIndex++)
{
if (L1DCache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L1DCache[setIndex][index].flags))
if (IS_CACHELINE_VALID(L1DCache[setIndex][index].flags))
{
if (L1DCache[setIndex][index].tag == tag)
{
latency += L1DCacheConf.hitLatency;
if (isReadAccess)
......@@ -322,17 +360,30 @@ unsigned long long cortexA5_simDCache(unsigned long address,
return latency;
}
}
else
{
replaceIndex = setIndex;
}
}
// L1 Miss has occurred!
L1D_Miss++;
latency += L1DCacheConf.missLatency;
// Data will be present for next access!
if (replaceIndex == -1)
replaceIndex = random() % L1DCacheConf.numSets;
L1DCache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L1DCache[replaceIndex][index].flags);
tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(address, L2CacheConf.indexLenBits, L2CacheConf.indexMask);
index = getIndexFromAddress(address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask);
replaceIndex = -1;
for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{
if (L2Cache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L2Cache[setIndex][index].flags))
if (IS_CACHELINE_VALID(L2Cache[setIndex][index].flags))
{
if (L2Cache[setIndex][index].tag == tag)
{
latency += L2CacheConf.hitLatency;
if (isReadAccess)
......@@ -342,10 +393,22 @@ unsigned long long cortexA5_simDCache(unsigned long address,
return latency;
}
}
else
{
replaceIndex = setIndex;
}
}
// L2 Miss has occurred!
L2_Miss++;
latency += L2CacheConf.missLatency;
// Data will be present for next access!
if (replaceIndex == -1)
replaceIndex = random() % L2CacheConf.numSets;
L2Cache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L2Cache[replaceIndex][index].flags);
latency += memReadLatency;
return latency;
}
......@@ -353,7 +416,9 @@ unsigned long long cortexA5_simDCache(unsigned long address,
void cortexA5_cacheSimInit()
{
// Allocate space for caches
printf("%s: %d\n", __func__, __LINE__);
initCacheParams();
printf("%s: %d\n", __func__, __LINE__);
L1DCache = (cacheLine_t **) alloc2D(L1DCacheConf.numSets,
L1DCacheConf.numLines, sizeof(cacheLine_t));
......@@ -362,15 +427,12 @@ void cortexA5_cacheSimInit()
L2Cache = (cacheLine_t **) alloc2D(L2CacheConf.numSets,
L2CacheConf.numLines, sizeof(cacheLine_t));
printf("%s: %d\n", __func__, __LINE__);
return;
}
void cortexA5_cacheSimFini()
{
free(L1DCache);
free(L1ICache);
free(L2Cache);
printf("Statistics : \n");
printf("\nL1 Data Cache\n");
......@@ -387,6 +449,10 @@ void cortexA5_cacheSimFini()
printf("\t Hit Writeback = %ld\n", L2_Hit_Writeback);
printf("\t Miss = %ld\n", L2_Miss);
free(L1DCache);
free(L1ICache);
free(L2Cache);
return;
}
......
......@@ -8,10 +8,10 @@
#define HERE printf("%s: %s: %d\n", __FILE__, __func__, __LINE__)
#define MAX_REPEATS 2
#define MAX_ACCESSES 524288
#define MAX_REPEATS 1
#define MAX_ACCESSES 128
#define START_ADD 0x12345678
#define START_ADD 0x0
int main(int argc, char **argv)
{
......@@ -20,7 +20,7 @@ int main(int argc, char **argv)
cacheSimInit();
for(unsigned long j = 0; j < MAX_REPEATS; j++)
for(unsigned long i = 0; i < MAX_ACCESSES; i++)
for(unsigned long i = 0; i < MAX_ACCESSES; i+=4)
{
simDCache(address + i, 1);
}
......
#include <stdio.h>
#define ADDRESS_LEN_BITS 32
/*
 * Integer base-2 logarithm, rounded down.
 *
 * val - value to take the logarithm of.
 *
 * Returns the position of the highest set bit of val (e.g. 16 -> 4, 17 -> 4,
 * 1 -> 0). Zero and negative inputs return 0.
 *
 * Negative inputs are rejected up front: right-shifting a negative int is
 * implementation-defined, and with the usual arithmetic shift the value
 * settles at -1 and the loop below would never terminate.
 */
int log_base2(int val)
{
    int ret = 0;

    if (val <= 0)
    {
        return 0;
    }

    while (val >>= 1)
    {
        ++ret;
    }
    return ret;
}
/*
 * Extract the tag field from an address.
 *
 * address       - address being looked up
 * tagLengthBits - width of the tag field in bits; must be in the range
 *                 1..ADDRESS_LEN_BITS so that the shift amount is valid
 * tagMask       - mask selecting the tag bits in their in-address position
 *
 * Returns (address & tagMask) shifted right by
 * (ADDRESS_LEN_BITS - tagLengthBits), i.e. with the tag starting at bit 0.
 *
 * Declared static inline: a plain `inline` definition in C99/C11 does not
 * emit an external definition, so any call the compiler chooses not to inline
 * (e.g. at -O0) fails to link.
 */
static inline unsigned long getTagFromAddress(unsigned long address,
        unsigned int tagLengthBits, unsigned long tagMask)
{
    return (address & tagMask) >> (ADDRESS_LEN_BITS - tagLengthBits);
}
/*
 * Extract the cache index field from an address.
 *
 * address          - address being looked up
 * offsetLengthBits - number of low-order bits below the index field; the
 *                    masked value is shifted right by this amount
 * indexMask        - mask selecting the index bits in their in-address
 *                    position
 *
 * Returns (address & indexMask) shifted down so the index starts at bit 0.
 *
 * Declared static inline: a plain `inline` definition in C99/C11 does not
 * emit an external definition, so any call the compiler chooses not to inline
 * (e.g. at -O0) fails to link.
 */
static inline unsigned long getIndexFromAddress(unsigned long address,
        unsigned int offsetLengthBits, unsigned long indexMask)
{
    return (address & indexMask) >> offsetLengthBits;
}
/*
 * Stand-alone sanity check for the address-decoding helpers.
 *
 * Builds an 8-bit index mask positioned above log_base2(16) offset bits,
 * decodes the fixed address 0x12345678 with a 20-bit tag, and prints the
 * intermediate values so they can be inspected by eye.
 *
 * Returns 0. The standard requires main to return int; the previous
 * `void main()` form is not a portable signature.
 */
int main(void)
{
    const unsigned long address = 0x12345678;
    const int indexBits = 8;
    /* Offset width is derived once so the mask and the extraction agree. */
    const int offsetBits = log_base2(16);
    unsigned long indexMask = 0;
    unsigned long tag;
    unsigned long index;
    int i;

    /* Set the low indexBits bits, then move them above the offset field. */
    for (i = 0; i < indexBits; i++)
    {
        indexMask = (indexMask << 1) | 0x00000001UL;
    }
    indexMask <<= offsetBits;

    tag = getTagFromAddress(address, 20, 0xFFFFF000);
    index = getIndexFromAddress(address, (unsigned int)offsetBits, indexMask);

    printf("log_base2(16) = %d\n", log_base2(16));
    printf("IndexMask = %lx\n", indexMask);
    printf("Tag = %lx\n", tag);
    printf("Index = %lx\n", index);

    return 0;
}
......@@ -11,6 +11,7 @@
#include "cacheSim.h"
unsigned long SP = 0x1234;
unsigned long long memAccessCycles = 0;
unsigned long long pipelineCycles = 0;
#include <stdio.h>
......@@ -45,9 +46,10 @@ void sieve_func() {
sieve_funcbb_2:
// # PRED: ENTRY [100.0%] (fallthru,exec)
SP = SP + 0x1e84a0;
memAccessCycles += simDCache(0x35c, 4); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x200, 40);
pipelineCycles += 24;
ivtmp_68 = 0;
// # SUCC: 3 [100.0%] (fallthru,exec)
......@@ -56,6 +58,7 @@ sieve_funcbb_3:
memAccessCycles += simDCache(results_addr + (4 * (+ivtmp_68)), 0);
// Simulating I Cache for obj block 1
memAccessCycles += simICache(0x228, 28);
pipelineCycles += 13;
*(unsigned int*)((uintptr_t)&results + (uintptr_t)ivtmp_68) = 0;
memAccessCycles += simDCache((SP + sieve_addr + (4 * (+ivtmp_68))), 0);
*(unsigned int*)((uintptr_t)&sieve + (uintptr_t)ivtmp_68) = 1;
......@@ -70,6 +73,7 @@ sieve_funcbb_17:
// # PRED: 3 [1.0%] (false,exec)
// Simulating I Cache for obj block 2
memAccessCycles += simICache(0x244, 52);
pipelineCycles += 21;
ivtmp_49 = 6;
ivtmp_58 = 4;
i_72 = 2;
......@@ -79,6 +83,7 @@ sieve_funcbb_4:
// # PRED: 7 [99.0%] (true,exec) 17 [100.0%] (fallthru)
// Simulating I Cache for obj block 3
memAccessCycles += simICache(0x278, 16);
pipelineCycles += 9;
D_2263 = (unsigned int) i_72;
memAccessCycles += simDCache((SP + sieve_addr + (4 * (+D_2263*4))), 1);
if (*(unsigned int*)((uintptr_t)&sieve + (uintptr_t)D_2263 * 4) != 0)
......@@ -91,6 +96,7 @@ sieve_funcbb_5:
// # PRED: 4 [50.0%] (true,exec)
// Simulating I Cache for obj block 4
memAccessCycles += simICache(0x288, 12);
pipelineCycles += 8;
j_76 = (int) ivtmp_58;
if (j_76 <= 499999)
goto sieve_funcbb_18;
......@@ -102,6 +108,7 @@ sieve_funcbb_18:
// # PRED: 5 [91.0%] (true,exec)
// Simulating I Cache for obj block 5
memAccessCycles += simICache(0x294, 4);
pipelineCycles += 8;
ivtmp_74 = ivtmp_49;
// # SUCC: 6 [100.0%] (fallthru)
......@@ -110,6 +117,7 @@ sieve_funcbb_6:
memAccessCycles += simDCache((SP + sieve_addr + (4 * (j_76))), 0);
// Simulating I Cache for obj block 6
memAccessCycles += simICache(0x298, 40);
pipelineCycles += 17;
sieve[j_76] = 0;
D_2252 = (unsigned int) j_76 + D_2263;
j_76 = (int) D_2252;
......@@ -124,6 +132,7 @@ sieve_funcbb_7:
// # PRED: 4 [50.0%] (false,exec) 6 [9.0%] (false,exec) 5 [9.0%] (false,exec)
// Simulating I Cache for obj block 7
memAccessCycles += simICache(0x2c0, 24);
pipelineCycles += 19;
i_72 = i_72 + 1;
ivtmp_58 = ivtmp_58 + 2;
ivtmp_49 = ivtmp_49 + 3;
......@@ -135,15 +144,17 @@ memAccessCycles += simICache(0x2c0, 24);
sieve_funcbb_8:
// # PRED: 7 [1.0%] (false,exec)
memAccessCycles += simDCache(0x35c, 4); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
// Simulating I Cache for obj block 8
memAccessCycles += simICache(0x2d8, 24);
pipelineCycles += 13;
j = 2;
i = 0;
// # SUCC: 9 [100.0%] (fallthru,exec)
sieve_funcbb_9:
// # PRED: 11 [99.0%] (true,exec) 8 [100.0%] (fallthru,exec)
pipelineCycles += 12;
D_2240 = (unsigned int) j;
memAccessCycles += simDCache((SP + sieve_addr + (4 * (+D_2240*4))), 1);
if (*(unsigned int*)((uintptr_t)&sieve + (uintptr_t)D_2240 * 4) != 0)
......@@ -172,10 +183,11 @@ memAccessCycles += simICache(0x2f0, 28);
sieve_funcbb_12:
// # PRED: 11 [1.0%] (false,exec)
memAccessCycles += simDCache(0x35c, 4); // PC Relative Load
memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(results_addr + (4 * (0)), 1);
// Simulating I Cache for obj block 10
memAccessCycles += simICache(0x30c, 16);
pipelineCycles += 9;
if (results[0] == 0)
goto sieve_funcbb_16;
else
......@@ -186,6 +198,7 @@ sieve_funcbb_13:
// # PRED: 12 [95.5%] (false,exec)
// Simulating I Cache for obj block 11
memAccessCycles += simICache(0x31c, 12);
pipelineCycles += 10;
ivtmp_36 = (uintptr_t)&results;
D_2230 = ivtmp_36 + 1999996;
// # SUCC: 14 [100.0%] (fallthru,exec)
......@@ -195,7 +208,7 @@ sieve_funcbb_14:
// Simulating I Cache for obj block 12
memAccessCycles += simICache(0x328, 12);
// TODO: UnmappedLS: Load GlobalVar results at line 224
memAccessCycles += simDCache(results_addr + ivtmp_36 - (D_2230 - 1999996), 1);
pipelineCycles += 8;
if (*(unsigned int*)((uintptr_t)ivtmp_36 + 4) == 0)
goto sieve_funcbb_16;
else
......@@ -206,6 +219,7 @@ sieve_funcbb_15:
// # PRED: 14 [95.5%] (false,exec)
// Simulating I Cache for obj block 13
memAccessCycles += simICache(0x334, 12);
pipelineCycles += 9;
ivtmp_36 = ivtmp_36 + 4;
if (ivtmp_36 != D_2230)
goto sieve_funcbb_14;
......@@ -215,10 +229,11 @@ memAccessCycles += simICache(0x334, 12);
sieve_funcbb_16:
// # PRED: 14 [4.5%] (true,exec) 15 [1.1%] (false,exec) 12 [4.5%] (true,exec)
memAccessCycles += simDCache(0x360, 4); // PC Relative Load
memAccessCycles += simDCache(0x360, 1); // PC Relative Load
memAccessCycles += simDCache(m_addr, 0);
// Simulating I Cache for obj block 14
memAccessCycles += simICache(0x340, 28);
pipelineCycles += 20;
m.v = 0;
return;
// # SUCC: EXIT [100.0%]
......@@ -234,8 +249,10 @@ cacheSimInit();
SP = SP + 0x8;
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x364, 20);
pipelineCycles += 12;
sieve_func ();
printf("memAccessCycles = \%llu\n", memAccessCycles);
printf("pipelineCycles = \%llu\n", pipelineCycles);
cacheSimFini();
return 0;
// # SUCC: EXIT [100.0%]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment