Commit 5fe25a49 authored by Gaurav Kukreja's avatar Gaurav Kukreja

Updated Cache Sim Implementation, mostly complete

Signed-off-by: Gaurav Kukreja's avatarGaurav Kukreja <gaurav@gauravk.in>
parent 4c4e8d5e
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
struct csim_result_t struct csim_result_t
{ {
unsigned long long cyclesConsumed; unsigned long long latency;
unsigned long long L1Hits; unsigned long long L1Hits;
unsigned long long L2Hits; unsigned long long L2Hits;
unsigned long long L2Misses; unsigned long long L2Misses;
......
...@@ -66,9 +66,9 @@ cacheLine_t **L1DCache; ...@@ -66,9 +66,9 @@ cacheLine_t **L1DCache;
cacheLine_t **L1ICache; cacheLine_t **L1ICache;
cacheLine_t **L2Cache; cacheLine_t **L2Cache;
unsigned int memWriteLatency = 80; unsigned int memWriteLatency = 55;
unsigned int memReadLatency = 80; unsigned int memReadLatency = 55;
unsigned int memReadPrefetchLatency = 15; unsigned int memReadPrefetchLatency = 0;
unsigned long L1D_Hit_Read = 0; unsigned long L1D_Hit_Read = 0;
unsigned long L1D_Hit_Writeback = 0; unsigned long L1D_Hit_Writeback = 0;
...@@ -219,13 +219,13 @@ void initCacheParams () ...@@ -219,13 +219,13 @@ void initCacheParams ()
L1ICacheConf.isWriteThrough = 0; L1ICacheConf.isWriteThrough = 0;
L1ICacheConf.hitLatency = 1; L1ICacheConf.hitLatency = 1;
L1ICacheConf.missLatency = 1; L1ICacheConf.missLatency = 0;
/*** L2 Cache *****************/ /*** L2 Cache *****************/
L2CacheConf.lineLenBytes = 32; L2CacheConf.lineLenBytes = 32;
L2CacheConf.cacheSizeBytes = 512 * 1024; // 32 KB L2CacheConf.cacheSizeBytes = 256 * 1024; // 256 KB
L2CacheConf.numSets = 16; L2CacheConf.numSets = 16;
L2CacheConf.numLines = L2CacheConf.cacheSizeBytes / L2CacheConf.numLines = L2CacheConf.cacheSizeBytes /
...@@ -253,16 +253,16 @@ void initCacheParams () ...@@ -253,16 +253,16 @@ void initCacheParams ()
L2CacheConf.isWriteThrough = 0; L2CacheConf.isWriteThrough = 0;
L2CacheConf.hitLatency = 16; L2CacheConf.hitLatency = 14;
L2CacheConf.missLatency = 16; L2CacheConf.missLatency = 14;
} }
/** /**
* Allocates a 2 dimensional array. To be used to allocate space for cache lines * Allocates a 2 dimensional array. To be used to allocate space for cache lines
* *
* @param rows number of sets * @param rows number of cache lines
* @param cols number of cache lines * @param cols number of sets
* @param size size of the data structure to be stored * @param size size of the data structure to be stored
* *
* @return pointer to array of pointers pointing to rows of data. * @return pointer to array of pointers pointing to rows of data.
...@@ -311,7 +311,7 @@ unsigned long long cortexA5_simICache(unsigned long address, ...@@ -311,7 +311,7 @@ unsigned long long cortexA5_simICache(unsigned long address,
unsigned long index; unsigned long index;
unsigned long _address; unsigned long _address;
int setIndex = 0; int setIndex = 0;
int replaceIndex; int invalidSetIndex;
int hit = 0; int hit = 0;
unsigned long address_32; unsigned long address_32;
...@@ -321,8 +321,8 @@ unsigned long long cortexA5_simICache(unsigned long address, ...@@ -321,8 +321,8 @@ unsigned long long cortexA5_simICache(unsigned long address,
tag = getTagFromAddress(_address, L1ICacheConf.tagLenBits, L1ICacheConf.tagMask); tag = getTagFromAddress(_address, L1ICacheConf.tagLenBits, L1ICacheConf.tagMask);
index = getIndexFromAddress(_address, L1ICacheConf.subIndexLenBits, L1ICacheConf.indexMask); index = getIndexFromAddress(_address, L1ICacheConf.subIndexLenBits, L1ICacheConf.indexMask);
replaceIndex = -1;
hit = 0; hit = 0;
invalidSetIndex = -1;
for (setIndex = 0; setIndex < L1ICacheConf.numSets; setIndex++) for (setIndex = 0; setIndex < L1ICacheConf.numSets; setIndex++)
{ {
if (IS_CACHELINE_VALID(L1ICache[setIndex][index].flags)) if (IS_CACHELINE_VALID(L1ICache[setIndex][index].flags))
...@@ -331,13 +331,15 @@ unsigned long long cortexA5_simICache(unsigned long address, ...@@ -331,13 +331,15 @@ unsigned long long cortexA5_simICache(unsigned long address,
{ {
latency += L1ICacheConf.hitLatency; latency += L1ICacheConf.hitLatency;
L1I_Hit_Read++; L1I_Hit_Read++;
result->L1Hits++;
result->latency += L1ICacheConf.hitLatency;
hit = 1; hit = 1;
break; break;
} }
} }
else else
{ {
replaceIndex = setIndex; invalidSetIndex = setIndex;
} }
} }
...@@ -347,17 +349,67 @@ unsigned long long cortexA5_simICache(unsigned long address, ...@@ -347,17 +349,67 @@ unsigned long long cortexA5_simICache(unsigned long address,
// L1 Miss has occured! // L1 Miss has occured!
L1I_Miss++; L1I_Miss++;
latency += L1ICacheConf.missLatency; latency += L1ICacheConf.missLatency;
result->latency += L1ICacheConf.missLatency;
// Data will be present for next access! // Data will be present for next access!
if (replaceIndex == -1) if (invalidSetIndex == -1)
replaceIndex = random() % L1ICacheConf.numSets; {
L1ICache[replaceIndex][index].tag = tag; invalidSetIndex = random() % L1ICacheConf.numSets;
SET_CACHELINE_VALID(L1ICache[replaceIndex][index].flags);
#define EXCLUSIVE_L2_CACHE
#ifdef EXCLUSIVE_L2_CACHE
{
unsigned long L1ReplaceAdd;
/**
* Any address is contained in either L1 or L2 Cache. Evicted
* data from L1 Cache, is stored to L2 Cache.
*/
// Extract the replaced address
L1ReplaceAdd = L1ICache[index][invalidSetIndex].tag <<
(ADDRESS_LEN_BITS - L1ICacheConf.tagLenBits);
L1ReplaceAdd += index << L1ICacheConf.subIndexLenBits;
// Store replaced address in L2.
{
unsigned long l2Tag;
unsigned long l2Index;
unsigned int l2InvalidSetIndex;
l2Tag = getTagFromAddress(L1ReplaceAdd, L2CacheConf.tagLenBits,
L2CacheConf.tagMask);
l2Index = getIndexFromAddress(L1ReplaceAdd, L2CacheConf.subIndexLenBits,
L2CacheConf.indexMask);
l2InvalidSetIndex = -1;
for(setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{
if(!IS_CACHELINE_VALID(L2Cache[l2Index][setIndex].flags))
{
l2InvalidSetIndex = setIndex;
}
}
if(l2InvalidSetIndex == -1)
{
l2InvalidSetIndex = random() % L2CacheConf.numSets;
}
L2Cache[l2Index][l2InvalidSetIndex].tag = l2Tag;
// Add Latency to write to L2
// TODO: May not be needed, due to Store Buffering !!!!
// latency += L2CacheConf.hitLatency;
// result->latency += L2CacheConf.hitLatency;
}
}
#endif
}
L1ICache[invalidSetIndex][index].tag = tag;
SET_CACHELINE_VALID(L1ICache[invalidSetIndex][index].flags);
tag = getTagFromAddress(_address, L2CacheConf.tagLenBits, L2CacheConf.tagMask); tag = getTagFromAddress(_address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(_address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask); index = getIndexFromAddress(_address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask);
replaceIndex = -1;
hit = 0; hit = 0;
for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++) for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{ {
...@@ -367,32 +419,26 @@ unsigned long long cortexA5_simICache(unsigned long address, ...@@ -367,32 +419,26 @@ unsigned long long cortexA5_simICache(unsigned long address,
{ {
latency += L2CacheConf.hitLatency; latency += L2CacheConf.hitLatency;
L2_Hit_Read++; L2_Hit_Read++;
result->L2Hits++;
result->latency += L2CacheConf.hitLatency;
hit = 1; hit = 1;
break; break;
} }
} }
else
{
replaceIndex = setIndex;
}
} }
if (hit) if(hit)
continue; continue;
// L2 Miss has occured! // L2 Miss has occured!
L1I_Miss--; L1I_Miss--;
L2I_Miss++; L2I_Miss++;
result->L2Misses++;
latency += L2CacheConf.missLatency; latency += L2CacheConf.missLatency;
// Data will be present for next access! result->latency += L2CacheConf.missLatency;
if (replaceIndex == -1)
replaceIndex = random() % L2CacheConf.numSets;
L2Cache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L2Cache[replaceIndex][index].flags);
latency += memReadLatency; latency += memReadLatency;
result->latency += memReadLatency;
} }
return latency; return latency;
} }
...@@ -404,80 +450,151 @@ unsigned long long cortexA5_simDCache(unsigned long address, ...@@ -404,80 +450,151 @@ unsigned long long cortexA5_simDCache(unsigned long address,
unsigned long tag; unsigned long tag;
unsigned long index; unsigned long index;
int setIndex = 0; int setIndex = 0;
int replaceIndex; int invalidSetIndex;
int i; unsigned long L1ReplaceAdd = 0;
unsigned int L1ReplaceAddFlags = 0;
if (isReadAccess == 0 && L1DCacheConf.isWriteThrough == 1) // Write Access
{ // TODO : Implement the mechanism for Write Through Cache.
// Simply increment latency by time to write to memory /**
latency += memWriteLatency; * For write through, the latency will be memWriteLatency. Allocate the data
L1D_Hit_Writethrough++; * in L1 Cache.
return latency; *
} * For write back cache, we need to check if data is already in cache. We do
// For writeback, there is no latency. We can safely take this assumption, * that later.
// as we are only using a Single Core System. */
tag = getTagFromAddress(address, L1DCacheConf.tagLenBits, L1DCacheConf.tagMask); tag = getTagFromAddress(address, L1DCacheConf.tagLenBits, L1DCacheConf.tagMask);
index = getIndexFromAddress(address, L1DCacheConf.subIndexLenBits, L1DCacheConf.indexMask); index = getIndexFromAddress(address, L1DCacheConf.subIndexLenBits, L1DCacheConf.indexMask);
replaceIndex = -1; invalidSetIndex = -1;
for (setIndex = 0; setIndex < L1DCacheConf.numSets; setIndex++) for (setIndex = 0; setIndex < L1DCacheConf.numSets; setIndex++)
{ {
if (IS_CACHELINE_VALID(L1DCache[setIndex][index].flags)) if (IS_CACHELINE_VALID(L1DCache[index][setIndex].flags))
{ {
if (L1DCache[setIndex][index].tag == tag) if (L1DCache[index][setIndex].tag == tag)
{ {
// Add the latency
latency += L1DCacheConf.hitLatency; latency += L1DCacheConf.hitLatency;
if (isReadAccess) if (isReadAccess)
{ {
L1D_Hit_Read++; L1D_Hit_Read++;
result->L1Hits++; result->L1Hits++;
} }
else else // Write Access
{
L1D_Hit_Writeback++; L1D_Hit_Writeback++;
SET_CACHELINE_DIRTY(L1DCache[index][setIndex].flags);
}
// If data was found in L1 Cache, we return from here!!
result->latency += latency;
return latency; return latency;
} }
} }
else else
{ {
replaceIndex = setIndex; invalidSetIndex = setIndex;
} }
} }
// L1 Miss has occured! // L1 Miss has occured!
L1D_Miss++; L1D_Miss++;
latency += L1DCacheConf.missLatency; latency += L1DCacheConf.missLatency;
// Data will be present for next access! // Allocate Space for Data which will be present next time!!
if (replaceIndex == -1) if (invalidSetIndex == -1)
replaceIndex = random() % L1DCacheConf.numSets; {
L1DCache[replaceIndex][index].tag = tag; // Replace a random cache line
SET_CACHELINE_VALID(L1DCache[replaceIndex][index].flags); invalidSetIndex = random() % L1DCacheConf.numSets;
#define EXCLUSIVE_L2_CACHE
#ifdef EXCLUSIVE_L2_CACHE
/**
* Any address is contained in either L1 or L2 Cache. Eviceted data
* from L1 Cache, is stored to L2 Cache.
*/
// Extract the replaced address
L1ReplaceAdd = L1DCache[index][invalidSetIndex].tag <<
(ADDRESS_LEN_BITS - L1DCacheConf.tagLenBits);
L1ReplaceAdd += index << L1DCacheConf.subIndexLenBits;
L1ReplaceAddFlags = L1DCache[index][invalidSetIndex].flags;
// Store replaced address in L2.
{
unsigned long l2Tag;
unsigned long l2Index;
unsigned int l2InvalidSetIndex;
l2Tag = getTagFromAddress(L1ReplaceAdd, L2CacheConf.tagLenBits,
L2CacheConf.tagMask);
l2Index = getIndexFromAddress(L1ReplaceAdd, L2CacheConf.subIndexLenBits,
L2CacheConf.indexMask);
l2InvalidSetIndex = -1;
for(setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{
if(!IS_CACHELINE_VALID(L2Cache[l2Index][setIndex].flags))
{
l2InvalidSetIndex = setIndex;
}
}
if(l2InvalidSetIndex == -1)
{
l2InvalidSetIndex = random() % L2CacheConf.numSets;
if(IS_CACHELINE_DIRTY(L2Cache[l2Index][l2InvalidSetIndex].flags))
{
// Write Back to memory!
latency += memWriteLatency;
L2_Hit_Writeback++;
}
}
L2Cache[l2Index][l2InvalidSetIndex].tag = l2Tag;
L2Cache[l2Index][l2InvalidSetIndex].flags = L1ReplaceAddFlags;
// Add Latency to write to L2
// TODO: May not be needed, due to Store Buffering !!!!
// latency += L2CacheConf.hitLatency;
}
#endif
}
// Make new data available
L1DCache[index][invalidSetIndex].tag = tag;
SET_CACHELINE_VALID(L1DCache[index][invalidSetIndex].flags);
if(!isReadAccess)
SET_CACHELINE_DIRTY(L1DCache[index][invalidSetIndex].flags);
else
SET_CACHELINE_CLEAN(L1DCache[index][invalidSetIndex].flags);
// Lookup Data in L2 Cache
tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask); tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask); index = getIndexFromAddress(address, L2CacheConf.subIndexLenBits, L2CacheConf.indexMask);
replaceIndex = -1;
for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++) for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{ {
if (IS_CACHELINE_VALID(L2Cache[setIndex][index].flags)) if (IS_CACHELINE_VALID(L2Cache[index][setIndex].flags))
{ {
if (L2Cache[setIndex][index].tag == tag) if (L2Cache[index][setIndex].tag == tag)
{ {
/**
* If data is found in L2, just mark the line as invalid,
* to mark that the data has been moved to L1. The data was
* already made available in L1.
*
* Add the latency.
*/
latency += L2CacheConf.hitLatency; latency += L2CacheConf.hitLatency;
if (isReadAccess) SET_CACHELINE_INVALID(L2Cache[index][setIndex].flags);
{ SET_CACHELINE_CLEAN(L2Cache[index][setIndex].flags);
L2_Hit_Read++; L2_Hit_Read++;
result->L2Hits++; result->L2Hits++;
} result->latency += latency;
else
L2_Hit_Writeback++;
return latency; return latency;
} }
} }
else
{
replaceIndex = setIndex;
}
} }
// L2 Miss has occured! // L2 Miss has occured!
...@@ -486,37 +603,59 @@ unsigned long long cortexA5_simDCache(unsigned long address, ...@@ -486,37 +603,59 @@ unsigned long long cortexA5_simDCache(unsigned long address,
result->L2Misses++; result->L2Misses++;
latency += L2CacheConf.missLatency; latency += L2CacheConf.missLatency;
// Data will be present for next access! /**
if (replaceIndex == -1) * Data will directly be loaded to L1 Cache. Nothing to do, if L2 Miss.
replaceIndex = random() % L2CacheConf.numSets; */
L2Cache[replaceIndex][index].tag = tag;
SET_CACHELINE_VALID(L2Cache[replaceIndex][index].flags);
if(isReadAccess) #define DATA_PREFETCH
#ifdef DATA_PREFETCH
/**
* Checking for data prefetch at the end! Probably, need to do this in the
* beginning.
*/
if(1)
{ {
int i;
prevAccess_t *access = prevAccessList_tail; prevAccess_t *access = prevAccessList_tail;
for (i = 0; i < prefetch_table_entries && access != NULL; i++) for (i = 0; i < prefetch_table_entries && access != NULL; i++)
{ {
if (address == access->address + L2CacheConf.lineLenBytes) if (address == access->address + L2CacheConf.lineLenBytes)
{ {
//printf("0x%lx - 0x%lx\n", access->address, address);
if (access->sequentialAccess > 15) if (access->sequentialAccess > 15)
{ {
latency += memReadPrefetchLatency; /**
* Data would have been prefetched !!
* Important:
* 1. Data has already been loaded into L1 earlier.
* 2. Latencies added if here, are L1 Miss and L2 Miss.
* These latencies need not be added if prefetched!!
* 3. Incremement L1 Hit, and count for prefetches!
* 4. Decrement L2 Miss count.
*/
latency = memReadPrefetchLatency;
result->prefetches++; result->prefetches++;
result->L1Hits++; result->L1Hits++;
result->L2Misses--; result->L2Misses--;
} }
insertAccess(&prevAccessList_head, &prevAccessList_tail, address, access->sequentialAccess+1); insertAccess(&prevAccessList_head, &prevAccessList_tail, address, access->sequentialAccess+1);
result->latency += latency;
return latency; return latency;
} }
access = access->prev; access = access->prev;
} }
insertAccess(&prevAccessList_head, &prevAccessList_tail, address, 0); insertAccess(&prevAccessList_head, &prevAccessList_tail, address, 0);
} }
#endif
/**
* Data must be fetched from the memory !!!
* Add the memReadLatency only!
*/
latency += memReadLatency; latency += memReadLatency;
result->latency += latency;
return latency; return latency;
} }
...@@ -529,14 +668,14 @@ void cortexA5_cacheSimInit(struct csim_result_t *result) ...@@ -529,14 +668,14 @@ void cortexA5_cacheSimInit(struct csim_result_t *result)
result->L2Hits = 0; result->L2Hits = 0;
result->L2Misses = 0; result->L2Misses = 0;
result->prefetches = 0; result->prefetches = 0;
result->cyclesConsumed = 0; result->latency = 0;
L1DCache = (cacheLine_t **) alloc2D(L1DCacheConf.numSets, L1DCache = (cacheLine_t **) alloc2D(L1DCacheConf.numLines,
L1DCacheConf.numLines, sizeof(cacheLine_t)); L1DCacheConf.numSets, sizeof(cacheLine_t));
L1ICache = (cacheLine_t **) alloc2D(L1ICacheConf.numSets, L1ICache = (cacheLine_t **) alloc2D(L1ICacheConf.numLines,
L1ICacheConf.numLines, sizeof(cacheLine_t)); L1ICacheConf.numSets, sizeof(cacheLine_t));
L2Cache = (cacheLine_t **) alloc2D(L2CacheConf.numSets, L2Cache = (cacheLine_t **) alloc2D(L2CacheConf.numLines,
L2CacheConf.numLines, sizeof(cacheLine_t)); L2CacheConf.numSets, sizeof(cacheLine_t));
prevAccessList_head = NULL; prevAccessList_head = NULL;
prevAccessList_tail = NULL; prevAccessList_tail = NULL;
...@@ -553,7 +692,7 @@ void cortexA5_cacheSimFini(struct csim_result_t *result) ...@@ -553,7 +692,7 @@ void cortexA5_cacheSimFini(struct csim_result_t *result)
printf ("Total L2 Hits = %llu\n", result->L2Hits); printf ("Total L2 Hits = %llu\n", result->L2Hits);
printf ("Total L2 Misses = %llu\n", result->L2Misses); printf ("Total L2 Misses = %llu\n", result->L2Misses);
printf ("Total Prefetches = %llu\n", result->prefetches); printf ("Total Prefetches = %llu\n", result->prefetches);
printf ("Mem Access Cycles = %llu\n", result->cyclesConsumed); printf ("Mem Access Cycles = %llu\n", result->latency);
printf("\nL1 Data Cache\n"); printf("\nL1 Data Cache\n");
printf("\t Hit Read = %ld\n", L1D_Hit_Read); printf("\t Hit Read = %ld\n", L1D_Hit_Read);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment