Commit 635059af authored by Gaurav Kukreja

Combined commit for two major changes:

 * Added pipeline_sim for simulating the pipeline; not completely tested
 * Added Cache HW Mod for Cortex A5
Signed-off-by: Gaurav Kukreja <gaurav@gauravk.in>
parent 70b806db
......@@ -10,7 +10,7 @@ current_dir := $(patsubst %/,%,$(dir $(mkfile_path)))
CSIM_DIR = $(current_dir)
# Hardware Model to use
-CACHESIM_HWMOD = generic
+CACHESIM_HWMOD = cortexA5
CACHESIM_SRC = $(CSIM_DIR)/src
CACHESIM_HEADERS = $(CSIM_DIR)/headers/
......
......@@ -15,6 +15,11 @@ ifeq ($(CACHESIM_HWMOD),generic)
OBJECTS += genericHwMod.o
endif
ifeq ($(CACHESIM_HWMOD),cortexA5)
SOURCES += cortexA5HwMod.c
OBJECTS += cortexA5HwMod.o
endif
all: cacheSim
cacheSim: $(SOURCES)
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cacheSimHwMod.h"
#define CACHELINE_VALID_BIT (1 << 0)
#define IS_CACHELINE_VALID(flags) ((flags) & CACHELINE_VALID_BIT)
#define SET_CACHELINE_VALID(flags) ((flags) |= CACHELINE_VALID_BIT)
#define SET_CACHELINE_INVALID(flags) ((flags) &= ~CACHELINE_VALID_BIT)
#define CACHELINE_DIRTY_BIT (1 << 1)
#define IS_CACHELINE_DIRTY(flags) ((flags) & CACHELINE_DIRTY_BIT)
#define SET_CACHELINE_DIRTY(flags) ((flags) |= CACHELINE_DIRTY_BIT)
#define SET_CACHELINE_CLEAN(flags) ((flags) &= ~CACHELINE_DIRTY_BIT)
#define ADDRESS_LEN_BITS 32
/**** DATA STRUCTURES *********************************************************/
struct cacheConfig
{
// Size
unsigned int lineLenBytes;
unsigned int cacheSizeBytes;
unsigned int numSets; // associativity: number of ways
// Derived
unsigned int numLines; // lines per way, i.e. the number of sets
unsigned long tagMask;
unsigned int tagLenBits;
unsigned long indexMask;
unsigned int indexLenBits;
// Features
unsigned int isWriteThrough;
// Latencies
unsigned int hitLatency;
unsigned int missLatency;
};
typedef struct cacheConfig cacheConfig_t;
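For concreteness, here is how the derived fields work out for the L1 data cache parameters set in initCacheParams() below (32-byte lines, 4 KB, numSets = 4). This worked example is illustrative only and not part of the commit:

/* Worked example (not in the commit), assuming 32-bit addresses:
 *   numLines     = 4096 / (32 * 4) = 32   lines per way
 *   offset       = log2(32)        = 5    bits
 *   indexLenBits = log2(32)        = 5    bits
 *   tagLenBits   = 32 - 5 - 5      = 22   bits
 *   indexMask    = 0x000003E0             (bits 9..5)
 *   tagMask      = 0xFFFFFC00             (bits 31..10)
 */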
/**
* Stores all data related to a cache line.
*/
struct cacheLine
{
unsigned int flags;
unsigned long tag;
};
typedef struct cacheLine cacheLine_t;
/**** GLOBAL VARIABLES ********************************************************/
cacheConfig_t L1DCacheConf;
cacheConfig_t L1ICacheConf;
cacheConfig_t L2CacheConf;
cacheLine_t **L1DCache;
cacheLine_t **L1ICache;
cacheLine_t **L2Cache;
unsigned int memWriteLatency = 100;
unsigned int memReadLatency = 100;
unsigned long L1D_Hit_Read = 0;
unsigned long L1D_Hit_Writeback = 0;
unsigned long L1D_Hit_Writethrough = 0;
unsigned long L1D_Miss = 0;
unsigned long L1I_Hit_Read = 0;
unsigned long L1I_Hit_Writeback = 0;
unsigned long L1I_Hit_Writethrough = 0;
unsigned long L1I_Miss = 0;
unsigned long L2_Hit_Read = 0;
unsigned long L2_Hit_Writeback = 0;
unsigned long L2_Hit_Writethrough = 0;
unsigned long L2_Miss = 0;
/**** LOCAL FUNCTIONS *********************************************************/
int log_base2(int val)
{
int ret = 0;
while (val >>= 1) ++ret;
return ret;
}
void initCacheParams ()
{
int subIndexLen = 0;
int i;
/*** L1 DCache *****************/
L1DCacheConf.lineLenBytes = 32;
L1DCacheConf.cacheSizeBytes = 4 * 1024; // 4 KB
L1DCacheConf.numSets = 4;
L1DCacheConf.numLines = L1DCacheConf.cacheSizeBytes /
(L1DCacheConf.lineLenBytes * L1DCacheConf.numSets);
subIndexLen = log_base2(L1DCacheConf.lineLenBytes);
L1DCacheConf.indexLenBits = log_base2(L1DCacheConf.numLines);
L1DCacheConf.indexMask = 0;
for (i = 0; i < L1DCacheConf.indexLenBits; i++)
{
L1DCacheConf.indexMask = L1DCacheConf.indexMask << 1;
L1DCacheConf.indexMask |= 0x00000001;
}
L1DCacheConf.indexMask = L1DCacheConf.indexMask << subIndexLen;
L1DCacheConf.tagLenBits = ADDRESS_LEN_BITS - L1DCacheConf.indexLenBits - subIndexLen;
L1DCacheConf.tagMask = 0;
for (i = 0; i < L1DCacheConf.tagLenBits; i++)
{
L1DCacheConf.tagMask = L1DCacheConf.tagMask << 1;
L1DCacheConf.tagMask |= 0x00000001;
}
L1DCacheConf.tagMask = L1DCacheConf.tagMask << (L1DCacheConf.indexLenBits + subIndexLen);
L1DCacheConf.isWriteThrough = 0;
L1DCacheConf.hitLatency = 2;
L1DCacheConf.missLatency = 2;
/*** L1 ICache *****************/
L1ICacheConf.lineLenBytes = 32;
L1ICacheConf.cacheSizeBytes = 4 * 1024; // 4 KB
L1ICacheConf.numSets = 2;
L1ICacheConf.numLines = L1ICacheConf.cacheSizeBytes /
(L1ICacheConf.lineLenBytes * L1ICacheConf.numSets);
subIndexLen = log_base2(L1ICacheConf.lineLenBytes);
L1ICacheConf.indexLenBits = log_base2(L1ICacheConf.numLines);
L1ICacheConf.indexMask = 0;
for (i = 0; i < L1ICacheConf.indexLenBits; i++)
{
L1ICacheConf.indexMask = L1ICacheConf.indexMask << 1;
L1ICacheConf.indexMask |= 0x00000001;
}
L1ICacheConf.indexMask = L1ICacheConf.indexMask << subIndexLen;
L1ICacheConf.tagLenBits = ADDRESS_LEN_BITS - L1ICacheConf.indexLenBits - subIndexLen;
L1ICacheConf.tagMask = 0;
for (i = 0; i < L1ICacheConf.tagLenBits; i++)
{
L1ICacheConf.tagMask = L1ICacheConf.tagMask << 1;
L1ICacheConf.tagMask |= 0x00000001;
}
L1ICacheConf.tagMask = L1ICacheConf.tagMask << (L1ICacheConf.indexLenBits + subIndexLen);
L1ICacheConf.isWriteThrough = 0;
L1ICacheConf.hitLatency = 2;
L1ICacheConf.missLatency = 2;
/*** L2 Cache *****************/
L2CacheConf.lineLenBytes = 32;
L2CacheConf.cacheSizeBytes = 32 * 1024; // 32 KB
L2CacheConf.numSets = 2;
L2CacheConf.numLines = L2CacheConf.cacheSizeBytes /
(L2CacheConf.lineLenBytes * L2CacheConf.numSets);
subIndexLen = log_base2(L2CacheConf.lineLenBytes);
L2CacheConf.indexLenBits = log_base2(L2CacheConf.numLines);
L2CacheConf.indexMask = 0;
for (i = 0; i < L2CacheConf.indexLenBits; i++)
{
L2CacheConf.indexMask = L2CacheConf.indexMask << 1;
L2CacheConf.indexMask |= 0x00000001;
}
L2CacheConf.indexMask = L2CacheConf.indexMask << subIndexLen;
L2CacheConf.tagLenBits = ADDRESS_LEN_BITS - L2CacheConf.indexLenBits - subIndexLen;
L2CacheConf.tagMask = 0;
for (i = 0; i < L2CacheConf.tagLenBits; i++)
{
L2CacheConf.tagMask = L2CacheConf.tagMask << 1;
L2CacheConf.tagMask |= 0x00000001;
}
L2CacheConf.tagMask = L2CacheConf.tagMask << (L2CacheConf.indexLenBits + subIndexLen);
L2CacheConf.isWriteThrough = 0;
L2CacheConf.hitLatency = 14;
L2CacheConf.missLatency = 14;
}
/**
* Allocates a 2-dimensional array; used to allocate space for cache lines.
*
* @param rows number of sets
* @param cols number of cache lines
* @param size size of the data structure to be stored
*
* @return pointer to array of pointers pointing to rows of data.
*/
void** alloc2D(unsigned int rows, unsigned int cols, size_t size)
{
void** ret;
char *data;
unsigned int i;
size_t arrSize = (rows * sizeof(void*)) + (rows * cols * size);
ret = malloc(arrSize);
memset(ret, 0, arrSize);
data = (char*) (ret + rows);
for(i = 0; i < rows; i++)
{
ret[i] = data + i * cols * size;
}
return ret;
}
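alloc2D packs the row-pointer table and the row data into a single allocation, so one free() releases everything. A minimal usage sketch (illustrative, not in the commit):

/* Sketch (not in the commit): 4 ways of 32 lines each, freed in one call. */
cacheLine_t **lines = (cacheLine_t **) alloc2D(4, 32, sizeof(cacheLine_t));
lines[1][7].tag = 0x1234;   /* way 1, line 7 */
free(lines);                /* releases row pointers and data together */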
static inline unsigned long getTagFromAddress(unsigned long address,
unsigned int tagLengthBits, unsigned long tagMask)
{
return (address & tagMask) >> (ADDRESS_LEN_BITS - tagLengthBits);
}
static inline unsigned long getIndexFromAddress(unsigned long address,
unsigned int offsetLengthBits, unsigned long indexMask)
{
return (address & indexMask) >> offsetLengthBits;
}
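A worked decomposition under the L1 D-cache geometry above (5 offset bits, 5 index bits, 22 tag bits); note that the second argument of getIndexFromAddress must be the line-offset width. This example is not part of the commit:

/* Worked example (not in the commit), L1 D-cache masks from initCacheParams():
 *   getIndexFromAddress(0x80001234, 5, 0x000003E0)
 *     = (0x80001234 & 0x000003E0) >> 5  = 0x11 (line 17)
 *   getTagFromAddress(0x80001234, 22, 0xFFFFFC00)
 *     = (0x80001234 & 0xFFFFFC00) >> 10 = 0x200004
 */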
/**** HWMOD FUNCTIONS *********************************************************/
unsigned long long cortexA5_simICache(unsigned long address,
unsigned int nBytes)
{
unsigned int latency = 0;
unsigned long tag;
unsigned long index;
int setIndex = 0;
tag = getTagFromAddress(address, L1ICacheConf.tagLenBits, L1ICacheConf.tagMask);
index = getIndexFromAddress(address, log_base2(L1ICacheConf.lineLenBytes), L1ICacheConf.indexMask); // shift by the line-offset width, not the index width
for (setIndex = 0; setIndex < L1ICacheConf.numSets; setIndex++)
{
if (L1ICache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L1ICache[setIndex][index].flags))
{
latency += L1ICacheConf.hitLatency;
L1I_Hit_Read++;
return latency;
}
}
// L1 Miss has occurred!
L1I_Miss++;
latency += L1ICacheConf.missLatency;
tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(address, log_base2(L2CacheConf.lineLenBytes), L2CacheConf.indexMask);
for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{
if (L2Cache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L2Cache[setIndex][index].flags))
{
latency += L2CacheConf.hitLatency;
L2_Hit_Read++;
return latency;
}
}
// L2 Miss has occurred!
L2_Miss++;
latency += L2CacheConf.missLatency;
latency += memReadLatency;
return latency;
}
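As committed, no code path ever marks a cache line valid, so every lookup in both simulators misses; the hit/miss plumbing is in place, but a fill policy is still to come. One possible shape for installing a line after a miss is sketched below. This is an assumption, not part of this commit; the helper name and the round-robin victim choice are illustrative only:

/* Sketch (not in the commit): install a line after a miss, choosing the
 * victim way round-robin. Uses the types and macros defined above. */
static unsigned int nextVictim = 0;
static void fillLine(cacheLine_t **cache, cacheConfig_t *conf,
unsigned long tag, unsigned long index)
{
unsigned int way = nextVictim++ % conf->numSets;
cache[way][index].tag = tag;
SET_CACHELINE_VALID(cache[way][index].flags);
SET_CACHELINE_CLEAN(cache[way][index].flags);
}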
unsigned long long cortexA5_simDCache(unsigned long address,
unsigned int isReadAccess)
{
unsigned int latency = 0;
unsigned long tag;
unsigned long index;
int setIndex = 0;
if (isReadAccess == 0 && L1DCacheConf.isWriteThrough == 1) // Write Access
{
// Simply increment latency by time to write to memory
latency += memWriteLatency;
L1D_Hit_Writethrough++;
}
// For write-back caches we assume writes add no latency; this is safe on
// a single-core system, where no coherence traffic competes for the cache.
tag = getTagFromAddress(address, L1DCacheConf.tagLenBits, L1DCacheConf.tagMask);
index = getIndexFromAddress(address, log_base2(L1DCacheConf.lineLenBytes), L1DCacheConf.indexMask);
for (setIndex = 0; setIndex < L1DCacheConf.numSets; setIndex++)
{
if (L1DCache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L1DCache[setIndex][index].flags))
{
latency += L1DCacheConf.hitLatency;
if (isReadAccess)
L1D_Hit_Read++;
else
L1D_Hit_Writeback++;
return latency;
}
}
// L1 Miss has occurred!
L1D_Miss++;
latency += L1DCacheConf.missLatency;
tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
index = getIndexFromAddress(address, log_base2(L2CacheConf.lineLenBytes), L2CacheConf.indexMask);
for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
{
if (L2Cache[setIndex][index].tag == tag &&
IS_CACHELINE_VALID(L2Cache[setIndex][index].flags))
{
latency += L2CacheConf.hitLatency;
if (isReadAccess)
L2_Hit_Read++;
else
L2_Hit_Writeback++;
return latency;
}
}
// L2 Miss has occurred!
L2_Miss++;
latency += L2CacheConf.missLatency;
latency += memReadLatency;
return latency;
}
void cortexA5_cacheSimInit()
{
// Allocate space for caches
initCacheParams();
L1DCache = (cacheLine_t **) alloc2D(L1DCacheConf.numSets,
L1DCacheConf.numLines, sizeof(cacheLine_t));
L1ICache = (cacheLine_t **) alloc2D(L1ICacheConf.numSets,
L1ICacheConf.numLines, sizeof(cacheLine_t));
L2Cache = (cacheLine_t **) alloc2D(L2CacheConf.numSets,
L2CacheConf.numLines, sizeof(cacheLine_t));
return;
}
void cortexA5_cacheSimFini()
{
free(L1DCache);
free(L1ICache);
free(L2Cache);
printf("Statistics : \n");
printf("\nL1 Data Cache\n");
printf("\t Hit Read = %ld\n", L1D_Hit_Read);
printf("\t Hit Writeback = %ld\n", L1D_Hit_Writeback);
printf("\t Miss = %ld\n", L1D_Miss);
printf("\nL1 Instruction Cache\n");
printf("\t Hit Read = %ld\n", L1I_Hit_Read);
printf("\t Miss = %ld\n", L1I_Miss);
printf("\nL2 Unified Cache\n");
printf("\t Hit Read = %ld\n", L2_Hit_Read);
printf("\t Hit Writeback = %ld\n", L2_Hit_Writeback);
printf("\t Miss = %ld\n", L2_Miss);
return;
}
struct cacheSimHwMod_t hwMod = {
.simDCache = &cortexA5_simDCache,
.simICache = &cortexA5_simICache,
.cacheSimInit = &cortexA5_cacheSimInit,
.cacheSimFini = &cortexA5_cacheSimFini
};
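The Makefile selects exactly one hardware model via CACHESIM_HWMOD, and the simulator core presumably dispatches through this struct. A hedged usage sketch follows; the field names come from the struct above, while the driver function itself is an assumption:

/* Sketch (not in the commit): driving the model through the hwMod struct. */
extern struct cacheSimHwMod_t hwMod;
void exampleRun(void)
{
unsigned long long cycles = 0;
hwMod.cacheSimInit();
cycles += hwMod.simICache(0x8000, 4); /* fetch 4 bytes at 0x8000 */
cycles += hwMod.simDCache(0x9000, 1); /* data read (isReadAccess = 1) */
printf("cycles = %llu\n", cycles);
hwMod.cacheSimFini();
}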
import logging
class Annotation:
def __init__(self, annotation, fileName, lineNum, replace = False):
self.fileName = fileName
self.lineNum = lineNum
self.annotation = annotation
self.replace = replace
def debug(self):
logging.debug("%s:%d: %s" % (self.fileName, self.lineNum, self.annotation))
def debugDictAnnot(dictAnnot):
for lineNum in dictAnnot.iterkeys():
for annot in dictAnnot[lineNum]:
annot.debug()
def addAnnotationToDict(dictAnnot, lineNum, annot):
if lineNum not in dictAnnot:
logging.debug("adding annotation on line %d" % lineNum)
dictAnnot[lineNum] = [annot]
else:
for a in dictAnnot[lineNum]:
if a.annotation == annot.annotation and a.fileName == annot.fileName:
return
dictAnnot[lineNum].append(annot)
\ No newline at end of file
......@@ -14,7 +14,7 @@ re_mvnInst = re.compile("\s*(?:mvn)s?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)"
(Cond, Reg, Operand2, EndLine))
ArithOpcode = "(?P<arithOpcode>add|adc|sub|sbc|rsb|rsc|mul|mla)s?" # There are more that I have ignored for now
-re_arithInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
+re_arithInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?P<op2>%s)(?:%s)" %
(ArithOpcode, Cond, Reg, Reg, Operand2, EndLine))
ArithLongOpcode = "(?P<arithLongOpcode>umull|umlal|smull|smlal)"
......@@ -25,7 +25,7 @@ LogicOpcode = "(?P<logicOpcode>and|eor|orr|bic)s?"
re_logicInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
(LogicOpcode, Cond, Reg, Reg, Operand2, EndLine))
-re_shiftInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*#(?P<op2ImedVal>\d*)(?:%s)" %
+re_shiftInst = re.compile("\s*(?P<shiftOpcode>%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*#(?P<op2ImedVal>\d*)(?:%s)" %
(ShiftOpcode, Cond, Reg, Reg, EndLine))
BranchOpcode = "(?P<branchOpcode>b|bl|bx|blx|bxj)"
......@@ -41,8 +41,8 @@ AMode2_2 = "\[(?P<am2_2BaseReg>%s),\s*#(?P<am2_2ImedOff>-?\d*)\]" % (Reg)
AMode2_3 = "\[(?P<am2_3BaseReg>%s),\s*(?P<am2_3OffsetReg>-?%s)\]" % (Reg, Reg)
AMode2_4 = "\[(?P<am2_4BaseReg>%s),\s*(?P<am2_4OffsetReg>%s),\s*(?:%s)\s*#\d*\]" % (Reg, Reg, ShiftOpcode)
AMode2_5 = "\[(?P<am2_5BaseReg>%s)\],\s*#(?P<am2_5ImedOff>-?\d*)" % (Reg)
-AMode2_6 = "\[(?P<am2_6BaseReg>%s)\],\s*-?(?:%s)" % (Reg, Reg)
-AMode2_7 = "\[(?P<am2_7BaseReg>%s)\],\s*(?:%s),\s*(?:%s)\s*#\d*" % (Reg, Reg, ShiftOpcode)
+AMode2_6 = "\[(?P<am2_6BaseReg>%s)\],\s*-?(?P<am2_6OffsetReg>%s)" % (Reg, Reg)
+AMode2_7 = "\[(?P<am2_7BaseReg>%s)\],\s*(?P<am2_7OffsetReg>%s),\s*(?:%s)\s*#\d*" % (Reg, Reg, ShiftOpcode)
AMode2 = ("(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)" % (AMode2_1,
AMode2_2,
......@@ -59,7 +59,7 @@ re_loadInst = re.compile("\s*ldrs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%
re_storeInst = re.compile("\s*strs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" %
(LoadStoreType, Cond, Reg, AMode2, EndLine))
-re_cmpInst = re.compile("\s*(?:cmp|cmn)\s*(?:%s),\s*(?:%s)(?:%s)" %
+re_cmpInst = re.compile("\s*(?:cmp|cmn)\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
(Reg, Operand2, EndLine))
re_pushInst = re.compile("\s*push\s*\{(?P<pushRegs>(?:%s)(?:,\s*(?:%s))*)\}(?:%s)" %
......
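To make the new named groups concrete, here is a self-contained sketch of how the updated re_arithInst exposes its operands. The Reg, Cond, Operand2 and EndLine fragments below are simplified stand-ins (the real definitions live elsewhere in arm_isa_regex.py, and the real Operand2 also carries op2Reg/op2RegShifted/op2ImedVal groups), so this only illustrates the group names added by this commit:

import re

# Simplified stand-ins (assumptions) for the real regex fragments:
Reg = r"(?:r\d+|sp|lr|pc)"
Cond = r"(?:eq|ne|lt|gt|ge|le)"
Operand2 = r"(?:%s|#\d+)" % Reg
EndLine = r"\s*$"
ArithOpcode = r"(?P<arithOpcode>add|adc|sub|sbc|rsb|rsc|mul|mla)s?"
re_arithInst = re.compile(r"\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?P<op2>%s)(?:%s)" %
                          (ArithOpcode, Cond, Reg, Reg, Operand2, EndLine))

m = re_arithInst.match("    adds r0, r1, r2")
print("%s %s %s %s" % (m.group("arithOpcode"), m.group("destReg"),
                       m.group("op1Reg"), m.group("op2")))
# -> add r0 r1 r2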
......@@ -11,6 +11,7 @@
#include "cacheSim.h"
extern unsigned long SP;
extern unsigned long long memAccessCycles;
extern unsigned long long pipelineCycles;
/***********************************************************
Copyright 1992 by Stichting Mathematisch Centrum, Amsterdam, The
......@@ -120,6 +121,7 @@ memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load
memAccessCycles += simICache(0x36c, 44);
// TODO: UnmappedLS: Load GlobalVar coder_1_state at line 247
// TODO: UnmappedLS: Load GlobalVar coder_1_state at line 249
pipelineCycles += 23;
valpred = state->valprev;
memAccessCycles += simDCache(state_addr, 1);
index = state->index;
......@@ -138,6 +140,7 @@ memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load
memAccessCycles += simDCache((SP + outp_addr), 0);
// Simulating I Cache for obj block 1
memAccessCycles += simICache(0x398, 32);
pipelineCycles += 15;
outp = outdata;
memAccessCycles += simDCache(outdata_addr, 1);
ivtmp_28 = 0;
......@@ -147,6 +150,7 @@ memAccessCycles += simICache(0x398, 32);
adpcm_coderbb_4:
// # PRED: 18 [91.0%] (true,exec) 3 [100.0%] (fallthru,exec)
memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilled Register
pipelineCycles += 48;
diff = (int) *(short int *)((uintptr_t)indata + (uintptr_t)ivtmp_28) - valpred;
memAccessCycles += simDCache(indata_addr + (sizeof(short ) * (+ivtmp_28)), 1);
if (diff < 0)
......@@ -289,6 +293,7 @@ memAccessCycles += simICache(0x3b8, 200);
adpcm_coderbb_19:
// # PRED: 18 [9.0%] (false,exec)
pipelineCycles += 10;
if (bufferstep == 0)
goto adpcm_coderbb_20;
else
......@@ -312,6 +317,7 @@ memAccessCycles += simDCache((SP + 0xc), 1); // Reading Spilt Register
memAccessCycles += simICache(0x490, 24);
// TODO: UnmappedLS: Store GlobalVar coder_1_state at line 317
// TODO: UnmappedLS: Store GlobalVar coder_1_state at line 318
pipelineCycles += 19;
state->valprev = (short int) (short int) valpred;
memAccessCycles += simDCache(state_addr, 0);
state->index = (char) (char) index;
......
......@@ -11,6 +11,7 @@
#include "cacheSim.h"
unsigned long SP = 0x1234;
unsigned long long memAccessCycles = 0;
unsigned long long pipelineCycles = 0;
/*
** Timing - Test timing on adpcm coder and decoder.
......@@ -69,6 +70,7 @@ memAccessCycles += simDCache(ARR_SIZE_addr, 1);
memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 0);
// Simulating I Cache for obj block 0
memAccessCycles += simICache(0x200, 36);
pipelineCycles += 27;
ARR_SIZE_0 = ARR_SIZE;
j = ARR_SIZE_0 / 10240;
if (j != 0)
......@@ -86,12 +88,14 @@ memAccessCycles += simDCache(0x364, 1); // PC Relative Load
memAccessCycles += simDCache(0x368, 1); // PC Relative Load
// Simulating I Cache for obj block 1
memAccessCycles += simICache(0x224, 40);
pipelineCycles += 21;
end_43 = 0;
count = 0;
// # SUCC: 3 [100.0%] (fallthru)
mainbb_3:
// # PRED: 13 [100.0%] (fallthru) 14 [100.0%] (fallthru)
pipelineCycles += 9;
end_46 = end_43 + 10240;
if (end_43 < end_46)
goto mainbb_4;
......@@ -104,6 +108,7 @@ mainbb_4:
memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilled Register
// Simulating I Cache for obj block 3
memAccessCycles += simICache(0x258, 20);
pipelineCycles += 13;
i_45 = (int) end_43;
ivtmp_34 = (uintptr_t)&in_Data[i_45];
end_44 = end_43;
......@@ -115,6 +120,7 @@ memAccessCycles += simDCache(pcmdata_addr + (2 * (end_44-end_43)), 0);
// Simulating I Cache for obj block 4
memAccessCycles += simICache(0x26c, 36);
// TODO: UnmappedLS: Load GlobalVar in_Data at line 179
pipelineCycles += 16;
pcmdata[end_44 - end_43] = *(short int*)((uintptr_t)ivtmp_34);
i_45 = i_45 + 1;
end_44 = (long unsigned int) i_45;
......@@ -129,6 +135,7 @@ mainbb_6:
// # PRED: 5 [1.0%] (false,exec) 3 [1.0%] (false,exec)
// Simulating I Cache for obj block 5
memAccessCycles += simICache(0x290, 40);
pipelineCycles += 14;
adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, 10240, &coder_1_state, coder_1_state_addr);
count = count + 1;
if (j > count)
......@@ -151,6 +158,7 @@ memAccessCycles += simDCache(0x358, 1); // PC Relative Load
memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 1);
// Simulating I Cache for obj block 6
memAccessCycles += simICache(0x2b8, 32);
pipelineCycles += 19;
if (ARR_SIZE_0 % 10240 != 0)
goto mainbb_8;
else
......@@ -162,6 +170,7 @@ mainbb_8:
memAccessCycles += simDCache(0x354, 1); // PC Relative Load
// Simulating I Cache for obj block 7
memAccessCycles += simICache(0x2d8, 24);
pipelineCycles += 14;
start_40 = j * 10240;
memAccessCycles += simDCache(ARR_SIZE_addr, 1);
end = ARR_SIZE;
......@@ -177,6 +186,7 @@ memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(0x360, 1); // PC Relative Load
// Simulating I Cache for obj block 8
memAccessCycles += simICache(0x2f0, 28);
pipelineCycles += 13;
i = (int) start_40;
ivtmp_28 = (uintptr_t)&in_Data[i];
D_2229 = (int) end;
......@@ -189,6 +199,7 @@ memAccessCycles += simDCache(pcmdata_addr + (2 * (start-start_40)), 0);
// Simulating I Cache for obj block 9
memAccessCycles += simICache(0x30c, 36);
// TODO: UnmappedLS: Inaccurately Matched Load at line 219
pipelineCycles += 16;
pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28);
i = i + 1;
start = (long unsigned int) i;
......@@ -206,6 +217,7 @@ memAccessCycles += simDCache(0x364, 1); // PC Relative Load
memAccessCycles += simDCache(0x368, 1); // PC Relative Load
// Simulating I Cache for obj block 10
memAccessCycles += simICache(0x330, 20);
pipelineCycles += 11;
adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, (int) (end - start_40), &coder_1_state, coder_1_state_addr);
// # SUCC: 12 [100.0%] (fallthru,exec)
......@@ -214,7 +226,9 @@ mainbb_12:
// Simulating I Cache for obj block 11
memAccessCycles += simICache(0x344, 16);
printf("memAccessCycles = \%llu\n", memAccessCycles);
printf("pipelineCycles = \%llu\n", pipelineCycles);
cacheSimFini();
pipelineCycles += 18;
return 0;
// # SUCC: EXIT [100.0%]
......
......@@ -2,13 +2,15 @@ import logging
from optparse import OptionParser
from subprocess import call
import linecache as lc
+from collections import OrderedDict
from load_store_info import *
from match_cfg import match_cfg
from gdb_info import *
from cGrammar import parse_statement
from irc_regex import *
-from collections import OrderedDict
+from pipeline_sim import *
+from annotation import *
import re
......@@ -21,16 +23,6 @@ def find(f, seq):
return item
return None
-class Annotation:
-def __init__(self, annotation, fileName, lineNum, replace = False):
-self.fileName = fileName
-self.lineNum = lineNum
-self.annotation = annotation
-self.replace = replace
-def debug(self):
-logging.debug("%s:%d: %s" % (self.fileName, self.lineNum, self.annotation))
def getListLocalVarInFunc(listLocalVariables, functionName):
listLocalVarInFunc = []
for localVar in listLocalVariables:
......@@ -38,19 +30,8 @@ def getListLocalVarInFunc(listLocalVariables, functionName):
listLocalVarInFunc.append(localVar)
return listLocalVarInFunc
-def debugDictAnnot(dictAnnot):
-for lineNum in dictAnnot.iterkeys():
-for annot in dictAnnot[lineNum]:
-annot.debug()
-def addAnnotationToDict(dict, lineNum, annot):
-if lineNum not in dict:
-dict[lineNum] = [annot]
-else:
-for a in dict[lineNum]:
-if a.annotation == annot.annotation and a.fileName == annot.fileName:
-return
-dict[lineNum].append(annot)
# TODO : Make a new function to instrument the additional global vars needed!
# def annotateGlobalVar(listISCFileNames):
def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables, listLocalVariables):
dictAnnotVarFuncDecl = {}
......@@ -91,6 +72,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
annot_str = "unsigned long long pipelineCycles = 0;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
else:
annot_str = "extern unsigned long SP;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
......@@ -102,6 +88,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
annot_str = "extern unsigned long long pipelineCycles;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
if inMultiLineVarInit == 1:
m = re_VarDeclInitMultiLineEnd.match(line)
......@@ -303,6 +294,8 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
debugDictAnnot(dictAnnotVarFuncDecl)
return dictAnnotVarFuncDecl
# TODO : Annotate Push Pop Operations for DCache Access to Stack!
def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGlobalVariables, listLocalVariables):
dictAnnotLoadStore = {}
......@@ -445,6 +438,9 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
annot_str = 'printf("memAccessCycles = \%llu\\n", memAccessCycles);'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
annot_str = 'printf("pipelineCycles = \%llu\\n", pipelineCycles);'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
annot_str = 'cacheSimFini();'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
......@@ -553,7 +549,11 @@ def instrumentCache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames,
dictAnnotLoadStore = annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGlobalVariables, listLocalVariables)
dictAnnotPipeline = annot_pipeline_sim(listISCFunctions, listObjdumpFunctions)
debugDictAnnot(dictAnnotPipeline)
dictAnnot = unionDict(dictAnnotVarFuncDecl, dictAnnotLoadStore)
dictAnnot = unionDict(dictAnnot, dictAnnotPipeline)
generateAnnotatedSourceFiles(dictAnnot, listISCFileNames, insOutputPath)
......
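unionDict is not shown in this diff. Given the {lineNum: [Annotation, ...]} dictionaries built by addAnnotationToDict, its behaviour is presumably a list-concatenating merge; a sketch under that assumption (ordering concerns ignored):

def unionDict(dictA, dictB):
    # Sketch (assumption): merge two {lineNum: [Annotation, ...]} dicts,
    # concatenating annotation lists when a line number occurs in both.
    merged = {}
    for d in (dictA, dictB):
        for lineNum, annots in d.items():
            merged.setdefault(lineNum, []).extend(annots)
    return merged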
import linecache as lc
from arm_isa_regex import *
from annotation import *
ALU_LAT = 1
MUL_LAT = 1
LDST_LAT = 1
# Residual result latencies: result-latency cycles beyond the issue cycle
ALU_RES_LAT = 1 - ALU_LAT
MUL_RES_LAT = 4 - MUL_LAT
LDST_RES_LAT = 3 - LDST_LAT
def find(f, seq):
"""Return the first item in seq where f(item) is True, else None."""
for item in seq:
if f(item):
return item
return None
def annot_pipeline_sim(listISCFunctions,
listObjdumpFunctions):
'''
Simulate the pipeline for each basic block in the objdump.
We assume that:
* each basic block is independent and cold-started, i.e. no instruction
is midway through the pipeline when the block begins;
* every load/store instruction hits in the L1 data cache.
Pipeline structure:
* 8-stage pipeline
* 2 instruction fetch stages
* 2 instruction decode stages
* 4 parallel execute stages:
* arithmetic operations: SH, ALU, SAT, WB
* multiply operations: MAC1, MAC2, MAC3
* load/store unit: ADD, DC1, DC2, WB
Definitions:
* Result Latency: number of cycles before the result of this instruction
is available at the start of the ALU, MAC2 or DC1 stage of the next
instruction.
* Early Reg: register required at the start of the SH, MAC1 or ADD stage.
One cycle must be added to the result latency of the instruction
producing this register for interlock calculations.
* Late Reg: register required in the second stage of the execute
pipeline. One cycle must be subtracted from the result latency of the
instruction producing this register for interlock calculations.
Load/Store instructions:
* Result Latency: 3 cycles
ALU (ADD/MOV) instructions:
* Result Latency: 1 cycle
MUL instructions:
* Result Latency: 4 cycles on average (varies)
'''
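# Worked example (not in the commit): with the constants above,
# LDST_RES_LAT = 2, so for the two-instruction block
#     ldr r1, [r0]        @ load, result latency 3 (residual 2)
#     add r2, r3, r1      @ r1 is a normal (early) operand
# the model charges 7 (pipeline fill) + 1 (ldr) + 1 (add)
# + 2 + 1 (early-reg interlock: prevResLat + 1) = 12 cycles.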
dictAnnotPipeline = {}
for funcObj in listObjdumpFunctions:
funcISC = find(lambda fn: fn.functionName == funcObj.functionName,
listISCFunctions)
for blockObj in funcObj.cfg.listBlocks:
# Initialize per-block state
prevDestReg = None
prevResLat = 0
currBlockCycles = 7 # cycles to fill the pipeline on a cold start
for lineNumObj in range(blockObj.startLine, blockObj.endLine + 1):
lineObj = lc.getline(funcObj.fileName, lineNumObj)
# Initialize some state Registers
opcode = ""
destReg = ""
op1Reg = ""
op2 = ""
op2RegIsShifted = False
m = re_instruction.match(lineObj)
assert(m is not None)
instObj = m.group("instruction")
m = re_arithInst.match(instObj)
if m is not None:
opcode = m.group("arithOpcode")
destReg = m.group("destReg")
op1Reg = m.group("op1Reg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
if opcode not in ["mul", "mla"]:
# ALU instruction
currBlockCycles = currBlockCycles + ALU_LAT
currResLat = ALU_RES_LAT
else:
# Multiply instruction
currBlockCycles = currBlockCycles + MUL_LAT
currResLat = MUL_RES_LAT
# Calculate interlock for arithmetic and multiply instructions.
# prevResLat must still hold the previous instruction's result latency
# here, so the current one is kept in currResLat until after the check.
if prevDestReg is not None:
if op2 != "":
if op2RegIsShifted:
if op2 == prevDestReg:
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif op1Reg == prevDestReg:
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # op2 register is not shifted
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = currResLat
continue
m = re_movInst.match(instObj)
if m is not None:
destReg = m.group("destReg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles = currBlockCycles + ALU_LAT
# Calculate interlock for the mov instruction (op2 is its only register operand)
if prevDestReg is not None:
if op2 != "":
if op2 == prevDestReg:
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_mvnInst.match(instObj)
if m is not None:
destReg = m.group("destReg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles = currBlockCycles + ALU_LAT
# Calculate interlock for the mvn instruction (op2 is its only register operand)
if prevDestReg is not None:
if op2 != "":
if op2 == prevDestReg:
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_arithLongInst.match(instObj)
if m is not None:
# Long Arithmetic Instructions
currBlockCycles = currBlockCycles + 2 * MUL_RES_LAT
prevDestReg = None
prevResLat = 0
# TODO: This needs to be improved!
continue
m = re_logicInst.match(instObj)
if m is not None:
# Logical Instruction
# opcode = m.group("logicOpcode")
destReg = m.group("destReg")
op1Reg = m.group("op1Reg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles = currBlockCycles + ALU_LAT
# Calculating Interlock
if prevDestReg is not None:
if op2 != "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_shiftInst.match(instObj)
if m is not None:
# Shift Instruction
# opcode = m.group("shiftOpcode")
destReg = m.group("destReg")
op1Reg = m.group("op1Reg")
currBlockCycles = currBlockCycles + ALU_LAT
# Calculation Interlock
if prevDestReg is not None:
if prevDestReg == op1Reg:
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No Interlock!
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_branchInst.match(instObj)
if m is not None:
# Branch Instruction
currBlockCycles = currBlockCycles + ALU_LAT
prevDestReg = None
prevResLat = 0
# TODO: May need to be improved!
continue
m = re_cmpInst.match(instObj)
if m is not None:
# Compare Instruction
op1Reg = m.group("op1Reg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles = currBlockCycles + ALU_LAT
# Calculating Interlock
if prevDestReg is not None:
if op2 != "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = None
prevResLat = 0
continue
m = re_pushInst.match(instObj)
if m is not None:
pushRegs = m.group("pushRegs")
listPushRegs = pushRegs.split(",")
currBlockCycles = currBlockCycles + len(listPushRegs)
prevDestReg = None
prevResLat = 0
# TODO: May need to be fixed!
continue
m = re_popInst.match(instObj)
if m is not None:
pushRegs = m.group("popRegs")
listPushRegs = pushRegs.split(",")
currBlockCycles = currBlockCycles + len(listPushRegs)
prevDestReg = None
prevResLat = 0
# TODO: May need to be fixed!
continue
m = re_ignoredInst.match(instObj)
if m is not None:
currBlockCycles = currBlockCycles + LDST_LAT
prevDestReg = None
prevResLat = 0
# TODO: Has to be improved!!!
continue
m = re_loadInst.match(instObj)
if m is not None:
destReg = m.group("destReg")
for baseRegLabel in ["am2_1BaseReg",
"am2_2BaseReg",
"am2_3BaseReg",
"am2_4BaseReg",
"am2_5BaseReg",
"am2_6BaseReg",
"am2_7BaseReg"]:
if m.group(baseRegLabel) is not None:
break
op1Reg = m.group(baseRegLabel)
op2 = ""
if baseRegLabel == "am2_3BaseReg":
op2 = m.group("am2_3OffsetReg")
op2RegIsShifted = False
elif baseRegLabel == "am2_4BaseReg":
op2 = m.group("am2_4OffsetReg")
op2RegIsShifted = True
elif baseRegLabel == "am2_6BaseReg":
op2 = m.group("am2_6OffsetReg")
op2RegIsShifted = False
elif baseRegLabel == "am2_7BaseReg":
op2 = m.group("am2_7OffsetReg")
op2RegIsShifted = True
currBlockCycles = currBlockCycles + LDST_LAT
if prevDestReg is not None:
if op2 != "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = LDST_RES_LAT
continue
m = re_storeInst.match(instObj)
if m is not None:
currBlockCycles = currBlockCycles + LDST_LAT
prevDestReg = None
prevResLat = 0
continue
print "%d : Instruction Could not be identified!" % (lineNumObj)
# Block Done!
blockIndISC = blockObj.mapsTo[0]
blockISC = funcISC.cfg.listBlocks[blockIndISC]
annot_str = "pipelineCycles += %d;" % (currBlockCycles)
annot = Annotation(annot_str,
funcISC.fileName,
blockISC.startLine - 1,
replace = False)
print ("Adding annotation to %s:%d : %s" % (funcISC.fileName,
blockISC.startLine-1,
annot_str))
addAnnotationToDict(dictAnnotPipeline,
blockISC.startLine-1,
annot)
# Function Done!
# All Functions Done!
return dictAnnotPipeline
if __name__ == "__main__":
pass
\ No newline at end of file