Commit 635059af authored by Gaurav Kukreja's avatar Gaurav Kukreja

Combined commit for 2 major changes

 * Added pipeline_sim for simulating pipeline, not completely tested
 * Added Cache HW Mod for Cortex A5.
Signed-off-by: Gaurav Kukreja <gaurav@gauravk.in>
parent 70b806db
...@@ -10,7 +10,7 @@ current_dir := $(patsubst %/,%,$(dir $(mkfile_path))) ...@@ -10,7 +10,7 @@ current_dir := $(patsubst %/,%,$(dir $(mkfile_path)))
CSIM_DIR = $(current_dir) CSIM_DIR = $(current_dir)
# Hardware Model to use # Hardware Model to use
CACHESIM_HWMOD = generic CACHESIM_HWMOD = cortexA5
CACHESIM_SRC = $(CSIM_DIR)/src CACHESIM_SRC = $(CSIM_DIR)/src
CACHESIM_HEADERS = $(CSIM_DIR)/headers/ CACHESIM_HEADERS = $(CSIM_DIR)/headers/
......
...@@ -15,6 +15,11 @@ ifeq ($(CACHESIM_HWMOD),generic) ...@@ -15,6 +15,11 @@ ifeq ($(CACHESIM_HWMOD),generic)
OBJECTS += genericHwMod.o OBJECTS += genericHwMod.o
endif endif
ifeq ($(CACHESIM_HWMOD),cortexA5)
SOURCES += cortexA5HwMod.c
OBJECTS += cortexA5HwMod.o
endif
all: cacheSim all: cacheSim
cacheSim: $(SOURCES) cacheSim: $(SOURCES)
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cacheSimHwMod.h"
/* Per-line status flags: bit 0 = valid, bit 1 = dirty. */
#define CACHELINE_VALID_BIT (1 << 0)
#define IS_CACHELINE_VALID(flags) (flags & CACHELINE_VALID_BIT)
#define SET_CACHELINE_VALID(flags) (flags |= CACHELINE_VALID_BIT)
#define SET_CACHELINE_INVALID(flags) (flags &= ~CACHELINE_VALID_BIT)
#define CACHELINE_DIRTY_BIT (1 << 1)
#define IS_CACHELINE_DIRTY(flags) (flags & CACHELINE_DIRTY_BIT)
#define SET_CACHELINE_DIRTY(flags) (flags |= CACHELINE_DIRTY_BIT)
#define SET_CACHELINE_CLEAN(flags) (flags &= ~CACHELINE_DIRTY_BIT)
/* Width of a simulated address, in bits. */
#define ADDRESS_LEN_BITS 32
/**** DATA STRUCTURES *********************************************************/
/**
 * Static configuration of one cache level: basic geometry, the derived
 * address-decomposition fields, and the modelled access latencies.
 */
struct cacheConfig
{
// Size
unsigned int lineLenBytes; // bytes per cache line
unsigned int cacheSizeBytes; // total capacity in bytes
unsigned int numSets; // lookup loops iterate over this dimension per index, i.e. it acts as the number of ways
// Derived
unsigned int numLines; // cacheSizeBytes / (lineLenBytes * numSets)
unsigned long tagMask; // mask selecting the tag bits of an address
unsigned int tagLenBits; // width of the tag field in bits
unsigned long indexMask; // mask selecting the index bits of an address
unsigned int indexLenBits; // width of the index field in bits
// Features
unsigned int isWriteThrough; // 1 = write-through, 0 = write-back
// Latencies
// unsigned int hitLat;
// unsigned int missLat;
unsigned int hitLatency; // cycles charged on a hit at this level
unsigned int missLatency; // cycles charged before falling through to the next level
};
typedef struct cacheConfig cacheConfig_t;
/**
 * Stores all data related to a cache line.
 */
struct cacheLine
{
unsigned int flags; // valid/dirty bits (see CACHELINE_*_BIT macros)
unsigned long tag; // tag of the address currently cached in this line
};
typedef struct cacheLine cacheLine_t;
/**** GLOBAL VARIABLES ********************************************************/
// Per-level configurations, filled in by initCacheParams().
cacheConfig_t L1DCacheConf;
cacheConfig_t L1ICacheConf;
cacheConfig_t L2CacheConf;
// Line storage, allocated by cortexA5_cacheSimInit();
// first dimension is the set/way loop index, second is the line index.
cacheLine_t **L1DCache;
cacheLine_t **L1ICache;
cacheLine_t **L2Cache;
// Main-memory access latencies, in cycles.
unsigned int memWriteLatency = 100;
unsigned int memReadLatency = 100;
// Hit/miss counters, printed by cortexA5_cacheSimFini().
unsigned long L1D_Hit_Read = 0;
unsigned long L1D_Hit_Writeback = 0;
unsigned long L1D_Hit_Writethrough = 0;
unsigned long L1D_Miss = 0;
unsigned long L1I_Hit_Read = 0;
unsigned long L1I_Hit_Writeback = 0;
unsigned long L1I_Hit_Writethrough = 0;
unsigned long L1I_Miss = 0;
unsigned long L2_Hit_Read = 0;
unsigned long L2_Hit_Writeback = 0;
unsigned long L2_Hit_Writethrough = 0;
unsigned long L2_Miss = 0;
/**** LOCAL FUNCTIONS *********************************************************/
/* Integer base-2 logarithm: index of the highest set bit (0 for val <= 1). */
int log_base2(int val)
{
    int bits = 0;
    for (val >>= 1; val != 0; val >>= 1)
    {
        bits++;
    }
    return bits;
}
void initCacheParams ()
{
int subIndexLen = 0;
int i;
/*** L1 DCache *****************/
L1DCacheConf.lineLenBytes = 32;
L1DCacheConf.cacheSizeBytes = 4 * 1024; // 4 KB
L1DCacheConf.numSets = 4;
L1DCacheConf.numLines = L1DCacheConf.cacheSizeBytes /
(L1DCacheConf.lineLenBytes * L1DCacheConf.numSets);
subIndexLen = log_base2(L1DCacheConf.lineLenBytes);
L1DCacheConf.indexLenBits = log_base2(L1DCacheConf.numLines);
L1DCacheConf.indexMask = 0;
for (i = 0; i < L1DCacheConf.indexLenBits; i++)
{
L1DCacheConf.indexMask = L1DCacheConf.indexMask << 1;
L1DCacheConf.indexMask |= 0x00000001;
}
L1DCacheConf.indexMask = L1DCacheConf.indexMask << subIndexLen;
L1DCacheConf.tagLenBits = ADDRESS_LEN_BITS - L1DCacheConf.indexMask - subIndexLen;
L1DCacheConf.tagMask = 0;
for (i = 0; i < L1DCacheConf.tagLenBits; i++)
{
L1DCacheConf.tagMask = L1DCacheConf.tagMask << 1;
L1DCacheConf.tagMask |= 0x00000001;
}
L1DCacheConf.tagMask = L1DCacheConf.tagMask << (L1DCacheConf.indexLenBits + subIndexLen);
L1DCacheConf.isWriteThrough = 0;
L1DCacheConf.hitLatency = 2;
L1DCacheConf.missLatency = 2;
/*** L1 ICache *****************/
L1ICacheConf.lineLenBytes = 32;
L1ICacheConf.cacheSizeBytes = 4 * 1024; // 4 KB
L1ICacheConf.numSets = 2;
L1ICacheConf.numLines = L1ICacheConf.cacheSizeBytes /
(L1ICacheConf.lineLenBytes * L1ICacheConf.numSets);
subIndexLen = log_base2(L1ICacheConf.lineLenBytes);
L1ICacheConf.indexLenBits = log_base2(L1ICacheConf.numLines);
L1ICacheConf.indexMask = 0;
for (i = 0; i < L1ICacheConf.indexLenBits; i++)
{
L1ICacheConf.indexMask = L1ICacheConf.indexMask << 1;
L1ICacheConf.indexMask |= 0x00000001;
}
L1ICacheConf.indexMask = L1ICacheConf.indexMask << subIndexLen;
L1ICacheConf.tagLenBits = ADDRESS_LEN_BITS - L1ICacheConf.indexMask - subIndexLen;
L1ICacheConf.tagMask = 0;
for (i = 0; i < L1ICacheConf.tagLenBits; i++)
{
L1ICacheConf.tagMask = L1ICacheConf.tagMask << 1;
L1ICacheConf.tagMask |= 0x00000001;
}
L1ICacheConf.tagMask = L1ICacheConf.tagMask << (L1ICacheConf.indexLenBits + subIndexLen);
L1ICacheConf.isWriteThrough = 0;
L1ICacheConf.hitLatency = 2;
L1ICacheConf.missLatency = 2;
/*** L2 Cache *****************/
L2CacheConf.lineLenBytes = 32;
L2CacheConf.cacheSizeBytes = 32 * 1024; // 32 KB
L2CacheConf.numSets = 2;
L2CacheConf.numLines = L2CacheConf.cacheSizeBytes /
(L2CacheConf.lineLenBytes * L2CacheConf.numSets);
subIndexLen = log_base2(L2CacheConf.lineLenBytes);
L2CacheConf.indexLenBits = log_base2(L2CacheConf.numLines);
L2CacheConf.indexMask = 0;
for (i = 0; i < L2CacheConf.indexLenBits; i++)
{
L2CacheConf.indexMask = L2CacheConf.indexMask << 1;
L2CacheConf.indexMask |= 0x00000001;
}
L2CacheConf.indexMask = L2CacheConf.indexMask << subIndexLen;
L2CacheConf.tagLenBits = ADDRESS_LEN_BITS - L2CacheConf.indexMask - subIndexLen;
L2CacheConf.tagMask = 0;
for (i = 0; i < L2CacheConf.tagLenBits; i++)
{
L2CacheConf.tagMask = L2CacheConf.tagMask << 1;
L2CacheConf.tagMask |= 0x00000001;
}
L2CacheConf.tagMask = L2CacheConf.tagMask << (L2CacheConf.indexLenBits + subIndexLen);
L2CacheConf.isWriteThrough = 0;
L2CacheConf.hitLatency = 14;
L2CacheConf.missLatency = 14;
}
/**
 * Allocates a 2 dimensional array. To be used to allocate space for cache lines.
 * The row-pointer table and the data area live in one zeroed allocation, so a
 * single free() of the returned pointer releases everything.
 *
 * @param rows number of sets
 * @param cols number of cache lines
 * @param size size of the data structure to be stored
 *
 * @return pointer to array of pointers pointing to rows of data.
 */
void** alloc2D(unsigned int rows, unsigned int cols, size_t size)
{
    void **ret;
    char *data; /* char* so the pointer arithmetic below is standard C */
    unsigned int i;
    size_t tableSize = (size_t)rows * sizeof(void *);
    size_t dataSize = (size_t)rows * cols * size;

    ret = malloc(tableSize + dataSize);
    if (ret == NULL)
    {
        /* BUGFIX: the original passed an unchecked malloc() result straight
         * to memset(), dereferencing NULL on allocation failure. */
        fprintf(stderr, "alloc2D: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memset(ret, 0, tableSize + dataSize);

    data = (char *)(ret + rows);
    for (i = 0; i < rows; i++)
    {
        ret[i] = data + (size_t)i * cols * size;
    }
    return ret;
}
/**
 * Extracts the tag field from an address: masks the tag bits and shifts them
 * down from the top of the ADDRESS_LEN_BITS-wide address.
 * BUGFIX: declared 'static inline' — plain file-scope 'inline' without an
 * extern declaration emits no out-of-line definition in C99, so unoptimized
 * builds can fail to link.
 */
static inline unsigned long getTagFromAddress(unsigned long address,
        unsigned int tagLengthBits, unsigned long tagMask)
{
    return (address & tagMask) >> (ADDRESS_LEN_BITS - tagLengthBits);
}
/**
 * Extracts the index field from an address: masks the index bits and shifts
 * them down past the line-offset bits (offsetLengthBits).
 * BUGFIX: declared 'static inline' — plain file-scope 'inline' without an
 * extern declaration emits no out-of-line definition in C99, so unoptimized
 * builds can fail to link.
 */
static inline unsigned long getIndexFromAddress(unsigned long address,
        unsigned int offsetLengthBits, unsigned long indexMask)
{
    return (address & indexMask) >> offsetLengthBits;
}
/**** HWMOD FUNCTIONS *********************************************************/
/**
 * Simulates an instruction fetch at 'address' through the L1 I-cache and,
 * on a miss, the unified L2. Returns the modelled latency in cycles and
 * updates the global hit/miss counters.
 *
 * NOTE(review): this is a lookup-only model — a miss never allocates a
 * line, so the caches only hit on lines installed elsewhere; confirm this
 * simplification is intended. 'nBytes' is currently unused.
 */
unsigned long long cortexA5_simICache(unsigned long address,
        unsigned int nBytes)
{
    unsigned long long latency = 0;
    unsigned long tag;
    unsigned long index;
    unsigned int offsetBits;
    int setIndex;

    (void)nBytes; /* access length is not modelled yet */

    /* L1 instruction cache lookup. */
    offsetBits = log_base2(L1ICacheConf.lineLenBytes);
    tag = getTagFromAddress(address, L1ICacheConf.tagLenBits, L1ICacheConf.tagMask);
    /* BUGFIX: the index must be shifted down by the line-offset width; the
     * original passed indexLenBits, which only coincidentally matches the
     * offset width for the L1D geometry and mis-indexed L1I and L2. */
    index = getIndexFromAddress(address, offsetBits, L1ICacheConf.indexMask);
    for (setIndex = 0; setIndex < L1ICacheConf.numSets; setIndex++)
    {
        if (IS_CACHELINE_VALID(L1ICache[setIndex][index].flags) &&
            L1ICache[setIndex][index].tag == tag)
        {
            L1I_Hit_Read++;
            return latency + L1ICacheConf.hitLatency;
        }
    }

    /* L1 miss: charge the miss penalty and look in the unified L2. */
    L1I_Miss++;
    latency += L1ICacheConf.missLatency;

    offsetBits = log_base2(L2CacheConf.lineLenBytes);
    tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
    index = getIndexFromAddress(address, offsetBits, L2CacheConf.indexMask);
    for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
    {
        if (IS_CACHELINE_VALID(L2Cache[setIndex][index].flags) &&
            L2Cache[setIndex][index].tag == tag)
        {
            L2_Hit_Read++;
            return latency + L2CacheConf.hitLatency;
        }
    }

    /* L2 miss: pay the L2 penalty plus a main-memory read. */
    L2_Miss++;
    latency += L2CacheConf.missLatency;
    latency += memReadLatency;
    return latency;
}
/**
 * Simulates a data access at 'address' through the L1 D-cache and, on a
 * miss, the unified L2. isReadAccess: non-zero = load, 0 = store.
 * Returns the modelled latency in cycles and updates the global counters.
 *
 * NOTE(review): lookup-only model — a miss never allocates a line and a
 * write-back store never sets the dirty bit; an L2 miss always charges
 * memReadLatency even for stores. Confirm these simplifications are intended.
 */
unsigned long long cortexA5_simDCache(unsigned long address,
        unsigned int isReadAccess)
{
    unsigned long long latency = 0;
    unsigned long tag;
    unsigned long index;
    unsigned int offsetBits;
    int setIndex;

    if (isReadAccess == 0 && L1DCacheConf.isWriteThrough == 1)
    {
        /* Write-through store: pay the memory write up front. Write-back
         * adds no latency here — a safe assumption on a single-core system. */
        latency += memWriteLatency;
        L1D_Hit_Writethrough++;
    }

    /* L1 data cache lookup. */
    offsetBits = log_base2(L1DCacheConf.lineLenBytes);
    tag = getTagFromAddress(address, L1DCacheConf.tagLenBits, L1DCacheConf.tagMask);
    /* BUGFIX: the index must be shifted down by the line-offset width; the
     * original passed indexLenBits, which only coincidentally matches the
     * offset width for the L1D geometry and mis-indexed L2. */
    index = getIndexFromAddress(address, offsetBits, L1DCacheConf.indexMask);
    for (setIndex = 0; setIndex < L1DCacheConf.numSets; setIndex++)
    {
        if (IS_CACHELINE_VALID(L1DCache[setIndex][index].flags) &&
            L1DCache[setIndex][index].tag == tag)
        {
            if (isReadAccess)
                L1D_Hit_Read++;
            else
                L1D_Hit_Writeback++;
            return latency + L1DCacheConf.hitLatency;
        }
    }

    /* L1 miss: charge the miss penalty and look in the unified L2. */
    L1D_Miss++;
    latency += L1DCacheConf.missLatency;

    offsetBits = log_base2(L2CacheConf.lineLenBytes);
    tag = getTagFromAddress(address, L2CacheConf.tagLenBits, L2CacheConf.tagMask);
    index = getIndexFromAddress(address, offsetBits, L2CacheConf.indexMask);
    for (setIndex = 0; setIndex < L2CacheConf.numSets; setIndex++)
    {
        if (IS_CACHELINE_VALID(L2Cache[setIndex][index].flags) &&
            L2Cache[setIndex][index].tag == tag)
        {
            if (isReadAccess)
                L2_Hit_Read++;
            else
                L2_Hit_Writeback++;
            return latency + L2CacheConf.hitLatency;
        }
    }

    /* L2 miss: pay the L2 penalty plus a main-memory read. */
    L2_Miss++;
    latency += L2CacheConf.missLatency;
    latency += memReadLatency;
    return latency;
}
/**
 * Derives the cache parameters and allocates the zeroed backing arrays for
 * the L1 data, L1 instruction and unified L2 caches.
 */
void cortexA5_cacheSimInit()
{
    initCacheParams();

    L1DCache = (cacheLine_t **) alloc2D(L1DCacheConf.numSets,
                                        L1DCacheConf.numLines,
                                        sizeof(cacheLine_t));
    L1ICache = (cacheLine_t **) alloc2D(L1ICacheConf.numSets,
                                        L1ICacheConf.numLines,
                                        sizeof(cacheLine_t));
    L2Cache = (cacheLine_t **) alloc2D(L2CacheConf.numSets,
                                       L2CacheConf.numLines,
                                       sizeof(cacheLine_t));
}
/**
 * Releases the cache arrays and prints the accumulated hit/miss statistics.
 */
void cortexA5_cacheSimFini()
{
    free(L1DCache);
    free(L1ICache);
    free(L2Cache);
    /* Null the pointers so a stray access after fini faults predictably
     * and a second fini is harmless. */
    L1DCache = NULL;
    L1ICache = NULL;
    L2Cache = NULL;

    /* BUGFIX: the counters are unsigned long, so the conversion must be
     * %lu — the original %ld is a mismatched format specifier. */
    printf("Statistics : \n");
    printf("\nL1 Data Cache\n");
    printf("\t Hit Read = %lu\n", L1D_Hit_Read);
    printf("\t Hit Writeback = %lu\n", L1D_Hit_Writeback);
    printf("\t Miss = %lu\n", L1D_Miss);
    printf("\nL1 Instruction Cache\n");
    printf("\t Hit Read = %lu\n", L1I_Hit_Read);
    printf("\t Miss = %lu\n", L1I_Miss);
    printf("\nL2 Unified Cache\n");
    printf("\t Hit Read = %lu\n", L2_Hit_Read);
    printf("\t Hit Writeback = %lu\n", L2_Hit_Writeback);
    printf("\t Miss = %lu\n", L2_Miss);
}
/* Dispatch table exported to the generic cache simulator; the
 * cacheSimHwMod_t interface is declared in cacheSimHwMod.h. */
struct cacheSimHwMod_t hwMod = {
.simDCache = &cortexA5_simDCache,
.simICache = &cortexA5_simICache,
.cacheSimInit = &cortexA5_cacheSimInit,
.cacheSimFini = &cortexA5_cacheSimFini
};
import logging
class Annotation:
    """One line of instrumentation text targeted at a file/line position."""

    def __init__(self, annotation, fileName, lineNum, replace = False):
        """Record the annotation text and where it should be applied.

        replace -- when True, the annotation replaces the target line
        instead of being inserted alongside it.
        """
        self.fileName = fileName
        self.lineNum = lineNum
        self.annotation = annotation
        self.replace = replace

    def debug(self):
        """Log this annotation at DEBUG level as 'file:line: text'."""
        # Lazy %-style args: the message is only formatted if DEBUG is enabled.
        logging.debug("%s:%d: %s", self.fileName, self.lineNum, self.annotation)
def debugDictAnnot(dictAnnot):
    """Log every annotation in a {lineNum: [Annotation, ...]} dict.

    BUGFIX: iterate the dict directly instead of calling the
    Python-2-only iterkeys(), which raises AttributeError on Python 3.
    """
    for lineNum in dictAnnot:
        for annot in dictAnnot[lineNum]:
            annot.debug()
def addAnnotationToDict(dict, lineNum, annot):
    """Append annot to dict[lineNum], skipping duplicates.

    Two annotations are duplicates when both their text and their target
    file name match; insertion order within a line is preserved.
    (Removed a leftover debug print() that wrote to stdout on every first
    insertion for a line.)
    """
    if lineNum not in dict:
        dict[lineNum] = [annot]
    else:
        for existing in dict[lineNum]:
            if existing.annotation == annot.annotation and existing.fileName == annot.fileName:
                return
        dict[lineNum].append(annot)
\ No newline at end of file
...@@ -14,7 +14,7 @@ re_mvnInst = re.compile("\s*(?:mvn)s?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" ...@@ -14,7 +14,7 @@ re_mvnInst = re.compile("\s*(?:mvn)s?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)"
(Cond, Reg, Operand2, EndLine)) (Cond, Reg, Operand2, EndLine))
ArithOpcode = "(?P<arithOpcode>add|adc|sub|sbc|rsb|rsc|mul|mla)s?" # There are more that I have ignored for now ArithOpcode = "(?P<arithOpcode>add|adc|sub|sbc|rsb|rsc|mul|mla)s?" # There are more that I have ignored for now
re_arithInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" % re_arithInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?P<op2>%s)(?:%s)" %
(ArithOpcode, Cond, Reg, Reg, Operand2, EndLine)) (ArithOpcode, Cond, Reg, Reg, Operand2, EndLine))
ArithLongOpcode = "(?P<arithLongOpcode>umull|umlal|smull|smlal)" ArithLongOpcode = "(?P<arithLongOpcode>umull|umlal|smull|smlal)"
...@@ -25,7 +25,7 @@ LogicOpcode = "(?P<logicOpcode>and|eor|orr|bic)s?" ...@@ -25,7 +25,7 @@ LogicOpcode = "(?P<logicOpcode>and|eor|orr|bic)s?"
re_logicInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" % re_logicInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
(LogicOpcode, Cond, Reg, Reg, Operand2, EndLine)) (LogicOpcode, Cond, Reg, Reg, Operand2, EndLine))
re_shiftInst = re.compile("\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*#(?P<op2ImedVal>\d*)(?:%s)" % re_shiftInst = re.compile("\s*(?P<shiftOpcode>%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*#(?P<op2ImedVal>\d*)(?:%s)" %
(ShiftOpcode, Cond, Reg, Reg, EndLine)) (ShiftOpcode, Cond, Reg, Reg, EndLine))
BranchOpcode = "(?P<branchOpcode>b|bl|bx|blx|bxj)" BranchOpcode = "(?P<branchOpcode>b|bl|bx|blx|bxj)"
...@@ -41,8 +41,8 @@ AMode2_2 = "\[(?P<am2_2BaseReg>%s),\s*#(?P<am2_2ImedOff>-?\d*)\]" % (Reg) ...@@ -41,8 +41,8 @@ AMode2_2 = "\[(?P<am2_2BaseReg>%s),\s*#(?P<am2_2ImedOff>-?\d*)\]" % (Reg)
AMode2_3 = "\[(?P<am2_3BaseReg>%s),\s*(?P<am2_3OffsetReg>-?%s)\]" % (Reg, Reg) AMode2_3 = "\[(?P<am2_3BaseReg>%s),\s*(?P<am2_3OffsetReg>-?%s)\]" % (Reg, Reg)
AMode2_4 = "\[(?P<am2_4BaseReg>%s),\s*(?P<am2_4OffsetReg>%s),\s*(?:%s)\s*#\d*\]" % (Reg, Reg, ShiftOpcode) AMode2_4 = "\[(?P<am2_4BaseReg>%s),\s*(?P<am2_4OffsetReg>%s),\s*(?:%s)\s*#\d*\]" % (Reg, Reg, ShiftOpcode)
AMode2_5 = "\[(?P<am2_5BaseReg>%s)\],\s*#(?P<am2_5ImedOff>-?\d*)" % (Reg) AMode2_5 = "\[(?P<am2_5BaseReg>%s)\],\s*#(?P<am2_5ImedOff>-?\d*)" % (Reg)
AMode2_6 = "\[(?P<am2_6BaseReg>%s)\],\s*-?(?:%s)" % (Reg, Reg) AMode2_6 = "\[(?P<am2_6BaseReg>%s)\],\s*-?(?P<am2_6OffsetReg>%s)" % (Reg, Reg)
AMode2_7 = "\[(?P<am2_7BaseReg>%s)\],\s*(?:%s),\s*(?:%s)\s*#\d*" % (Reg, Reg, ShiftOpcode) AMode2_7 = "\[(?P<am2_7BaseReg>%s)\],\s*(?P<am2_7OffsetReg>%s),\s*(?:%s)\s*#\d*" % (Reg, Reg, ShiftOpcode)
AMode2 = ("(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)" % (AMode2_1, AMode2 = ("(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)" % (AMode2_1,
AMode2_2, AMode2_2,
...@@ -59,7 +59,7 @@ re_loadInst = re.compile("\s*ldrs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:% ...@@ -59,7 +59,7 @@ re_loadInst = re.compile("\s*ldrs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%
re_storeInst = re.compile("\s*strs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" % re_storeInst = re.compile("\s*strs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" %
(LoadStoreType, Cond, Reg, AMode2, EndLine)) (LoadStoreType, Cond, Reg, AMode2, EndLine))
re_cmpInst = re.compile("\s*(?:cmp|cmn)\s*(?:%s),\s*(?:%s)(?:%s)" % re_cmpInst = re.compile("\s*(?:cmp|cmn)\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
(Reg, Operand2, EndLine)) (Reg, Operand2, EndLine))
re_pushInst = re.compile("\s*push\s*\{(?P<pushRegs>(?:%s)(?:,\s*(?:%s))*)\}(?:%s)" % re_pushInst = re.compile("\s*push\s*\{(?P<pushRegs>(?:%s)(?:,\s*(?:%s))*)\}(?:%s)" %
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "cacheSim.h" #include "cacheSim.h"
extern unsigned long SP; extern unsigned long SP;
extern unsigned long long memAccessCycles; extern unsigned long long memAccessCycles;
extern unsigned long long pipelineCycles;
/*********************************************************** /***********************************************************
Copyright 1992 by Stichting Mathematisch Centrum, Amsterdam, The Copyright 1992 by Stichting Mathematisch Centrum, Amsterdam, The
...@@ -120,6 +121,7 @@ memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load ...@@ -120,6 +121,7 @@ memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load
memAccessCycles += simICache(0x36c, 44); memAccessCycles += simICache(0x36c, 44);
// TODO: UnmappedLS: Load GlobalVar coder_1_state at line 247 // TODO: UnmappedLS: Load GlobalVar coder_1_state at line 247
// TODO: UnmappedLS: Load GlobalVar coder_1_state at line 249 // TODO: UnmappedLS: Load GlobalVar coder_1_state at line 249
pipelineCycles += 23;
valpred = state->valprev; valpred = state->valprev;
memAccessCycles += simDCache(state_addr, 1); memAccessCycles += simDCache(state_addr, 1);
index = state->index; index = state->index;
...@@ -138,6 +140,7 @@ memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load ...@@ -138,6 +140,7 @@ memAccessCycles += simDCache(0x4a8, 1); // PC Relative Load
memAccessCycles += simDCache((SP + outp_addr), 0); memAccessCycles += simDCache((SP + outp_addr), 0);
// Simulating I Cache for obj block 1 // Simulating I Cache for obj block 1
memAccessCycles += simICache(0x398, 32); memAccessCycles += simICache(0x398, 32);
pipelineCycles += 15;
outp = outdata; outp = outdata;
memAccessCycles += simDCache(outdata_addr, 1); memAccessCycles += simDCache(outdata_addr, 1);
ivtmp_28 = 0; ivtmp_28 = 0;
...@@ -147,6 +150,7 @@ memAccessCycles += simICache(0x398, 32); ...@@ -147,6 +150,7 @@ memAccessCycles += simICache(0x398, 32);
adpcm_coderbb_4: adpcm_coderbb_4:
// # PRED: 18 [91.0%] (true,exec) 3 [100.0%] (fallthru,exec) // # PRED: 18 [91.0%] (true,exec) 3 [100.0%] (fallthru,exec)
memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilt Register memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilt Register
pipelineCycles += 48;
diff = (int) *(short int *)((uintptr_t)indata + (uintptr_t)ivtmp_28) - valpred; diff = (int) *(short int *)((uintptr_t)indata + (uintptr_t)ivtmp_28) - valpred;
memAccessCycles += simDCache(indata_addr + (sizeof(short ) * (+ivtmp_28)), 1); memAccessCycles += simDCache(indata_addr + (sizeof(short ) * (+ivtmp_28)), 1);
if (diff < 0) if (diff < 0)
...@@ -289,6 +293,7 @@ memAccessCycles += simICache(0x3b8, 200); ...@@ -289,6 +293,7 @@ memAccessCycles += simICache(0x3b8, 200);
adpcm_coderbb_19: adpcm_coderbb_19:
// # PRED: 18 [9.0%] (false,exec) // # PRED: 18 [9.0%] (false,exec)
pipelineCycles += 10;
if (bufferstep == 0) if (bufferstep == 0)
goto adpcm_coderbb_20; goto adpcm_coderbb_20;
else else
...@@ -312,6 +317,7 @@ memAccessCycles += simDCache((SP + 0xc), 1); // Reading Spilt Register ...@@ -312,6 +317,7 @@ memAccessCycles += simDCache((SP + 0xc), 1); // Reading Spilt Register
memAccessCycles += simICache(0x490, 24); memAccessCycles += simICache(0x490, 24);
// TODO: UnmappedLS: Store GlobalVar coder_1_state at line 317 // TODO: UnmappedLS: Store GlobalVar coder_1_state at line 317
// TODO: UnmappedLS: Store GlobalVar coder_1_state at line 318 // TODO: UnmappedLS: Store GlobalVar coder_1_state at line 318
pipelineCycles += 19;
state->valprev = (short int) (short int) valpred; state->valprev = (short int) (short int) valpred;
memAccessCycles += simDCache(state_addr, 0); memAccessCycles += simDCache(state_addr, 0);
state->index = (char) (char) index; state->index = (char) (char) index;
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "cacheSim.h" #include "cacheSim.h"
unsigned long SP = 0x1234; unsigned long SP = 0x1234;
unsigned long long memAccessCycles = 0; unsigned long long memAccessCycles = 0;
unsigned long long pipelineCycles = 0;
/* /*
** Timing - Test timing on adpcm coder and decoder. ** Timing - Test timing on adpcm coder and decoder.
...@@ -69,6 +70,7 @@ memAccessCycles += simDCache(ARR_SIZE_addr, 1); ...@@ -69,6 +70,7 @@ memAccessCycles += simDCache(ARR_SIZE_addr, 1);
memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 0); memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 0);
// Simulating I Cache for obj block 0 // Simulating I Cache for obj block 0
memAccessCycles += simICache(0x200, 36); memAccessCycles += simICache(0x200, 36);
pipelineCycles += 27;
ARR_SIZE_0 = ARR_SIZE; ARR_SIZE_0 = ARR_SIZE;
j = ARR_SIZE_0 / 10240; j = ARR_SIZE_0 / 10240;
if (j != 0) if (j != 0)
...@@ -86,12 +88,14 @@ memAccessCycles += simDCache(0x364, 1); // PC Relative Load ...@@ -86,12 +88,14 @@ memAccessCycles += simDCache(0x364, 1); // PC Relative Load
memAccessCycles += simDCache(0x368, 1); // PC Relative Load memAccessCycles += simDCache(0x368, 1); // PC Relative Load
// Simulating I Cache for obj block 1 // Simulating I Cache for obj block 1
memAccessCycles += simICache(0x224, 40); memAccessCycles += simICache(0x224, 40);
pipelineCycles += 21;
end_43 = 0; end_43 = 0;
count = 0; count = 0;
// # SUCC: 3 [100.0%] (fallthru) // # SUCC: 3 [100.0%] (fallthru)
mainbb_3: mainbb_3:
// # PRED: 13 [100.0%] (fallthru) 14 [100.0%] (fallthru) // # PRED: 13 [100.0%] (fallthru) 14 [100.0%] (fallthru)
pipelineCycles += 9;
end_46 = end_43 + 10240; end_46 = end_43 + 10240;
if (end_43 < end_46) if (end_43 < end_46)
goto mainbb_4; goto mainbb_4;
...@@ -104,6 +108,7 @@ mainbb_4: ...@@ -104,6 +108,7 @@ mainbb_4:
memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilt Register memAccessCycles += simDCache((SP + 0x4), 1); // Reading Spilt Register
// Simulating I Cache for obj block 3 // Simulating I Cache for obj block 3
memAccessCycles += simICache(0x258, 20); memAccessCycles += simICache(0x258, 20);
pipelineCycles += 13;
i_45 = (int) end_43; i_45 = (int) end_43;
ivtmp_34 = (uintptr_t)&in_Data[i_45]; ivtmp_34 = (uintptr_t)&in_Data[i_45];
end_44 = end_43; end_44 = end_43;
...@@ -115,6 +120,7 @@ memAccessCycles += simDCache(pcmdata_addr + (2 * (end_44-end_43)), 0); ...@@ -115,6 +120,7 @@ memAccessCycles += simDCache(pcmdata_addr + (2 * (end_44-end_43)), 0);
// Simulating I Cache for obj block 4 // Simulating I Cache for obj block 4
memAccessCycles += simICache(0x26c, 36); memAccessCycles += simICache(0x26c, 36);
// TODO: UnmappedLS: Load GlobalVar in_Data at line 179 // TODO: UnmappedLS: Load GlobalVar in_Data at line 179
pipelineCycles += 16;
pcmdata[end_44 - end_43] = *(short int*)((uintptr_t)ivtmp_34); pcmdata[end_44 - end_43] = *(short int*)((uintptr_t)ivtmp_34);
i_45 = i_45 + 1; i_45 = i_45 + 1;
end_44 = (long unsigned int) i_45; end_44 = (long unsigned int) i_45;
...@@ -129,6 +135,7 @@ mainbb_6: ...@@ -129,6 +135,7 @@ mainbb_6:
// # PRED: 5 [1.0%] (false,exec) 3 [1.0%] (false,exec) // # PRED: 5 [1.0%] (false,exec) 3 [1.0%] (false,exec)
// Simulating I Cache for obj block 5 // Simulating I Cache for obj block 5
memAccessCycles += simICache(0x290, 40); memAccessCycles += simICache(0x290, 40);
pipelineCycles += 14;
adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, 10240, &coder_1_state, coder_1_state_addr); adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, 10240, &coder_1_state, coder_1_state_addr);
count = count + 1; count = count + 1;
if (j > count) if (j > count)
...@@ -151,6 +158,7 @@ memAccessCycles += simDCache(0x358, 1); // PC Relative Load ...@@ -151,6 +158,7 @@ memAccessCycles += simDCache(0x358, 1); // PC Relative Load
memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 1); memAccessCycles += simDCache((SP + ARR_SIZE_0_addr), 1);
// Simulating I Cache for obj block 6 // Simulating I Cache for obj block 6
memAccessCycles += simICache(0x2b8, 32); memAccessCycles += simICache(0x2b8, 32);
pipelineCycles += 19;
if (ARR_SIZE_0 % 10240 != 0) if (ARR_SIZE_0 % 10240 != 0)
goto mainbb_8; goto mainbb_8;
else else
...@@ -162,6 +170,7 @@ mainbb_8: ...@@ -162,6 +170,7 @@ mainbb_8:
memAccessCycles += simDCache(0x354, 1); // PC Relative Load memAccessCycles += simDCache(0x354, 1); // PC Relative Load
// Simulating I Cache for obj block 7 // Simulating I Cache for obj block 7
memAccessCycles += simICache(0x2d8, 24); memAccessCycles += simICache(0x2d8, 24);
pipelineCycles += 14;
start_40 = j * 10240; start_40 = j * 10240;
memAccessCycles += simDCache(ARR_SIZE_addr, 1); memAccessCycles += simDCache(ARR_SIZE_addr, 1);
end = ARR_SIZE; end = ARR_SIZE;
...@@ -177,6 +186,7 @@ memAccessCycles += simDCache(0x35c, 1); // PC Relative Load ...@@ -177,6 +186,7 @@ memAccessCycles += simDCache(0x35c, 1); // PC Relative Load
memAccessCycles += simDCache(0x360, 1); // PC Relative Load memAccessCycles += simDCache(0x360, 1); // PC Relative Load
// Simulating I Cache for obj block 8 // Simulating I Cache for obj block 8
memAccessCycles += simICache(0x2f0, 28); memAccessCycles += simICache(0x2f0, 28);
pipelineCycles += 13;
i = (int) start_40; i = (int) start_40;
ivtmp_28 = (uintptr_t)&in_Data[i]; ivtmp_28 = (uintptr_t)&in_Data[i];
D_2229 = (int) end; D_2229 = (int) end;
...@@ -189,6 +199,7 @@ memAccessCycles += simDCache(pcmdata_addr + (2 * (start-start_40)), 0); ...@@ -189,6 +199,7 @@ memAccessCycles += simDCache(pcmdata_addr + (2 * (start-start_40)), 0);
// Simulating I Cache for obj block 9 // Simulating I Cache for obj block 9
memAccessCycles += simICache(0x30c, 36); memAccessCycles += simICache(0x30c, 36);
// TODO: UnmappedLS: Inaccurately Matched Load at line 219 // TODO: UnmappedLS: Inaccurately Matched Load at line 219
pipelineCycles += 16;
pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28); pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28);
i = i + 1; i = i + 1;
start = (long unsigned int) i; start = (long unsigned int) i;
...@@ -206,6 +217,7 @@ memAccessCycles += simDCache(0x364, 1); // PC Relative Load ...@@ -206,6 +217,7 @@ memAccessCycles += simDCache(0x364, 1); // PC Relative Load
memAccessCycles += simDCache(0x368, 1); // PC Relative Load memAccessCycles += simDCache(0x368, 1); // PC Relative Load
// Simulating I Cache for obj block 10 // Simulating I Cache for obj block 10
memAccessCycles += simICache(0x330, 20); memAccessCycles += simICache(0x330, 20);
pipelineCycles += 11;
adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, (int) (end - start_40), &coder_1_state, coder_1_state_addr); adpcm_coder (&pcmdata, pcmdata_addr, &adpcmdata, adpcmdata_addr, (int) (end - start_40), &coder_1_state, coder_1_state_addr);
// # SUCC: 12 [100.0%] (fallthru,exec) // # SUCC: 12 [100.0%] (fallthru,exec)
...@@ -214,7 +226,9 @@ mainbb_12: ...@@ -214,7 +226,9 @@ mainbb_12:
// Simulating I Cache for obj block 11 // Simulating I Cache for obj block 11
memAccessCycles += simICache(0x344, 16); memAccessCycles += simICache(0x344, 16);
printf("memAccessCycles = \%llu\n", memAccessCycles); printf("memAccessCycles = \%llu\n", memAccessCycles);
printf("pipelineCycles = \%llu\n", pipelineCycles);
cacheSimFini(); cacheSimFini();
pipelineCycles += 18;
return 0; return 0;
// # SUCC: EXIT [100.0%] // # SUCC: EXIT [100.0%]
......
...@@ -2,13 +2,15 @@ import logging ...@@ -2,13 +2,15 @@ import logging
from optparse import OptionParser from optparse import OptionParser
from subprocess import call from subprocess import call
import linecache as lc import linecache as lc
from collections import OrderedDict
from load_store_info import * from load_store_info import *
from match_cfg import match_cfg from match_cfg import match_cfg
from gdb_info import * from gdb_info import *
from cGrammar import parse_statement from cGrammar import parse_statement
from irc_regex import * from irc_regex import *
from collections import OrderedDict from pipeline_sim import *
from annotation import *
import re import re
...@@ -21,16 +23,6 @@ def find(f, seq): ...@@ -21,16 +23,6 @@ def find(f, seq):
return item return item
return None return None
class Annotation:
def __init__(self, annotation, fileName, lineNum, replace = False):
self.fileName = fileName
self.lineNum = lineNum
self.annotation = annotation
self.replace = replace
def debug(self):
logging.debug("%s:%d: %s" % (self.fileName, self.lineNum, self.annotation))
def getListLocalVarInFunc(listLocalVariables, functionName): def getListLocalVarInFunc(listLocalVariables, functionName):
listLocalVarInFunc = [] listLocalVarInFunc = []
for localVar in listLocalVariables: for localVar in listLocalVariables:
...@@ -38,19 +30,8 @@ def getListLocalVarInFunc(listLocalVariables, functionName): ...@@ -38,19 +30,8 @@ def getListLocalVarInFunc(listLocalVariables, functionName):
listLocalVarInFunc.append(localVar) listLocalVarInFunc.append(localVar)
return listLocalVarInFunc return listLocalVarInFunc
def debugDictAnnot(dictAnnot): # TODO : Make a new function to instrument the additional global vars needed!
for lineNum in dictAnnot.iterkeys(): # def annotateGlobalVar(listISCFileNames):
for annot in dictAnnot[lineNum]:
annot.debug()
def addAnnotationToDict(dict, lineNum, annot):
if lineNum not in dict:
dict[lineNum] = [annot]
else:
for a in dict[lineNum]:
if a.annotation == annot.annotation and a.fileName == annot.fileName:
return
dict[lineNum].append(annot)
def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables, listLocalVariables): def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables, listLocalVariables):
dictAnnotVarFuncDecl = {} dictAnnotVarFuncDecl = {}
...@@ -91,6 +72,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables, ...@@ -91,6 +72,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
addAnnotationToDict(dictAnnotVarFuncDecl, addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum, lineNum,
annot) annot)
annot_str = "unsigned long long pipelineCycles = 0;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
else: else:
annot_str = "extern unsigned long SP;" annot_str = "extern unsigned long SP;"
annot = Annotation(annot_str, ISCFileName, lineNum, False) annot = Annotation(annot_str, ISCFileName, lineNum, False)
...@@ -102,6 +88,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables, ...@@ -102,6 +88,11 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
addAnnotationToDict(dictAnnotVarFuncDecl, addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum, lineNum,
annot) annot)
annot_str = "extern unsigned long long pipelineCycles;"
annot = Annotation(annot_str, ISCFileName, lineNum, False)
addAnnotationToDict(dictAnnotVarFuncDecl,
lineNum,
annot)
if inMultiLineVarInit == 1: if inMultiLineVarInit == 1:
m = re_VarDeclInitMultiLineEnd.match(line) m = re_VarDeclInitMultiLineEnd.match(line)
...@@ -303,6 +294,8 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables, ...@@ -303,6 +294,8 @@ def annotateVarFuncDecl(listISCFileNames, listISCFunctions, listGlobalVariables,
debugDictAnnot(dictAnnotVarFuncDecl) debugDictAnnot(dictAnnotVarFuncDecl)
return dictAnnotVarFuncDecl return dictAnnotVarFuncDecl
# TODO : Annotate Push Pop Operations for DCache Access to Stack!
def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGlobalVariables, listLocalVariables): def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGlobalVariables, listLocalVariables):
dictAnnotLoadStore = {} dictAnnotLoadStore = {}
...@@ -445,6 +438,9 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl ...@@ -445,6 +438,9 @@ def annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGl
annot_str = 'printf("memAccessCycles = \%llu\\n", memAccessCycles);' annot_str = 'printf("memAccessCycles = \%llu\\n", memAccessCycles);'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False) annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot) addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
annot_str = 'printf("pipelineCycles = \%llu\\n", pipelineCycles);'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
annot_str = 'cacheSimFini();' annot_str = 'cacheSimFini();'
annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False) annot = Annotation(annot_str, funcISC.fileName, returnLineNumber-1, False)
addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot) addAnnotationToDict(dictAnnotLoadStore, returnLineNumber-1, annot)
...@@ -553,7 +549,11 @@ def instrumentCache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames, ...@@ -553,7 +549,11 @@ def instrumentCache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames,
dictAnnotLoadStore = annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGlobalVariables, listLocalVariables) dictAnnotLoadStore = annotateLoadStore(listISCFunctions, listObjdumpFunctions, listLSInfo, listGlobalVariables, listLocalVariables)
dictAnnotPipeline = annot_pipeline_sim(listISCFunctions, listObjdumpFunctions)
debugDictAnnot(dictAnnotPipeline)
dictAnnot = unionDict(dictAnnotVarFuncDecl, dictAnnotLoadStore) dictAnnot = unionDict(dictAnnotVarFuncDecl, dictAnnotLoadStore)
dictAnnot = unionDict(dictAnnot, dictAnnotPipeline)
generateAnnotatedSourceFiles(dictAnnot, listISCFileNames, insOutputPath) generateAnnotatedSourceFiles(dictAnnot, listISCFileNames, insOutputPath)
......
import linecache as lc
from arm_isa_regex import *
from annotation import *
# Issue latencies: cycles an instruction occupies its execute pipe.
ALU_LAT = 1
MUL_LAT = 1
LDST_LAT = 1
# Result latencies, expressed as EXTRA cycles beyond the issue latency
# until the result is available to a dependent instruction
# (ALU: 1 cycle total, MUL: ~4 cycles, Load/Store: 3 cycles on an L1 hit).
ALU_RES_LAT = 1 - ALU_LAT
MUL_RES_LAT = 4 - MUL_LAT
LDST_RES_LAT = 3 - LDST_LAT
def find(f, seq):
    """Return the first item in seq for which f(item) is truthy, or None.

    Idiom: delegate to next() over a generator instead of a manual loop;
    the explicit None default preserves the original's implicit return.
    """
    return next((item for item in seq if f(item)), None)
def annot_pipeline_sim(listISCFunctions,
listObjdumpFunctions):
'''
In this function, we simulate pipeline for each basic block in the Objdump.
We assume that :
* Each basic block is independent, and is cold started ie. no instruction
midway in pipeline.
* For each Load/Store Instruction L1 Data Hit occurs.
Pipeline Structure
* 8 stage pipeline
* 2 Instruction Fetch Stages
* 2 Instruction Decode Stages
* 4 Parallel Stages for
* Arithmetic Operations : SH, ALU, SAT, WB
* Multiply Operations : MAC1, MAC2, MAC3
* Load Store Unit : ADD, DC1, DC2, WB
Definitions:
* Result Latency : Number of cycles required for the result of this
instruction to be available at the start of ALU, MAC2 or DC1 stages
of the next instruction.
* Early Reg : Register required at the start of SH, MAC1 or ADD stages.
One cycle must be added to result latency of instruction producing this
register for interlock calculations.
* Late Reg : Register required in second stage of execution pipeline. One
cycle must be subtracted from result latency of instruction producing
this register for interlock calculations.
Load/Store Instructions
* Result Latency : 3 cycles
ADD/MOV Inst:
* Result Latency : 1 cycle
MUL Inst
* Result Latency : avg. 4 cycles (varies)
'''
dictAnnotPipeline = {}
for funcObj in listObjdumpFunctions:
funcISC = find(lambda fn: fn.functionName == funcObj.functionName,
listISCFunctions)
for blockObj in funcObj.cfg.listBlocks:
#initialize some state registers
prevOpLoadStore = False
prevDestReg = None
currBlockCycles = 7; # For filling the pipeline on cold start
for lineNumObj in range(blockObj.startLine, blockObj.endLine + 1):
lineObj = lc.getline(funcObj.fileName, lineNumObj)
# Initialize some state Registers
opcode = ""
destReg = ""
op1Reg = ""
op2 = ""
op2RegIsShifted = False
m = re_instruction.match(lineObj)
assert(m is not None)
instObj = m.group("instruction")
m = re_arithInst.match(instObj)
if m is not None:
opcode = m.group("arithOpcode")
destReg = m.group("destReg")
op1Reg = m.group("op1Reg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
if opcode not in ["mul", "mla"]:
# Add Instruction
currBlockCycles= currBlockCycles + ALU_LAT
prevResLat = ALU_RES_LAT
else:
# Multiply Instruction
currBlockCycles = currBlockCycles + MUL_LAT
prevResLat = MUL_RES_LAT
# Calculation Interlock for Add and Mul Instructions
if prevDestReg is not None:
if op2 is not "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
continue
m = re_movInst.match(instObj)
if m is not None:
destReg = m.group("destReg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles= currBlockCycles + ALU_LAT
# Calculation Interlock for Add and Mul Instructions
if prevDestReg is not None:
if op2 is not "":
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_mvnInst.match(instObj)
if m is not None:
destReg = m.group("destReg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles= currBlockCycles + ALU_LAT
# Calculation Interlock for Add and Mul Instructions
if prevDestReg is not None:
if op2 is not "":
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_arithLongInst.match(instObj)
if m is not None:
# Long Arithmetic Instructions
currBlockCycles = currBlockCycles + 2 * MUL_RES_LAT
prevDestReg = None
prevResLat = 0
# TODO: This needs to be improved!
continue
m = re_logicInst.match(instObj)
if m is not None:
# Logical Instruction
# opcode = m.group("logicOpcode")
destReg = m.group("destReg")
op1Reg = m.group("op1Reg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles = currBlockCycles + ALU_RES_LAT
# Calculating Interlock
if prevDestReg is not None:
if op2 is not "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
# Calculating Interlock
m = re_shiftInst.match(instObj)
if m is not None:
# Shift Instruction
# opcode = m.group("shiftOpcode")
destReg = m.group("destReg")
op1Reg = m.group("op1Reg")
currBlockCycles = currBlockCycles + ALU_RES_LAT
# Calculation Interlock
if prevDestReg is not None:
if prevDestReg == op1Reg:
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No Interlock!
pass
prevDestReg = destReg
prevResLat = ALU_RES_LAT
continue
m = re_branchInst.match(instObj)
if m is not None:
# Branch Instruction
currBlockCycles = currBlockCycles + ALU_RES_LAT
prevDestReg = None
prevResLat = 0
# TODO: May need to be improved!
continue
m = re_cmpInst.match(instObj)
if m is not None:
# Compare Instruction
op1Reg = m.group("op1Reg")
if m.group("op2RegShifted") is not None:
op2 = m.group("op2RegShifted")
op2RegIsShifted = True
elif m.group("op2Reg") is not None:
op2 = m.group("op2Reg")
op2RegIsShifted = False
else:
assert(m.group("op2ImedVal") is not None)
op2 = ""
op2RegIsShifted = False
currBlockCycles = currBlockCycles + ALU_RES_LAT
# Calculating Interlock
if prevDestReg is not None:
if op2 is not "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = None
prevResLat = 0
continue
m = re_pushInst.match(instObj)
if m is not None:
pushRegs = m.group("pushRegs")
listPushRegs = pushRegs.split(",")
currBlockCycles = currBlockCycles + len(listPushRegs)
prevDestReg = None
prevResLat = 0
# TODO: May need to be fixed!
continue
m = re_popInst.match(instObj)
if m is not None:
pushRegs = m.group("popRegs")
listPushRegs = pushRegs.split(",")
currBlockCycles = currBlockCycles + len(listPushRegs)
prevDestReg = None
prevResLat = 0
# TODO: May need to be fixed!
continue
m = re_ignoredInst.match(instObj)
if m is not None:
currBlockCycles = currBlockCycles + LDST_LAT
prevDestReg = None
prevResLat = 0
# TODO: Has to be improved!!!
continue
m = re_loadInst.match(instObj)
if m is not None:
destReg = m.group("destReg")
for baseRegLabel in ["am2_1BaseReg",
"am2_2BaseReg",
"am2_3BaseReg",
"am2_4BaseReg",
"am2_5BaseReg",
"am2_6BaseReg",
"am2_7BaseReg"]:
if m.group(baseRegLabel) is not None:
break
op1Reg = m.group(baseRegLabel)
op2 = ""
if op1Reg == "am2_3BaseReg":
op2 = m.group("am2_3OffsetReg")
op2RegIsShifted = False
elif op1Reg == "am2_4BaseReg":
op2 = m.group("am2_4OffsetReg")
op2RegIsShifted = True
elif op1Reg == "am2_6BaseReg":
op2 = m.group("am2_6OffsetReg")
op2RegIsShifted = False
elif op1Reg == "am2_7BaseReg":
op2 = m.group("am2_7OffsetReg")
op2RegIsShifted = True
currBlockCycles = currBlockCycles + LDST_LAT
if prevDestReg is not None:
if op2 is not "":
if (op2RegIsShifted == True):
if (op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
elif (op1Reg == prevDestReg):
# Late Reg!
currBlockCycles = currBlockCycles + prevResLat - 1
else:
# No interlock
pass
else: # (op2RegIsShifted == False)
if (op1Reg == prevDestReg or
op2 == prevDestReg):
# Early Reg!
currBlockCycles = currBlockCycles + prevResLat + 1
else:
# No interlock
pass
prevDestReg = destReg
prevResLat = LDST_RES_LAT
continue
m = re_storeInst.match(instObj)
if m is not None:
currBlockCycles = currBlockCycles + LDST_LAT
prevDestReg = None
prevResLat = 0
continue
print "%d : Instruction Could not be identified!" % (lineNumObj)
# Block Done!
blockIndISC = blockObj.mapsTo[0]
blockISC = funcISC.cfg.listBlocks[blockIndISC]
annot_str = "pipelineCycles += %d;" % (currBlockCycles)
annot = Annotation(annot_str,
funcISC.fileName,
blockISC.startLine,
replace = False)
print ("Adding annotation to %s:%d : %s" % (funcISC.fileName,
blockISC.startLine-1,
annot_str))
addAnnotationToDict(dictAnnotPipeline,
blockISC.startLine-1,
annot)
# Function Done!
# All Functions Done!
return dictAnnotPipeline
# No standalone behaviour: this module is meant to be imported by the
# cache-instrumentation driver, not executed directly.
if __name__ == "__main__":
    pass
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment