Commit bf591060 authored by Gaurav Kukreja

Arm Emulator working so awesome

Signed-off-by: Gaurav Kukreja <gaurav@gauravk.in>
parent 7a09307f
import re
import logging
from arm_isa_regex import *
class Register:
    """State of one emulated register.

    Attributes:
        name: the register name passed to the constructor.
        isValid: False initially and after setInvalid(); True once a value
            has been stored with setValue().
        value: the last value stored with setValue(); 0 initially.
    """

    def __init__(self, name):
        self.name = name
        self.isValid = False
        self.value = 0

    def setInvalid(self):
        """Flag the register content as unknown; the stored value is kept."""
        self.isValid = False

    def setValue(self, value):
        """Store value and flag the register as holding a known value."""
        # Must update the same flag that __init__ and setInvalid() use;
        # assigning to a differently named attribute left isValid False forever.
        self.isValid = True
        self.value = value
# Names of the General Purpose Registers, in the order they are created and
# printed by the emulator.
listGPRegNames = "r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 sl fp ip sp lr pc".split()

# Value the emulated stack pointer starts with (all 32 low bits set).
INIT_STACK_POINTER_VAL = (1 << 32) - 1
class ArmEmulator:
    """Tracks register values while stepping through disassembled ARM code.

    Only a few effects are modelled: mov/mvn, add/sub style arithmetic with
    an immediate or a plain register as second operand, PC relative loads
    (resolved through dictGlobVarAddAtTableAddress) and the stack pointer
    adjustment of push/pop.  Other recognised instructions are ignored;
    handlers that know the destination register mark it invalid.

    Values are plain Python integers: nothing is truncated to 32 bits and no
    carry flag is kept, so adc/sbc/rsc are treated like add/sub/rsb.
    """

    # Names of the regex groups that may hold the base register of a load.
    _BASE_REG_GROUPS = ("am2_1BaseReg",
                        "am2_2BaseReg",
                        "am2_3BaseReg",
                        "am2_4BaseReg",
                        "am2_5BaseReg",
                        "am2_6BaseReg",
                        "am2_7BaseReg")

    def __init__(self, dictGlobVarAddAtTableAddress):
        """Create all registers and remember the literal table.

        dictGlobVarAddAtTableAddress maps the address of a literal table
        entry to the value stored there; it is consulted for PC relative
        loads.
        """
        self.reg = {}
        for regName in listGPRegNames:
            self.reg[regName] = Register(regName)

        # For our emulation purpose, we don't care what the actual value of SP
        # is. However, we do want to initialize the SP so that we can
        # identify local variable load/store operations.
        self.reg["sp"].setValue(INIT_STACK_POINTER_VAL)

        self.dictGlobVarAddAtTableAddress = dictGlobVarAddAtTableAddress

    def printRegisters(self):
        """Print every register as "<name> = <value>", one per line."""
        for regName in listGPRegNames:
            # Single parenthesised argument: same output on Python 2 and 3.
            print("%s = %d" % (regName, self.reg[regName].value))

    def setDictGlobVarTable(self, dictGlobVarAddAtTableAddress):
        """Replace the literal table used for PC relative loads."""
        self.dictGlobVarAddAtTableAddress = dictGlobVarAddAtTableAddress

    def emulate(self, inst):
        """Apply the effect of one disassembled instruction.

        The instruction text is tried against the instruction patterns in a
        fixed order and the first pattern that matches decides the handling.

        Returns 0 when the instruction was emulated or deliberately ignored,
        -1 when it could not be matched or emulated (an error is logged).
        """
        m = re_movInst.match(inst)
        if m is not None:
            return self._emulateMove(m, False)

        m = re_mvnInst.match(inst)
        if m is not None:
            return self._emulateMove(m, True)

        m = re_arithInst.match(inst)
        if m is not None:
            return self._emulateArith(m)

        m = re_arithLongInst.match(inst)
        if m is not None:
            self.reg[m.group("destRegHi")].setInvalid()
            self.reg[m.group("destRegLow")].setInvalid()
            logging.debug("\t Ignoring Arithmetic long instructions.")
            return 0

        m = re_shiftInst.match(inst)
        if m is not None:
            self.reg[m.group("destReg")].setInvalid()
            logging.debug("\t Ignoring Shift Instruction.")
            return 0

        m = re_logicInst.match(inst)
        if m is not None:
            self.reg[m.group("destReg")].setInvalid()
            logging.debug("\t Logical Instruction Ignored!")
            return 0

        if re_branchInst.match(inst) is not None:
            # Ignore the branch instructions
            logging.debug("\t Branch Instruction Ignored!")
            return 0

        m = re_loadInst.match(inst)
        if m is not None:
            return self._emulateLoad(m)

        if re_storeInst.match(inst) is not None:
            logging.debug("\t Store Instruction ignored!")
            return 0

        if re_cmpInst.match(inst) is not None:
            logging.debug("\t Compare Instruction ignored!")
            return 0

        m = re_pushInst.match(inst)
        if m is not None:
            # ARM push stores below the current stack pointer (descending
            # stack), so SP goes down by 4 bytes per pushed register.
            numBytes = 4 * (m.group("pushRegs").count(',') + 1)
            self.reg["sp"].setValue(self.reg["sp"].value - numBytes)
            logging.debug("\t Push Instruction, sp decremented by %d ( = %d )" %
                          (numBytes, self.reg["sp"].value))
            return 0

        m = re_popInst.match(inst)
        if m is not None:
            # Pop releases the space again: SP goes up by 4 bytes per register.
            numBytes = 4 * (m.group("popRegs").count(',') + 1)
            self.reg["sp"].setValue(self.reg["sp"].value + numBytes)
            logging.debug("\t Pop Instruction, sp incremented by %d ( = %d )" %
                          (numBytes, self.reg["sp"].value))
            return 0

        m = re_ignoredInst.match(inst)
        if m is not None:
            logging.debug("\t %s instruction being ignored" % m.group("ignoredOpcode"))
            return 0

        # If here, instruction was unable to be matched!
        logging.error("\t Could Not match the instruction! ********************")
        logging.error("\t %s" % inst)
        return -1

    def _emulateMove(self, m, invert):
        """Handle a matched mov (invert=False) or mvn (invert=True)."""
        destReg = m.group("destReg")

        if m.group("op2ImedVal") is not None:
            value = int(m.group("op2ImedVal"))
            if invert:
                value = ~value
            self.reg[destReg].setValue(value)
            logging.debug("\t %s = %d" % (destReg, self.reg[destReg].value))
            return 0

        if m.group("op2Reg") is not None:
            value = self.reg[m.group("op2Reg")].value
            if invert:
                value = ~value
            self.reg[destReg].setValue(value)
            logging.debug("\t %s = %s (= %d)" % (destReg, m.group("op2Reg"),
                                                 self.reg[destReg].value))
            return 0

        if m.group("op2RegShifted") is not None:
            self.reg[destReg].setInvalid()
            logging.debug("\t Move Instruction with Shifted Operand Ignored!")
            return 0

        logging.error("\t Move instruction does not match any format!")
        return -1

    def _emulateArith(self, m):
        """Handle a matched arithmetic instruction (add/sub families)."""
        opcode = m.group("arithOpcode")
        destReg = m.group("destReg")
        op1Reg = m.group("op1Reg")

        if opcode in ("add", "adc"):
            label, symbol = "Add", "+"
        elif opcode in ("sub", "sbc", "rsb", "rsc"):
            label, symbol = "Sub", "-"
        else:
            # All other arithmetic instructions, like mul etc.
            # These instructions shouldn't matter for us, so ignore them.
            self.reg[destReg].setInvalid()
            logging.debug("\t Arithmetic Instruction ignored!")
            return 0

        if m.group("op2ImedVal") is not None:
            op2Val = int(m.group("op2ImedVal"))
            op2Shown = "%d" % op2Val
        elif m.group("op2Reg") is not None:
            op2Shown = m.group("op2Reg")
            op2Val = self.reg[op2Shown].value
        elif m.group("op2RegShifted") is not None:
            self.reg[destReg].setInvalid()
            logging.debug("\t %s Instruction with shifted operand2 ignored!" % label)
            return 0
        else:
            logging.error("\t %s Instruction, but operand 2 does not match expected format!" % label)
            return -1

        # The first source operand is op1Reg, which need not be the
        # destination register.
        op1Val = self.reg[op1Reg].value
        if symbol == "+":
            result = op1Val + op2Val
            left, right = op1Reg, op2Shown
        elif opcode in ("rsb", "rsc"):
            # Reverse subtract: operand 2 minus operand 1.
            result = op2Val - op1Val
            left, right = op2Shown, op1Reg
        else:
            result = op1Val - op2Val
            left, right = op1Reg, op2Shown

        self.reg[destReg].setValue(result)
        logging.debug("\t %s = %s %s %s ( = %d )" % (destReg, left, symbol, right,
                                                     self.reg[destReg].value))
        return 0

    def _emulateLoad(self, m):
        """Handle a matched load; only PC relative loads set a value."""
        srcBaseReg = None
        for baseRegLabel in self._BASE_REG_GROUPS:
            srcBaseReg = m.group(baseRegLabel)
            if srcBaseReg is not None:
                break

        destReg = m.group("destReg")

        if srcBaseReg != "pc":
            # Ignoring other load instructions. The loaded value is unknown,
            # so flag the destination like the other ignored handlers do.
            self.reg[destReg].setInvalid()
            logging.debug("\t Ignoring Load Instruction (not PC Relative).")
            return 0

        # PC Relative Load Instruction to load an address of Global Variable to register
        # Note: It could also be loading some long value which was stored at the end
        #       of the function!
        comment = m.group("comment")
        mAddress = None
        if comment is not None:
            # Require at least one hex digit so that an unusable comment is
            # reported instead of failing inside int().
            mAddress = re.match(r"\s*(?P<address>[a-f0-9]+)", comment)
        if mAddress is None:
            logging.error("\t Load PC Relative, comment could not be matched!")
            return -1

        address = int(mAddress.group("address"), 16)
        if address not in self.dictGlobVarAddAtTableAddress:
            logging.error("\t Load PC Relative address could not be found in table!")
            return -1

        self.reg[destReg].setValue(self.dictGlobVarAddAtTableAddress[address])
        logging.debug("\t %s = %d (address of global var)" %
                      (destReg, self.reg[destReg].value))
        return 0
\ No newline at end of file
import re
# Regular expressions for the textual form of ARM instructions.
#
# All fragments that contain regex escapes are raw strings so that the
# backslashes reach the re module unchanged and Python does not report
# invalid string escape sequences.

# Tail of an instruction: either "; <comment>" (captured as "comment") or
# nothing up to the end of the string.
EndLine = r"\s*;\s*(?P<comment>.*)$|$"

# Condition code suffixes and register names accepted by the patterns.
Cond = "eq|ne|cs|hs|lo|cc|mi|pl|hi|ls|ge|lt|gt|le"
Reg = "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc"

ShiftOpcode = "(?:lsl|lsr|asr|ror|rrx)s?"

# Second operand: "#imm" (op2ImedVal), a register (op2Reg), or a register
# followed by a shift (op2RegShifted holds only the register).
Operand2 = r"#(?P<op2ImedVal>-?\d*)|(?P<op2Reg>%s)|(?P<op2RegShifted>%s),\s*(?:%s)\s*(?:#-?\d*|(?:%s))" % (Reg, Reg, ShiftOpcode, Reg)

re_movInst = re.compile(r"\s*(?:mov)s?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" %
                        (Cond, Reg, Operand2, EndLine))

re_mvnInst = re.compile(r"\s*(?:mvn)s?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" %
                        (Cond, Reg, Operand2, EndLine))

ArithOpcode = "(?P<arithOpcode>add|adc|sub|sbc|rsb|rsc|mul|mla)s?"  # There are more that I have ignored for now
re_arithInst = re.compile(r"\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
                          (ArithOpcode, Cond, Reg, Reg, Operand2, EndLine))

ArithLongOpcode = "(?P<arithLongOpcode>umull|umlal|smull|smlal)"
re_arithLongInst = re.compile(r"\s*(?:%s)(?:%s)?\s*(?P<destRegLow>%s),\s*(?P<destRegHi>%s),\s*(?:%s),\s*(?:%s)(?:%s)" %
                              (ArithLongOpcode, Cond, Reg, Reg, Reg, Reg, EndLine))

LogicOpcode = "(?P<logicOpcode>and|eor|orr|bic)s?"
re_logicInst = re.compile(r"\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*(?:%s)(?:%s)" %
                          (LogicOpcode, Cond, Reg, Reg, Operand2, EndLine))

re_shiftInst = re.compile(r"\s*(?:%s)(?:%s)?\s*(?P<destReg>%s),\s*(?P<op1Reg>%s),\s*#(?P<op2ImedVal>\d*)(?:%s)" %
                          (ShiftOpcode, Cond, Reg, Reg, EndLine))

# Branches: the target is a register or a hex address, optionally followed
# by a "<function+0xoffset>" label.
BranchOpcode = "(?P<branchOpcode>b|bl|bx|blx|bxj)"
BranchTarget = "(?P<branchTarget>[a-f0-9]*)"
Label = r"\<(?P<labelFunction>\w*)(?:\+0x[a-f0-9]*)?\>"
re_branchInst = re.compile(r"\s*(?:%s)(?:%s)?\s*(?:(?:%s)|(?:%s))\s*(?:%s)?(?:%s)" %
                           (BranchOpcode, Cond, Reg, BranchTarget, Label, EndLine))

# AMode2 = "\[(?P<lsop2BaseReg>%s)(?:(?:,\s*(?:(?P<imedOffset>#-?\d*)|(?P<offsetReg>-?%s(?:,\s*(?:%s)\s*#\d*)?)))?\])|(?:(?:\](?:,\s*(?:#\d*)|(?:%s(?:,\s*(?:%s),\s*#\d*)?))?))" % (Reg, Reg, ShiftOpcode, Reg, ShiftOpcode)

# Load/store addressing forms; each one captures its base register in its
# own am2_<n>BaseReg group.
AMode2_1 = r"\[(?P<am2_1BaseReg>%s)\]" % (Reg)
AMode2_2 = r"\[(?P<am2_2BaseReg>%s),\s*#(?P<am2_2ImedOff>-?\d*)\]" % (Reg)
AMode2_3 = r"\[(?P<am2_3BaseReg>%s),\s*(?P<am2_3OffsetReg>-?%s)\]" % (Reg, Reg)
AMode2_4 = r"\[(?P<am2_4BaseReg>%s),\s*(?P<am2_4OffsetReg>%s),\s*(?:%s)\s*#\d*\]" % (Reg, Reg, ShiftOpcode)
AMode2_5 = r"\[(?P<am2_5BaseReg>%s)\],\s*#(?P<am2_5ImedOff>-?\d*)" % (Reg)
AMode2_6 = r"\[(?P<am2_6BaseReg>%s)\],\s*-?(?:%s)" % (Reg, Reg)
AMode2_7 = r"\[(?P<am2_7BaseReg>%s)\],\s*(?:%s),\s*(?:%s)\s*#\d*" % (Reg, Reg, ShiftOpcode)

AMode2 = ("(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)|(?:%s)" % (AMode2_1,
                                                              AMode2_2,
                                                              AMode2_3,
                                                              AMode2_4,
                                                              AMode2_5,
                                                              AMode2_6,
                                                              AMode2_7))

LoadStoreType = "t|b|bt|sb|h|sh|d"
LoadStoreOp2 = (r"\[(?P<lsop2BaseReg>%s)(?:,\s*(?P<lsop2Index>%s))?\]" % (Reg, Operand2))

re_loadInst = re.compile(r"\s*ldrs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" %
                         (LoadStoreType, Cond, Reg, AMode2, EndLine))

re_storeInst = re.compile(r"\s*strs?(?:%s)?(?:%s)?\s*(?P<destReg>%s),\s*(?:%s)(?:%s)" %
                          (LoadStoreType, Cond, Reg, AMode2, EndLine))

re_cmpInst = re.compile(r"\s*(?:cmp|cmn)\s*(?:%s),\s*(?:%s)(?:%s)" %
                        (Reg, Operand2, EndLine))

re_pushInst = re.compile(r"\s*push\s*\{(?P<pushRegs>(?:%s)(?:,\s*(?:%s))*)\}(?:%s)" %
                         (Reg, Reg, EndLine))

re_popInst = re.compile(r"\s*pop\s*\{(?P<popRegs>(?:%s)(?:,\s*(?:%s))*)\}(?:%s)" %
                        (Reg, Reg, EndLine))

# Opcodes that are recognised only so that they can be skipped.
IgnoredOpcode = "(?P<ignoredOpcode>stc|ldc|stm|tst)"
re_ignoredInst = re.compile(r"\s*(?:%s).*" % (IgnoredOpcode))
import re
import logging
from optparse import OptionParser
from subprocess import call
......@@ -6,6 +5,7 @@ import linecache as lc
from instrument import *
from match_cfg import match_cfg
from armEmulate import *
def find(f, seq):
......@@ -101,7 +101,6 @@ def getGlobalVariablesInfoFromGDB(listBinaryFileNames):
debugListGlobalVariables(listGlobalVariables)
return listGlobalVariables
re_instructionLineObj = re.compile('\s*([0-9a-f]*):\s*([0-9a-f]*)\s*(.*)')
re_loadPCRelative = re.compile("\s*ldr\w*\s*([\w]{2}),\s*\[pc,\s#[\d]*\]\s*;\s*([a-fA-F0-9]*)\s*<\w*\+0x[a-fA-F0-9]*>")
re_loadStoreSPRelative = re.compile("\s*((?:str\w*)|(?:ldr\w*))\s*(\w{2}),\s*\[sp(?:,\s*#(\d*))?\]")
......@@ -111,219 +110,104 @@ re_addSubInst = re.compile("\s*((?:add)|(?:sub))(?:\w{2})?\s*(\w{2}),\s*(.*),\s*
re_instObj = re.compile("\s*(\w*)\s*(\w*)((?:,\s*[\w\[\]\#]*)*)")
def mapRegToVar(dictGlobalVarAddInReg, fileNameObj, lsLineNum, regName):
if regName in dictGlobalVarAddInReg:
globalVar = dictGlobalVarAddInReg[regName]
return globalVar
else:
READ_BACKWARDS_INST_THRESHOLD = 20
lineNum = lsLineNum
# Check if register can be mapped to a global variable by reading instructions above
while lineNum > (lsLineNum - READ_BACKWARDS_INST_THRESHOLD):
lineNum = lineNum - 1
line = lc.getline(fileNameObj, lineNum)
m = re_instructionLineObj.match(line)
if m is not None:
inst = m.group(3)
instAdd = int(m.group(1), 16)
m = re_instObj.match(inst)
if m is not None:
targetReg = m.group(2)
operands = m.group(3)
if targetReg == regName:
# find all operands that are registers (\w{2})
listRegOperands = re.findall("\s*,\s*(\w{2})", operands)
for regOperand in listRegOperands:
if regOperand in dictGlobalVarAddInReg:
globalVar = dictGlobalVarAddInReg[regOperand]
return globalVar
else:
continue
else:
break
else:
def getGlobalVariableAddressTableForFunc(funcObj):
    """Collect the words listed after the code of a function.

    Lines of funcObj.fileName are read upwards, starting at funcObj.endLine.
    For every line that matches re_instructionLineObj, the first hex field
    is used as key and the second hex field as value of the returned dict.
    Reading stops at the first line whose instruction text matches
    re_returnInst, or at the first line that does not match
    re_instructionLineObj at all.
    """
    tableEntries = {}
    currLineNum = funcObj.endLine

    while True:
        lineMatch = re_instructionLineObj.match(lc.getline(funcObj.fileName, currLineNum))
        if lineMatch is None:
            break
        if re_returnInst.match(lineMatch.group(3)) is not None:
            # Reached the return instruction: everything below it was table.
            break

        entryAddress = int(lineMatch.group(1), 16)
        entryValue = int(lineMatch.group(2), 16)
        tableEntries[entryAddress] = entryValue
        currLineNum -= 1

    return tableEntries
logging.debug("\t %d: Could not map register \"%s\" to a global var." %
(lsLineNum, regName))
# if here, the register could not be matched to a global var using above analysis
return None
def instrument_cache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames):
'''
Algorithm
To make our process simple, we are planning to use only Global Variables.
We will modify the benchmark program, to make all local variables as global.
This will only be possible, if the function is not recursive and won't have
a huge impact on performance unless there are a number of functions that are
called, and each have a large number of local variables.
1. Extract info of Global Variables from GDB.
2. Perform Matching of Control Flow Graphs.
3. For each funcObj in list of Objdump functions,
a. Find corresponding funcISC.
b. For each line in funcObj,
1. look for load and store instructions. Each load and store
should be either,
a. PC Relative Load/Store (Global Variable)
b. Stack Pointer Relative Load/Store (Local Variable)
PS. This is when we don't consider heap/dynamically allocated
memory.
2. PC Relative Load/Store:
a. Find the address of the address of global variable.
b. Find name of the global variable being fetched from memory.
c. In the mapping basic block in ISC, look for an instruction
where this global variable is being accessed. If found,
1. Look if this variable is an array and is being accessed
with an index.
a. TODO: Find a good way, to check if the current basic
block is part of a loop.
b. If yes, we need to find the index variable in the
source code.
c. Annotate the memory access at appropriate line in
the source code.
'''
def instrumentCache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames):
(listISCFunctions, listObjdumpFunctions) = match_cfg(listISCFileNames,
listObjdumpFileNames,
listBinaryFileNames)
listGlobalVariables = getGlobalVariablesInfoFromGDB(listBinaryFileNames)
listGlobalVariables = getGlobalVariablesInfoFromGDB(listBinaryFileNames)
for funcObj in listObjdumpFunctions:
dictGlobalVarAtPCRelativeAdd = {}
dictGlobalVarAddInReg = {}
fileNameObj = funcObj.fileName
funcISC = find(lambda fn: fn.functionName == funcObj.functionName, listISCFunctions)
fileNameISC = funcISC.fileName
logging.debug("")
logging.debug(" Function: %s" % funcObj.functionName)
# Read addresses of all global variables attached at the end of the
# function, by reading backwards until return instruction is found
logging.debug(" Table of Global Variable Addresses:")
lineNumObj = funcObj.endLine
while True:
lineObj = lc.getline(fileNameObj, lineNumObj)
m = re_instructionLineObj.match(lineObj)
if m is not None:
inst = m.group(3)
m1 = re_returnInst.match(inst)
if m1 is not None:
break
else:
instAdd = int(m.group(1), 16)
globalVarAdd = int(m.group(2), 16)
globalVar = find(lambda var: var.address == globalVarAdd,
listGlobalVariables)
if globalVar is not None:
dictGlobalVarAtPCRelativeAdd[instAdd] = globalVar
logging.debug("\t \"%s\" at address %x." % (globalVar.name, globalVar.address))
else:
logging.error("\t Global Var with address %x not found." % globalVarAdd)
lineNumObj = lineNumObj - 1
else:
break
assert(funcISC is not None)
dictGlobVarAddAtTableAddress = getGlobalVariableAddressTableForFunc(funcObj)
armEmu = ArmEmulator(dictGlobVarAddAtTableAddress)
logging.debug(" Load Store Instructions:")
for lineNumObj in range(funcObj.startLine, funcObj.endLine+1):
lineObj = lc.getline(fileNameObj, lineNumObj)
for lineNumObj in range(funcObj.startLine, funcObj.endLine + 1):
lineObj = lc.getline(funcObj.fileName, lineNumObj)
m = re_instructionLineObj.match(lineObj)
if m is not None:
instAdd = int(m.group(1), 16)
instAdd = m.group(1)
instObj = m.group(3)
ret = armEmu.emulate(instObj)
if ret == -1:
logging.debug("\t %d: Instruction could not be emulated!" % lineNumObj)
return -1
# Load PC Relative - Global Variables
m = re_loadPCRelative.match(instObj)
m = re_loadInst.match(instObj)
if m is not None:
globalVarAddInReg = m.group(1)
globalVarTabAddress = int(m.group(2), 16)
if globalVarTabAddress in dictGlobalVarAtPCRelativeAdd:
globalVar = dictGlobalVarAtPCRelativeAdd[globalVarTabAddress]
else:
logging.debug("\t %d: PC Relative Address was loaded, but corresponding global var was not found. Ignoring!")
continue
dictGlobalVarAddInReg[globalVarAddInReg] = globalVar
logging.debug("\t %d: %s = address of global var \"%s\"" %
(lineNumObj, globalVarAddInReg, globalVar.name))
continue
m = re_loadStoreSPRelative.match(instObj)
if m is not None:
lsOpcode = m.group(1)
lsReg = m.group(2)
if m.group(3) is None:
lsSPIndexVal = 0
else:
lsSPIndexVal = int(m.group(3))
logging.debug("\t %d: Local Variable was accessed at address sp+%d." %
(lineNumObj, lsSPIndexVal))
continue
# TODO: Look for SP relative address being loaded into a register
# It can be done by moving sp to a register, or by
# adding/subtracting from sp and storing to a register.
# Map the address to a local variable
m = re_loadStoreInst.match(instObj)
if m is not None:
lsOpcode = m.group(1)
lsReg = m.group(2)
lsAddReg1 = m.group(3)
lsAddReg2 = m.group(4)
if lsAddReg2 == None:
# See if we know address of which global var is stored in lsAddReg1
# TODO: Look for above instructions to see where lsAddReg1 comes from.
globalVar = mapRegToVar(dictGlobalVarAddInReg,
fileNameObj,
lineNumObj,
lsAddReg1)
for baseRegLabel in ["am2_1BaseReg",
"am2_2BaseReg",
"am2_3BaseReg",
"am2_4BaseReg",
"am2_5BaseReg",
"am2_6BaseReg",
"am2_7BaseReg"]:
if m.group(baseRegLabel) is not None:
break
baseReg = m.group(baseRegLabel)
if baseReg == "pc":
comment = m.group("comment")
m_comment = re.match("\s*([0-9a-f]*)\s*\<.*\>", comment)
assert(m_comment)
addInTable = int(m_comment.group(1), 16)
assert(addInTable in dictGlobVarAddAtTableAddress)
address = dictGlobVarAddAtTableAddress[addInTable]
globalVar = find(lambda var: var.address == address,
listGlobalVariables)
if globalVar is None:
logging.error("\t %s:%d: Could not match load/store instruction with variable"
(fileNameObj, lineNumObj))
return -1
else:
baseAddReg = lsAddReg1
logging.debug(" PC Relative Load not for Global Var, probably for long value!")
continue
# TODO: Verify the global variable being accessed by looking at source code
logging.debug("\t %d: %s = value of global var \"%s\" (pointed to by %s)" %
(lineNumObj, lsReg, globalVar.name, baseAddReg))
logging.debug(" %d: Load Address of Global Var %s" %
(lineNumObj, globalVar.name))
continue
else:
globalVar = mapRegToVar(dictGlobalVarAddInReg,
fileNameObj,
lineNumObj,
lsAddReg1)
baseRegVal = armEmu.reg[baseReg].value
globalVar = find(lambda var: var.address == baseRegVal,
listGlobalVariables)
if globalVar is not None:
baseAddReg = lsAddReg1
indexReg = lsAddReg2
logging.debug(" %d: Loading content of Global Var %s" %
(lineNumObj, globalVar.name))
continue
elif baseRegVal > armEmu.reg["sp"]:
logging.debug(" %d: Accessing some local variable" %
(lineNumObj))
# TODO: Which Local Variable?
continue
else:
globalVar = mapRegToVar(dictGlobalVarAddInReg,
fileNameObj,
lineNumObj,
lsAddReg2)
if globalVar is not None:
baseAddReg = lsAddReg2
indexReg = lsAddReg1
else:
logging.error("\t %d: Could not match load/store instruction with variable" %
(lineNumObj))
if globalVar.length == 1:
logging.error("From objdump, it looks like an indexed access of array, but global variable is not an array (based on info from GDB)!")
quit
# TODO: Look for Indexed array access in matching block in Source Code
logging.debug("\t %d: %s = value of element in global var %s (pointed to by %s, indexed by %s)." %
(lineNumObj, lsReg, globalVar.name, baseAddReg, indexReg))
logging.debug(" %d: %s" % (lineNumObj, instObj))
continue
else:
logging.error(" %d: Instruction does not match!")
return -1
armEmu.printRegisters()
if __name__ == "__main__":
......@@ -354,14 +238,4 @@ if __name__ == "__main__":
listObjdumpFileNames = options.listObjdumpFileNames
listBinaryFileNames = options.listBinaryFileNames
# (listISCFunctions, listObjdumpFunctions) = map_cfg(listISCFileNames,
# listObjdumpFileNames,
# listBinaryFileNames)
# getGlobalVariablesInfoFromGDB(listBinaryFileNames)
# instrument_cache(listISCFileNames, listISCFunctions,
# listObjdumpFileNames, listObjdumpFunctions,
# listBinaryFileNames)
#
instrument_cache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames)
instrumentCache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames)
\ No newline at end of file
import re
import logging
from optparse import OptionParser
from subprocess import call
import linecache as lc
from instrument import *
from match_cfg import match_cfg
def find(f, seq):
    """Return the first element of seq for which f(element) is true.

    Elements are tested in order and testing stops at the first hit.
    None is returned when no element qualifies.
    """
    return next((candidate for candidate in seq if f(candidate)), None)
def debugListGlobalVariables(listGlobalVariables):
    """Print one line per global variable, framed by two empty lines.

    Each element must provide the attributes name, address, type, length,
    file and lineNum.
    """
    # print is always called with a single parenthesised argument, which
    # gives the same output with the Python 2 statement and the Python 3
    # function.
    print("")
    for globVar in listGlobalVariables:
        print("%s\t\t0x%x\t\t(type=%s; length=%d) - %s:%d" %
              (globVar.name, globVar.address, globVar.type, globVar.length,
               globVar.file, globVar.lineNum))
    print("")
def getGlobalVariablesInfoFromGDB(listBinaryFileNames):
    """Query gdb for the global variables of each binary.

    For every binary two gdb runs are made:
      1. "info variables" - the output is parsed for file names and
         variable declarations (type, name, optional array length).
      2. "info address <name>" for each variable found - the output is
         parsed for the static storage address.

    The gdb command scripts and outputs are written next to the binary as
    <binary>.globalVarNames.gdbx/.gdbo and <binary>.globalVarAdd.gdbx/.gdbo.

    Returns the list of GlobalVariable objects of all binaries; the list is
    also printed with debugListGlobalVariables().
    """
    re_File = re.compile(r"File\s(.*):")
    re_Variable = re.compile(r"((?:[\w_]*\s)*)([\w_]*)(?:\[([0-9]*)\])*;")
    re_VarAdd = re.compile(r'Symbol "([\w_]*)" is static storage at address ([0-9a-fA-Fx]*).')

    listGlobalVariables = []

    for fileName in listBinaryFileNames:
        # Fetch Global Variable Names from this file
        gdbXFileName = fileName + ".globalVarNames.gdbx"
        with open(gdbXFileName, 'w') as gdbXFile:
            gdbXFile.write("info variables\n")
            gdbXFile.write("quit\n")

        gdbGlobalVarNamesOutputFileName = fileName + ".globalVarNames.gdbo"
        with open(gdbGlobalVarNamesOutputFileName, 'w') as gdbOutputFile:
            call(args=["gdb", "--quiet", "--command=" + gdbXFileName, fileName],
                 stdout=gdbOutputFile)

        currFileName = ""
        currListGlobalVariables = []
        with open(gdbGlobalVarNamesOutputFileName, 'r') as gdbOutputFile:
            for line in gdbOutputFile:
                m = re_File.match(line)
                if m is not None:
                    currFileName = m.group(1)

                m = re_Variable.match(line)
                if m is not None:
                    dataType = m.group(1)
                    varName = m.group(2)
                    if m.group(3) is not None:
                        varLen = int(m.group(3))
                    else:
                        # Not declared as an array
                        varLen = 1
                    currListGlobalVariables.append(GlobalVariable(name=varName,
                                                                  type=dataType,
                                                                  length=varLen,
                                                                  file=currFileName))

        # Fetch addresses for Global Variables in this file
        gdbXGlobalVarAddFileName = fileName + ".globalVarAdd.gdbx"
        with open(gdbXGlobalVarAddFileName, 'w') as gdbXFile:
            for var in currListGlobalVariables:
                gdbXFile.write("info address %s\n" % (var.name))
            gdbXFile.write("quit\n")

        gdbGlobalVarAddOutputFileName = fileName + ".globalVarAdd.gdbo"
        with open(gdbGlobalVarAddOutputFileName, 'w') as gdbOutputFile:
            call(args=["gdb", "--quiet", "--command=" + gdbXGlobalVarAddFileName, fileName],
                 stdout=gdbOutputFile)

        with open(gdbGlobalVarAddOutputFileName, 'r') as gdbOutputFile:
            for line in gdbOutputFile:
                m = re_VarAdd.match(line)
                if m is None:
                    continue
                symbolName = m.group(1)
                var = find(lambda v: v.name == symbolName, currListGlobalVariables)
                if var is None:
                    # gdb reported a symbol that was not collected above;
                    # there is no object to attach the address to.
                    logging.error("Address reported for unknown global variable \"%s\"" %
                                  symbolName)
                    continue
                var.setAddress(int(m.group(2), 16))

        listGlobalVariables.extend(currListGlobalVariables)

    debugListGlobalVariables(listGlobalVariables)

    return listGlobalVariables
# Patterns for lines of the objdump listing. Raw strings keep the regex
# escapes intact without relying on Python passing unknown string escapes
# through.

# "<address>: <encoding> <instruction text>"
re_instructionLineObj = re.compile(r'\s*([0-9a-f]*):\s*([0-9a-f]*)\s*(.*)')
# "ldr rX, [pc, #n] ; <table address> <function+0xoffset>"
re_loadPCRelative = re.compile(r"\s*ldr\w*\s*([\w]{2}),\s*\[pc,\s#[\d]*\]\s*;\s*([a-fA-F0-9]*)\s*<\w*\+0x[a-fA-F0-9]*>")
# Load/store relative to sp, with optional "#offset"
re_loadStoreSPRelative = re.compile(r"\s*((?:str\w*)|(?:ldr\w*))\s*(\w{2}),\s*\[sp(?:,\s*#(\d*))?\]")
re_returnInst = re.compile(r'\s*(bx)\s*(lr)')
# Load/store with a base register, an optional index register and an
# optional further operand inside the brackets
re_loadStoreInst = re.compile(r"\s*((?:ldr)|(?:str))\s*(\w{2}),\s*\[(\w{2})(?:,\s*(\w{2}))?(?:,\s*(.*))?\]")
re_addSubInst = re.compile(r"\s*((?:add)|(?:sub))(?:\w{2})?\s*(\w{2}),\s*(.*),\s*(.*)")
# "<opcode> <first operand><, further operands>"
re_instObj = re.compile(r"\s*(\w*)\s*(\w*)((?:,\s*[\w\[\]\#]*)*)")
# Map a register name to an entry of dictGlobalVarAddInReg.
#
# Parameters:
#   dictGlobalVarAddInReg - dict keyed by register name
#   fileNameObj           - file whose lines are read through linecache (lc)
#   lsLineNum             - line number the backward search starts from
#   regName               - register name to look up
#
# Returns dictGlobalVarAddInReg[regName] when regName is a key. Otherwise
# lines above lsLineNum (at most READ_BACKWARDS_INST_THRESHOLD of them) are
# examined: when the second word of an instruction equals regName, its
# remaining two-character operands are looked up in dictGlobalVarAddInReg and
# the first hit is returned. Returns None when nothing was found.
#
# NOTE(review): the indentation of this listing is lost, so it cannot be told
# from here which "if"/"for" the trailing "else: continue" and "else: break"
# branches belong to - confirm against the original file before restructuring.
def mapRegToVar(dictGlobalVarAddInReg, fileNameObj, lsLineNum, regName):
# Direct hit: the register itself is a key of the dict
if regName in dictGlobalVarAddInReg:
globalVar = dictGlobalVarAddInReg[regName]
return globalVar
else:
READ_BACKWARDS_INST_THRESHOLD = 20
lineNum = lsLineNum
# Check if register can be mapped to a global variable by reading instructions above
while lineNum > (lsLineNum - READ_BACKWARDS_INST_THRESHOLD):
lineNum = lineNum - 1
line = lc.getline(fileNameObj, lineNum)
m = re_instructionLineObj.match(line)
if m is not None:
# group(3) is the instruction text, group(1) the hex address of the line
inst = m.group(3)
instAdd = int(m.group(1), 16)
m = re_instObj.match(inst)
if m is not None:
# group(2) is the first operand, group(3) the remaining ", operand" list
targetReg = m.group(2)
operands = m.group(3)
if targetReg == regName:
# find all operands that are registers (\w{2})
listRegOperands = re.findall("\s*,\s*(\w{2})", operands)
for regOperand in listRegOperands:
if regOperand in dictGlobalVarAddInReg:
globalVar = dictGlobalVarAddInReg[regOperand]
return globalVar
else:
continue
else:
break
else:
break
logging.debug("\t %d: Could not map register \"%s\" to a global var." %
(lsLineNum, regName))
# if here, the register could not be matched to a global var using above analysis
return None
def instrument_cache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames):
    '''
    Walk the objdump listing of every matched function and log which
    variable each load/store instruction accesses.

    Returns -1 as soon as a function has no ISC counterpart or a load/store
    cannot be attributed to a global variable (an error is logged);
    otherwise returns None after all functions were processed.

    Algorithm

    To make our process simple, we are planning to use only Global Variables.
    We will modify the benchmark program, to make all local variables as global.
    This will only be possible, if the function is not recursive and won't have
    a huge impact on performance unless there are a number of functions that are
    called, and each have a large number of local variables.

    1. Extract info of Global Variables from GDB.
    2. Perform Matching of Control Flow Graphs.
    3. For each funcObj in list of Objdump functions,
        a. Find corresponding funcISC.
        b. For each line in funcObj,
            1. look for load and store instructions. Each load and store
                should be either,
                a. PC Relative Load/Store (Global Variable)
                b. Stack Pointer Relative Load/Store (Local Variable)
                PS. This is when we don't consider heap/dynamically allocated
                    memory.
            2. PC Relative Load/Store:
                a. Find the address of the address of global variable.
                b. Find name of the global variable being fetched from memory.
                c. In the mapping basic block in ISC, look for an instruction
                    where this global variable is being accessed. If found,
                    1. Look if this variable is an array and is being accessed
                        with an index.
                        a. TODO: Find a good way, to check if the current basic
                            block is part of a loop.
                        b. If yes, we need to find the index variable in the
                            source code.
                    c. Annotate the memory access at appropriate line in
                        the source code.
    '''
    (listISCFunctions, listObjdumpFunctions) = match_cfg(listISCFileNames,
                                                         listObjdumpFileNames,
                                                         listBinaryFileNames)

    listGlobalVariables = getGlobalVariablesInfoFromGDB(listBinaryFileNames)

    for funcObj in listObjdumpFunctions:
        dictGlobalVarAtPCRelativeAdd = {}
        dictGlobalVarAddInReg = {}
        fileNameObj = funcObj.fileName
        funcISC = find(lambda fn: fn.functionName == funcObj.functionName, listISCFunctions)
        if funcISC is None:
            # Report the mismatch instead of failing on an attribute of None.
            logging.error(" Function %s has no matching ISC function." %
                          funcObj.functionName)
            return -1

        logging.debug("")
        logging.debug(" Function: %s" % funcObj.functionName)

        # Read addresses of all global variables attached at the end of the
        # function, by reading backwards until return instruction is found
        logging.debug(" Table of Global Variable Addresses:")
        lineNumObj = funcObj.endLine
        while True:
            m = re_instructionLineObj.match(lc.getline(fileNameObj, lineNumObj))
            if m is None:
                break
            if re_returnInst.match(m.group(3)) is not None:
                break

            instAdd = int(m.group(1), 16)
            globalVarAdd = int(m.group(2), 16)
            globalVar = find(lambda var: var.address == globalVarAdd,
                             listGlobalVariables)
            if globalVar is not None:
                dictGlobalVarAtPCRelativeAdd[instAdd] = globalVar
                logging.debug("\t \"%s\" at address %x." % (globalVar.name, globalVar.address))
            else:
                logging.error("\t Global Var with address %x not found." % globalVarAdd)
            lineNumObj = lineNumObj - 1

        logging.debug(" Load Store Instructions:")
        for lineNumObj in range(funcObj.startLine, funcObj.endLine + 1):
            m = re_instructionLineObj.match(lc.getline(fileNameObj, lineNumObj))
            if m is None:
                continue
            instObj = m.group(3)

            # Load PC Relative - Global Variables
            m = re_loadPCRelative.match(instObj)
            if m is not None:
                globalVarAddInReg = m.group(1)
                globalVarTabAddress = int(m.group(2), 16)
                if globalVarTabAddress not in dictGlobalVarAtPCRelativeAdd:
                    logging.debug("\t %d: PC Relative Address was loaded, but corresponding global var was not found. Ignoring!" %
                                  lineNumObj)
                    continue
                globalVar = dictGlobalVarAtPCRelativeAdd[globalVarTabAddress]
                dictGlobalVarAddInReg[globalVarAddInReg] = globalVar
                logging.debug("\t %d: %s = address of global var \"%s\"" %
                              (lineNumObj, globalVarAddInReg, globalVar.name))
                continue

            m = re_loadStoreSPRelative.match(instObj)
            if m is not None:
                if m.group(3) is None:
                    lsSPIndexVal = 0
                else:
                    lsSPIndexVal = int(m.group(3))
                logging.debug("\t %d: Local Variable was accessed at address sp+%d." %
                              (lineNumObj, lsSPIndexVal))
                continue

            # TODO: Look for SP relative address being loaded into a register
            #       It can be done by moving sp to a register, or by
            #       adding/subtracting from sp and storing to a register.
            #       Map the address to a local variable

            m = re_loadStoreInst.match(instObj)
            if m is None:
                continue
            lsReg = m.group(2)
            lsAddReg1 = m.group(3)
            lsAddReg2 = m.group(4)

            if lsAddReg2 is None:
                # See if we know address of which global var is stored in lsAddReg1
                # TODO: Look for above instructions to see where lsAddReg1 comes from.
                globalVar = mapRegToVar(dictGlobalVarAddInReg,
                                        fileNameObj,
                                        lineNumObj,
                                        lsAddReg1)
                if globalVar is None:
                    logging.error("\t %s:%d: Could not match load/store instruction with variable" %
                                  (fileNameObj, lineNumObj))
                    return -1
                # TODO: Verify the global variable being accessed by looking at source code
                logging.debug("\t %d: %s = value of global var \"%s\" (pointed to by %s)" %
                              (lineNumObj, lsReg, globalVar.name, lsAddReg1))
                continue

            # Two registers inside the brackets: either of them may hold
            # the base address, the other one is then the index.
            baseAddReg, indexReg = lsAddReg1, lsAddReg2
            globalVar = mapRegToVar(dictGlobalVarAddInReg,
                                    fileNameObj,
                                    lineNumObj,
                                    lsAddReg1)
            if globalVar is None:
                baseAddReg, indexReg = lsAddReg2, lsAddReg1
                globalVar = mapRegToVar(dictGlobalVarAddInReg,
                                        fileNameObj,
                                        lineNumObj,
                                        lsAddReg2)
            if globalVar is None:
                logging.error("\t %d: Could not match load/store instruction with variable" %
                              (lineNumObj))
                # Same outcome as the non-indexed case above; going on would
                # only fail on the attributes of None.
                return -1

            if globalVar.length == 1:
                logging.error("From objdump, it looks like an indexed access of array, but global variable is not an array (based on info from GDB)!")
                return -1

            # TODO: Look for Indexed array access in matching block in Source Code
            logging.debug("\t %d: %s = value of element in global var %s (pointed to by %s, indexed by %s)." %
                          (lineNumObj, lsReg, globalVar.name, baseAddReg, indexReg))
# Command line entry point: collects the ISC, objdump and binary file names
# (each option may be given several times) and runs instrument_cache on them.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    optparser = OptionParser()
    optparser.add_option("-i", "--isc", action="append", dest="listISCFileNames",
                         type="string", help="ISC Filename. For multiple files, use -i <filename> multiple times.",
                         metavar="FILE")
    optparser.add_option("-o", "--objdump", action="append",
                         type="string", dest="listObjdumpFileNames",
                         help="Objdump Filename. For multiple files, use -o <filename> multiple times.",
                         metavar="FILE")
    optparser.add_option("-b", "--binary", action="append",
                         type="string", dest="listBinaryFileNames",
                         help="Binary Filename. For multiple files, use -b <filename> multiple times.",
                         metavar="FILE")

    (options, args) = optparser.parse_args()

    if args:
        # Single parenthesised argument: same output on Python 2 and 3.
        print("Additional arguments are being ignored")

    listISCFileNames = options.listISCFileNames
    listObjdumpFileNames = options.listObjdumpFileNames
    listBinaryFileNames = options.listBinaryFileNames

    instrument_cache(listISCFileNames, listObjdumpFileNames, listBinaryFileNames)
......@@ -5,6 +5,7 @@ class GlobalVariable:
self.name = ""
self.address = 0
self.type = ""
self.size = -1
self.length = -1
self.file = ""
self.lineNum = -1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment