Commit a7f23563 authored by supriti

cGrammar annotating mostly correctly

parent b2014af1
...@@ -3,7 +3,8 @@ import logging ...@@ -3,7 +3,8 @@ import logging
from pyparsing import * from pyparsing import *
IDENTIFIER = Word(alphas, alphanums+'_')("ident")
IDENTIFIER = Word(alphas, alphanums+'_')
CONSTANT = Word(nums+'.') CONSTANT = Word(nums+'.')
STRING_LITERAL = quotedString STRING_LITERAL = quotedString
...@@ -18,11 +19,24 @@ DIV_OP = Literal("/") ...@@ -18,11 +19,24 @@ DIV_OP = Literal("/")
MOD_OP = Literal("%") MOD_OP = Literal("%")
INC_OP = Literal("++") INC_OP = Literal("++")
DEC_OP = Literal("--") DEC_OP = Literal("--")
# Logical operators.
OR_OP = Literal("||")
AND_OP = Literal("&&")
# Bitwise operators.
BITXOR_OP = Literal("^")
BITOR_OP = Literal("|")
BITAND_OP = Literal("&")
# Equality operators.
EQ_OP = Literal("==")
NE_OP = Literal("!=")
# Relational operators. Note that ">" is a prefix of ">=" and "<" of "<=",
# so the order of alternatives matters wherever these are combined with "|".
GT_OP = Literal(">")
GE_OP = Literal(">=")
LT_OP = Literal("<")
LE_OP = Literal("<=")
# Shift operators.
RIGHT_OP = Literal(">>")
LEFT_OP = Literal("<<")
POINTER = Literal("*") POINTER = Literal("*")
SEMICOLON = Literal(";") SEMICOLON = Literal(";")
SIZEOF = Literal("sizeof") SIZEOF = Literal("sizeof")
# Type Specifiers # Type Specifiers
...@@ -48,7 +62,32 @@ ENUM = Literal("enum") ...@@ -48,7 +62,32 @@ ENUM = Literal("enum")
AUTO = Literal("auto") AUTO = Literal("auto")
REGISTER = Literal("register") REGISTER = Literal("register")
extra_type_specifier = ( UINTPTR) extra_type_specifier = ( UINTPTR )
'''
State Management Variables
'''
# The parse actions in this module communicate through these module-level
# globals; parse_statement() resets every one of them before each parse.

# Set to 1 by act_deref_operator when a dereference operator is matched.
deref_operator_seen = 0
# Set to 1 by act_lparen_expression when "(" is matched while a dereference
# is pending.
deref_expression_lparen_seen = 0
# Flag and name of the first identifier captured by act_identifier inside a
# parenthesised dereference.
base_pointer_var_seen = 0
base_pointer_var_name = ""
# Name captured by act_identifier for a dereference without parentheses.
pointer_var_name = ""
# Name captured by act_identifier for a plain variable access.
var_name = ""
# Flag and text recorded by act_add_second_operand_onwards for the operands
# that follow the base pointer inside a parenthesised dereference.
deref_index_seen = 0
deref_index_string = ""
# Array subscript state maintained by the act_array_index_* actions.
array_name = ""
array_index_lbrace_seen = 0
array_index_string = ""
# Identifiers recorded by act_identifier, and the collected annotations as
# (variable name, annotation text) tuples.
list_identifiers = []
list_annotations = []
################################################################3
# Identifiers spelled like one of these are ignored by act_identifier.
list_type_names = ["short", "int", "long", "char", "float", "double", "void", "signed", "unsigned", "uintptr_t"]
# TODO: Extend this : struct_or_union, enum # TODO: Extend this : struct_or_union, enum
type_specifier = ( SHORT type_specifier = ( SHORT
...@@ -84,64 +123,349 @@ type_name = ( (specifier_qualifier_list + abstract_declarator) ...@@ -84,64 +123,349 @@ type_name = ( (specifier_qualifier_list + abstract_declarator)
| specifier_qualifier_list | specifier_qualifier_list
) )
def act_identifier(tokens):
    """Parse action for IDENTIFIER: classify the name and annotate the read.

    Names found in ``list_type_names`` are skipped.  Otherwise the name is
    treated as one of:

    * a plain variable access (no dereference pending) - annotated the first
      time the name is seen and appended to ``list_identifiers``;
    * the base pointer of a parenthesised dereference - only remembered
      here; later identifiers in the same parentheses are ignored;
    * the operand of a dereference without parentheses - annotated
      immediately, and the pending-dereference flag is cleared.

    Annotations are ``(name, text)`` tuples appended to ``list_annotations``
    when not already present.
    """
    global deref_operator_seen
    global deref_expression_lparen_seen
    global base_pointer_var_seen
    global base_pointer_var_name
    global pointer_var_name
    global var_name
    global list_identifiers
    global list_annotations

    name = tokens[0]
    if name in list_type_names:
        return

    # Variable Access
    if deref_operator_seen != 1:
        var_name = name
        if var_name not in list_identifiers:
            logging.debug(" Variable name = " + var_name)
            entry = (var_name, "simDCache(%s_addr, \"r\");" % (var_name))
            if entry not in list_annotations:
                list_annotations.append(entry)
        # Recorded on every access so the most recent identifier is last.
        list_identifiers.append(name)
        return

    # Deref Operation with Index: only the first identifier is the base
    # pointer; anything after it is ignored here.
    if deref_expression_lparen_seen == 1:
        if base_pointer_var_seen == 0:
            base_pointer_var_name = name
            base_pointer_var_seen = 1
            logging.debug(" Base Pointer Var Name = " + base_pointer_var_name)
        return

    # Deref Operation without Index
    pointer_var_name = name
    logging.debug(" Pointer Var Name = " + pointer_var_name)
    entry = (pointer_var_name, "simDCache(%s_addr, \"r\");" % (pointer_var_name))
    if entry not in list_annotations:
        list_annotations.append(entry)
    deref_operator_seen = 0
def act_rparen_expression(tokens):
    """Parse action for ")": finish a parenthesised dereference.

    Acts only when a dereference is pending, its "(" has been seen and the
    index text has been recorded.  In that case the dereference state flags
    are cleared and one annotation for ``base_pointer_var_name`` is added to
    ``list_annotations`` (with the index text when it is non-empty), unless
    an identical annotation is already present.
    """
    global deref_operator_seen
    global deref_expression_lparen_seen
    global deref_index_seen
    global base_pointer_var_seen
    global list_annotations

    closes_deref = (deref_operator_seen == 1
                    and deref_expression_lparen_seen == 1
                    and deref_index_seen == 1)
    if not closes_deref:
        return

    deref_operator_seen = 0
    deref_expression_lparen_seen = 0
    deref_index_seen = 0
    base_pointer_var_seen = 0

    if deref_index_string == "":
        text = "simDCache(%s_addr, \"r\");" % (base_pointer_var_name)
    else:
        text = "simDCache((%s_addr + (%s)), \"r\");" % (base_pointer_var_name, deref_index_string)

    entry = (base_pointer_var_name, text)
    if entry not in list_annotations:
        list_annotations.append(entry)
def act_lparen_expression(tokens):
    """Parse action for "(": note that a pending dereference is parenthesised.

    Does nothing unless a dereference operator has been seen.
    """
    global deref_operator_seen
    global deref_expression_lparen_seen

    if deref_operator_seen != 1:
        return
    deref_expression_lparen_seen = 1
expression = Forward() expression = Forward()
primary_expression = ( IDENTIFIER primary_expression = ( IDENTIFIER.setParseAction(act_identifier)
| CONSTANT ^ CONSTANT
| STRING_LITERAL ^ STRING_LITERAL
| (LPAREN + expression + RPAREN) ^ ((LPAREN.setParseAction(act_lparen_expression) + expression + RPAREN.setParseAction(act_rparen_expression)))
) )
def act_array_index_rbrace(tokens):
    """Parse action for "]": annotate the completed array subscript.

    Expects the matching "[" to have been recorded, clears that flag, and
    adds an annotation built from ``array_name`` and ``array_index_string``
    to ``list_annotations`` unless it is already present.
    """
    global array_index_lbrace_seen
    global list_annotations

    assert(array_index_lbrace_seen == 1)
    array_index_lbrace_seen = 0

    text = "simDCache((%s_addr + (%s)), \"r\");" % (array_name, array_index_string)
    entry = (array_name, text)
    if entry in list_annotations:
        return
    list_annotations.append(entry)
def act_array_index_expression(tokens):
    """Parse action for a subscript expression: remember its text.

    Stores ``tokens[0]`` in ``array_index_string`` when a "[" is open;
    otherwise does nothing.
    """
    global array_index_lbrace_seen
    global array_index_string

    if array_index_lbrace_seen != 1:
        return
    array_index_string = tokens[0]
    logging.debug(" Array Index = " + array_index_string)
def act_array_index_lbrace(tokens):
    """Parse action for "[": start an array subscript on the last identifier.

    Marks the subscript as open and takes the most recently recorded
    identifier (``list_identifiers[-1]``) as the array name.  That
    identifier may just have been annotated as a plain variable access; if
    so, the plain annotation is withdrawn, because act_array_index_rbrace
    adds the indexed annotation instead.

    The plain annotation is removed only when it really is the last entry of
    ``list_annotations``.  When the grammar backtracks and re-parses the same
    text, the array name is already known and is not annotated again, so the
    last entry then belongs to something else and must be kept.  An empty
    annotation list is likewise left alone instead of raising IndexError.

    Raises:
        IndexError: if no identifier has been recorded yet.
    """
    global array_index_lbrace_seen
    global list_annotations
    global array_name

    array_index_lbrace_seen = 1
    array_name = list_identifiers[-1]
    logging.debug(" Array Name = " + array_name)

    plain_access = (array_name, "simDCache(%s_addr, \"r\");" % (array_name))
    if list_annotations and list_annotations[-1] == plain_access:
        del list_annotations[-1]
# Removing Left Recursion # Removing Left Recursion
postfix_expression_1 = Forward() postfix_expression_1 = Forward()
postfix_expression_1 << ( (Literal("[") + Combine(expression)("array_index") + Literal("]")) postfix_expression_1 << ( (Literal("[").setParseAction(act_array_index_lbrace) + Combine(expression).setParseAction(act_array_index_expression) + Literal("]").setParseAction(act_array_index_rbrace))
| Empty() | Empty()
) )
postfix_expression = ( Combine(primary_expression)("prim") + postfix_expression_1) postfix_expression = ( (primary_expression) + postfix_expression_1)
def act_deref_operator(tokens):
    """Parse action for the dereference operator: flag a pending dereference.

    The flag is read by the identifier and parenthesis actions that run
    afterwards.
    """
    global deref_operator_seen

    deref_operator_seen = 1
cast_expression = Forward() cast_expression = Forward()
unary_expression = Forward() unary_expression = Forward()
unary_expression << ( postfix_expression("post") unary_expression << ( postfix_expression
| (INC_OP + unary_expression) | (INC_OP + unary_expression)
| (DEC_OP + unary_expression) | (DEC_OP + unary_expression)
| (unary_operator + cast_expression) | (unary_operator + cast_expression)
| (DEREF_OP + Combine(cast_expression)("deref_exp"))("deref") | (DEREF_OP.setParseAction(act_deref_operator) + Combine(cast_expression))
| (SIZEOF + unary_expression) | (SIZEOF + unary_expression)
| (SIZEOF + LPAREN + type_name + RPAREN) | (SIZEOF + LPAREN + type_name + RPAREN)
) )
cast_expression << ( unary_expression cast_expression << ( ((LPAREN + type_name + RPAREN).suppress() + cast_expression)
^ ((LPAREN + type_name + RPAREN).suppress() + cast_expression) | unary_expression
) )
# Left Factored # Left Factored
multiplicative_expression_1 = Forward() multiplicative_expression_1 = Forward()
multiplicative_expression_1 << ( (MUL_OP + cast_expression("mul_op") + multiplicative_expression_1("mul_op_rest")) multiplicative_expression_1 << ( (MUL_OP + cast_expression + multiplicative_expression_1)
| (DIV_OP + cast_expression("mul_op") + multiplicative_expression_1("mul_op_rest")) | (DIV_OP + cast_expression + multiplicative_expression_1)
| (MOD_OP + cast_expression("mul_op") + multiplicative_expression_1("mul_op_rest")) | (MOD_OP + cast_expression + multiplicative_expression_1)
| Empty() | Empty()
) )
multiplicative_expression = ( cast_expression("mul_op") + multiplicative_expression_1("mul_op_rest")) multiplicative_expression = ( cast_expression + multiplicative_expression_1)
def act_add_second_operand_onwards(tokens):
    """Parse action for the tail of an additive expression.

    ``tokens[0]`` is the combined text of everything after the first operand
    (for example ``"+i"``), or ``""`` when there is no further operand.

    While a parenthesised dereference is being parsed and its base pointer
    is known, the first tail seen is stored as the dereference index text
    and ``deref_index_seen`` is set.  An empty tail is stored too: it means
    the pointer was written in parentheses without an index, and
    act_rparen_expression then annotates it without one.

    In every other state the call has no effect.
    """
    global deref_index_seen
    global deref_index_string

    collecting_index = (deref_operator_seen == 1
                        and deref_expression_lparen_seen == 1
                        and base_pointer_var_seen == 1
                        and deref_index_seen == 0)
    if not collecting_index:
        return

    # Empty and non-empty tails are handled the same way, so the text is
    # never compared against "" (neither by value nor by identity).
    deref_index_seen = 1
    deref_index_string = tokens[0]
    logging.debug(" Deref Index String = " + deref_index_string)
# Left Factored # Left Factored
additive_expression_1 = Forward() additive_expression_1 = Forward()
additive_expression_1 << ( (ADD_OP + multiplicative_expression("add_op") + Combine(additive_expression_1)("add_op_rest")) additive_expression_1 << ( (ADD_OP + multiplicative_expression + (additive_expression_1))
| (SUB_OP + multiplicative_expression("add_op") + Combine(additive_expression_1)("add_op_rest")) | (SUB_OP + multiplicative_expression + (additive_expression_1))
| Empty() | Empty()
) )
additive_expression = ( multiplicative_expression("add_op") + Combine(additive_expression_1)("add_op_rest") ) additive_expression = ( multiplicative_expression + Combine(additive_expression_1).setParseAction(act_add_second_operand_onwards) )
# shift_expression: an additive_expression followed by any number of
# "<<" / ">>" additive_expression pairs.  The repetition is written as the
# right-recursive helper shift_expression_1, which ends with Empty().
shift_expression_1 = Forward()
shift_expression_1 << ( (LEFT_OP + additive_expression + shift_expression_1)
                      | (RIGHT_OP + additive_expression + shift_expression_1)
                      | Empty()
                      )
shift_expression = ( (additive_expression + shift_expression_1) )
# relational_expression: a shift_expression followed by any number of
# "<" / ">" / "<=" / ">=" shift_expression pairs, written right-recursively
# through relational_expression_1.
#
# All four relational operators are listed (GT_OP included).  The
# two-character operators come first so that "<=" and ">=" are tried before
# their one-character prefixes "<" and ">".
relational_expression_1 = Forward()
relational_expression_1 << ( (LE_OP + shift_expression + relational_expression_1)
                           | (GE_OP + shift_expression + relational_expression_1)
                           | (LT_OP + shift_expression + relational_expression_1)
                           | (GT_OP + shift_expression + relational_expression_1)
                           | Empty()
                           )
relational_expression = ( (shift_expression + relational_expression_1))
# equality_expression: a relational_expression followed by any number of
# "==" / "!=" relational_expression pairs (right-recursive helper ending in
# Empty()).
equality_expression_1 = Forward()
equality_expression_1 << ( (EQ_OP + relational_expression + equality_expression_1)
                         | (NE_OP + relational_expression + equality_expression_1)
                         | Empty()
                         )
equality_expression = ( (relational_expression + equality_expression_1) )
# and_expression: equality_expressions joined by the bitwise "&" operator.
and_expression_1 = Forward()
and_expression_1 << ( (BITAND_OP + equality_expression + and_expression_1)
                    | Empty()
                    )
and_expression = ( (equality_expression + and_expression_1) )
# exclusive_or_expression: and_expressions joined by the bitwise "^"
# operator, written right-recursively through exclusive_or_expression_1.
# The recursion ends with a fresh Empty() element, as in every other
# *_expression_1 rule of this grammar.
exclusive_or_expression_1 = Forward()
exclusive_or_expression_1 << ( (BITXOR_OP + and_expression + exclusive_or_expression_1)
                             | Empty()
                             )
exclusive_or_expression = ( (and_expression + exclusive_or_expression_1) )
# inclusive_or_expression: exclusive_or_expressions joined by the bitwise
# "|" operator (right-recursive helper ending in Empty()).
inclusive_or_expression_1 = Forward()
inclusive_or_expression_1 << ( (BITOR_OP + exclusive_or_expression + inclusive_or_expression_1)
                             | Empty()
                             )
inclusive_or_expression = ( (exclusive_or_expression + inclusive_or_expression_1) )
# logical_and_expression: inclusive_or_expressions joined by "&&".
logical_and_expression_1 = Forward()
logical_and_expression_1 << ( (AND_OP + inclusive_or_expression + logical_and_expression_1)
                            | Empty()
                            )
logical_and_expression = ( (inclusive_or_expression + logical_and_expression_1) )
logical_or_expression_1 = Forward()
logical_or_expression_1 << ( (OR_OP + logical_and_expression + logical_or_expression_1)
| Empty()
) )
logical_or_expression = ( (logical_and_expression + logical_or_expression_1) )
# def print_conditional_expression(tokens):
# print "Conditional Expression = ",
# print tokens
# conditional_expression: either the ternary form
#   logical_or_expression "?" expression ":" conditional_expression
# or a bare logical_or_expression.  The ternary alternative is tried first;
# both alternatives start with logical_or_expression.
conditional_expression = Forward()
conditional_expression << ( (logical_or_expression + Literal("?") + expression + Literal(":") + conditional_expression)
                          | (logical_or_expression)
                          )
# assignment_expression: either unary_expression ASSIGN_OP
# assignment_expression (tried first), or a conditional_expression.
assignment_expression = Forward()
assignment_expression << ( ((unary_expression) + ASSIGN_OP + (assignment_expression))
                         | (conditional_expression)
                         )
expression << ( assignment_expression ) expression << ( assignment_expression )
# Full statement with ';' # Full statement with ';'
statement = ( expression + SEMICOLON + stringEnd) statement = ( expression + SEMICOLON + stringEnd)
# statement.ignore(cStyleComment) # statement.ignore(cStyleComment)
def parse_statement(line):
    """Parse one statement and return the annotations collected for it.

    All state shared by the parse actions is reset first, so each call
    starts clean.  ``line`` is then parsed with the ``statement`` grammar,
    whose parse actions fill ``list_annotations`` as a side effect.

    Returns:
        The list of ``(variable name, annotation text)`` tuples gathered
        while parsing ``line``.
    """
    global deref_operator_seen, deref_expression_lparen_seen
    global base_pointer_var_seen, base_pointer_var_name
    global pointer_var_name, var_name
    global deref_index_seen, deref_index_string
    global array_name, array_index_lbrace_seen, array_index_string
    global list_identifiers, list_annotations

    # Flags back to "not seen".
    deref_operator_seen = deref_expression_lparen_seen = 0
    base_pointer_var_seen = deref_index_seen = array_index_lbrace_seen = 0

    # Captured names and index texts back to empty.
    base_pointer_var_name = pointer_var_name = var_name = ""
    deref_index_string = array_name = array_index_string = ""

    # Fresh lists, so results of earlier calls are not mutated.
    list_identifiers = []
    list_annotations = []

    statement.parseString(line)
    return list_annotations
def test():
    """Parse a fixed set of sample statements and print their annotations.

    For every sample line this prints a blank line, the statement itself,
    and one ``name :: annotation`` row per annotation returned by
    parse_statement().  Nothing is asserted; the output is for inspection.

    Each print call takes a single parenthesised argument, which prints the
    same text under Python 2 and Python 3.
    """
    lines = [ "pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28);"
            , "a = b + c;"
            , "diff = (int) *(short int *)((uintptr_t)indata + (uintptr_t)ivtmp_28) - valpred;"
            , "D_2252 = (unsigned int) j_76 + D_2263;"
            , "*outp = (signed char) (signed char) outputbuffer;"
            , "*(outp + i) = (signed char) (signed char) outputbuffer;"
            ]
    for line in lines:
        print("")
        print(line)
        annotations = parse_statement(line)
        print("Annotations:")
        for annotation in annotations:
            print("%s :: %s" % (annotation[0], annotation[1]))
if __name__ == "__main__":
    # Script entry point: enable debug logging, put the dereference state
    # and the identifier list into their initial values, then run the
    # sample statements.
    logging.basicConfig(level=logging.DEBUG)
    deref_operator_seen = deref_expression_lparen_seen = base_pointer_var_seen = 0
    base_pointer_var_name = ""
    list_identifiers = []
    test()
def parseStatement(line): def parseStatement(line):
...@@ -215,38 +539,3 @@ def parseStatement(line): ...@@ -215,38 +539,3 @@ def parseStatement(line):
logging.debug(" Annotation = %s" % annotation[1]) logging.debug(" Annotation = %s" % annotation[1])
return annotation return annotation
def test():
    """Check parseStatement() against the expected annotation per sample.

    Each sample statement is parsed and the second element of the returned
    annotation is compared with the expected text.  On the first mismatch
    the error is logged and the run stops with exit status 1, so the success
    message is printed only when every sample matched.

    Raises:
        SystemExit: with status 1 when a sample gives an unexpected result.
    """
    logging.basicConfig(level=logging.DEBUG)
    lines = [ "pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28);"
            , "a = b + c;"
            , "diff = (int) *(short int *)((uintptr_t)indata + (uintptr_t)ivtmp_28) - valpred;"
            , "D_2252 = (unsigned int) j_76 + D_2263;"
            , "*outp = (signed char) (signed char) outputbuffer;"
            , "*(outp + i) = (signed char) (signed char) outputbuffer;"
            ]
    annotations = [ "simDCache((pcmdata_addr + (start-start_40)), \"w\");"
                  , "simDCache((a_addr), \"w\");"
                  , "simDCache((diff_addr), \"w\");"
                  , "simDCache((D_2252_addr), \"w\");"
                  , "simDCache((outp_addr), \"w\");"
                  , "simDCache((outp_addr + (+i)), \"w\");"
                  ]
    for i, (line, expected) in enumerate(zip(lines, annotations)):
        print("")
        annotation = parseStatement(line)
        if annotation[1] != expected:
            logging.error(" Line %d does not give expected results!" % i)
            # A bare `quit` only names the function; raise to actually stop.
            raise SystemExit(1)
    print("\n\n All Tests Passed!")
if __name__ == "__main__":
    # Script entry point: enable debug logging and parse one sample
    # statement.
    logging.basicConfig(level=logging.DEBUG)
    parseStatement("pcmdata[start - start_40] = *(short int*)((uintptr_t)ivtmp_28);")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment