diff --git a/ll.py b/ll.py new file mode 100644 index 0000000..83b71a4 --- /dev/null +++ b/ll.py @@ -0,0 +1,51 @@ +"""Data structure for LLVM AST.""" + +from collections import namedtuple +from enum import Enum + + +class SimpleType(Enum): + """Simple types in LLVM.""" + + Void = 1 + I1 = 2 + I8 = 3 + I32 = 4 + I64 = 5 + + +Program = namedtuple('Program', ['tdecls', 'gdecls', 'fdecls']) + +TypeDec = namedtuple('TypeDec', ['name', 'body']) +PointerType = namedtuple('PointerType', ['inner_ty']) +StructType = namedtuple('StructType', ['fields']) +ArrayType = namedtuple('ArrayType', ['length', 'inner_ty']) +FunctionType = namedtuple('FunctionType', ['return_ty', 'parameters']) +NamedType = namedtuple('NamedType', ['other_name']) + +GlobalDec = namedtuple('GlobalDec', ['name', 'ty', 'body']) +GNull = namedtuple('GNull', []) +GGid = namedtuple('GGid', ['val']) +GInt = namedtuple('GInt', ['val']) +GString = namedtuple('GString', ['val']) +GArray = namedtuple('GArray', ['entries']) +GStruct = namedtuple('GStruct', ['fields']) + +FunctionDec = namedtuple('FunctionDec', ['return_type', 'name', 'parameters', 'body']) +FunctionBody = namedtuple('FunctionBody', ['first_block', 'named_blocks']) +Block = namedtuple('Block', ['insns', 'terminator']) + +Binop = namedtuple('Binop', ['bop', 'ty', 'left', 'right']) +Alloca = namedtuple('Alloca', ['ty']) +Load = namedtuple('Load', ['ty', 'oper']) +Store = namedtuple('Store', ['ty', 'value', 'location']) +Icmp = namedtuple('Icmp', ['cnd', 'ty', 'left', 'right']) +Call = namedtuple('Call', ['return_ty', 'callee', 'arguments']) +Bitcast = namedtuple('Bitcast', ['from_ty', 'oper', 'to_ty']) +Gep = namedtuple('Gep', ['base_ty', 'oper_ty', 'oper', 'steps']) +Zext = namedtuple('Zext', ['from_ty', 'oper', 'to_ty']) +Ptrtoint = namedtuple('Ptrtoint', ['pointer_ty', 'oper', 'to_ty']) + +Ret = namedtuple('Ret', ['ty', 'oper']) +Br = namedtuple('Br', ['label']) +Cbr = namedtuple('Cbr', ['ty', 'oper', 'then_label', 'else_label']) diff --git 
a/parser.py b/parser.py index d3adbe2..b255b04 100644 --- a/parser.py +++ b/parser.py @@ -1,57 +1,9 @@ """Parser for LLVM--.""" -from collections import namedtuple - import ply.lex as lex import ply.yacc as yacc -from enum import Enum - - -class SimpleType(Enum): - Void = 1 - I1 = 2 - I8 = 3 - I32 = 4 - I64 = 5 - - -# Namedtuples for storing AST -Program = namedtuple('Program', ['tdecls', 'gdecls', 'fdecls']) - -TypeDec = namedtuple('TypeDec', ['name', 'body']) -PointerType = namedtuple('PointerType', ['inner_ty']) -StructType = namedtuple('StructType', ['fields']) -ArrayType = namedtuple('ArrayType', ['length', 'inner_ty']) -FunctionType = namedtuple('FunctionType', ['return_ty', 'parameters']) -NamedType = namedtuple('NamedType', ['other_name']) - -GlobalDec = namedtuple('GlobalDec', ['name', 'ty', 'body']) -GNull = namedtuple('GNull', []) -GGid = namedtuple('GGid', ['val']) -GInt = namedtuple('GInt', ['val']) -GString = namedtuple('GString', ['val']) -GArray = namedtuple('GArray', ['entries']) -GStruct = namedtuple('GStruct', ['fields']) - -FunctionDec = namedtuple('FunctionDec', ['return_type', 'name', 'parameters', 'body']) -FunctionBody = namedtuple('FunctionBody', ['first_block', 'named_blocks']) -Block = namedtuple('Block', ['insns', 'terminator']) - -Binop = namedtuple('Binop', ['bop', 'ty', 'left', 'right']) -Alloca = namedtuple('Alloca', ['ty']) -Load = namedtuple('Load', ['ty', 'oper']) -Store = namedtuple('Store', ['ty', 'value', 'location']) -Icmp = namedtuple('Icmp', ['cnd', 'ty', 'left', 'right']) -Call = namedtuple('Call', ['return_ty', 'callee', 'arguments']) -Bitcast = namedtuple('Bitcast', ['from_ty', 'oper', 'to_ty']) -Gep = namedtuple('Gep', ['base_ty', 'oper_ty', 'oper', 'steps']) -Zext = namedtuple('Zext', ['from_ty', 'oper', 'to_ty']) -Ptrtoint = namedtuple('Ptrtoint', ['pointer_ty', 'oper', 'to_ty']) - -Ret = namedtuple('Ret', ['ty', 'oper']) -Br = namedtuple('Br', ['label']) -Cbr = namedtuple('Cbr', ['ty', 'oper', 'then_label', 
'else_label']) +import ll class LLVMParser(object): @@ -208,17 +160,17 @@ class LLVMParser(object): fdecs = {} for top_dec in smth: name = top_dec.name - if isinstance(top_dec, FunctionDec): + if isinstance(top_dec, ll.FunctionDec): if name in fdecs.keys(): print('ERROR: Function {} is declared more than once' .format(name)) fdecs[name] = top_dec - elif isinstance(top_dec, TypeDec): + elif isinstance(top_dec, ll.TypeDec): if name in tdecs.keys(): print('ERROR: Type {} is declared more than once' .format(name)) tdecs[name] = top_dec - elif isinstance(top_dec, GlobalDec): + elif isinstance(top_dec, ll.GlobalDec): if name in gdecs.keys(): print('ERROR: Global {} is declared more than once' .format(name)) @@ -231,7 +183,7 @@ class LLVMParser(object): def p_program(self, p): 'program : top_decs' tdecls, gdecls, fdecls = self.handle_top_decs(p[1]) - p[0] = Program(tdecls, gdecls, fdecls) + p[0] = ll.Program(tdecls, gdecls, fdecls) def p_topdecs_some(self, p): '''top_decs : tdec top_decs @@ -245,40 +197,40 @@ class LLVMParser(object): def p_tdec(self, p): 'tdec : PercentID ASSIGN TYPE ty' - p[0] = TypeDec(p[1], p[4]) + p[0] = ll.TypeDec(p[1], p[4]) def p_ty_void(self, p): 'ty : VOID' - p[0] = SimpleType.Void + p[0] = ll.SimpleType.Void def p_ty_i1(self, p): 'ty : I1' - p[0] = SimpleType.I1 + p[0] = ll.SimpleType.I1 def p_ty_i8(self, p): 'ty : I8' - p[0] = SimpleType.I8 + p[0] = ll.SimpleType.I8 def p_ty_i32(self, p): 'ty : I32' - p[0] = SimpleType.I32 + p[0] = ll.SimpleType.I32 def p_ty_i64(self, p): 'ty : I64' - p[0] = SimpleType.I64 + p[0] = ll.SimpleType.I64 def p_ty_ptr(self, p): 'ty : ty ASTERIX' - p[0] = PointerType(p[1]) + p[0] = ll.PointerType(p[1]) def p_ty_struct(self, p): 'ty : LBRACE ty_list RBRACE' - p[0] = StructType(p[2]) + p[0] = ll.StructType(p[2]) def p_ty_array(self, p): 'ty : LBRACK INT ID ty RBRACK' if p[3] == 'x': - p[0] = ArrayType(p[2], p[4]) + p[0] = ll.ArrayType(p[2], p[4]) else: print('Invalid name in array definition: {}\n It should have been 
an x.' .format(p[3])) @@ -286,11 +238,11 @@ class LLVMParser(object): def p_ty_fun(self, p): 'ty : ty LPAREN ty_list RPAREN' - p[0] = FunctionType(p[1], p[3]) + p[0] = ll.FunctionType(p[1], p[3]) def p_ty_id(self, p): 'ty : PercentID' - p[0] = NamedType(p[1]) + p[0] = ll.NamedType(p[1]) def p_ty_list_single(self, p): 'ty_list : ty' @@ -306,32 +258,32 @@ class LLVMParser(object): def p_gdec(self, p): 'gdec : AtID ASSIGN GLOBAL ty ginit' - p[0] = GlobalDec(p[1], p[4], p[5]) + p[0] = ll.GlobalDec(p[1], p[4], p[5]) def p_ginit_null(self, p): 'ginit : NULL' - p[0] = GNull() + p[0] = ll.GNull() def p_ginit_id(self, p): 'ginit : AtID' - p[0] = GGid(p[1]) + p[0] = ll.GGid(p[1]) def p_ginit_int(self, p): 'ginit : INT' - p[0] = GInt(p[1]) + p[0] = ll.GInt(p[1]) def p_ginit_string(self, p): 'ginit : STRING' - p[0] = GString(p[1]) + p[0] = ll.GString(p[1]) def p_ginit_array(self, p): 'ginit : LBRACK ty_ginit_list RBRACK' # TODO This syntax seems weird - p[0] = GArray(p[2]) + p[0] = ll.GArray(p[2]) def p_ginit_struct(self, p): 'ginit : LBRACE ty_ginit_list RBRACE' - p[0] = GStruct(p[2]) + p[0] = ll.GStruct(p[2]) def p_ty_ginit_list_single(self, p): 'ty_ginit_list : ty ginit' @@ -347,7 +299,7 @@ class LLVMParser(object): def p_fdec(self, p): 'fdec : DEFINE ty AtID LPAREN ty_id_list RPAREN LBRACE fbody RBRACE' - p[0] = FunctionDec(p[2], p[3], p[5], p[8]) + p[0] = ll.FunctionDec(p[2], p[3], p[5], p[8]) def p_ty_id_list_single(self, p): 'ty_id_list : ty PercentID' @@ -363,19 +315,19 @@ class LLVMParser(object): def p_fbody_multiple_blocks(self, p): 'fbody : block named_block_list' - p[0] = FunctionBody(p[1], p[2]) + p[0] = ll.FunctionBody(p[1], p[2]) def p_fbody_one_block(self, p): 'fbody : block' - p[0] = FunctionBody(p[1], []) + p[0] = ll.FunctionBody(p[1], []) def p_block_insns_terminator(self, p): 'block : insns terminator' - p[0] = Block(p[1], p[2]) + p[0] = ll.Block(p[1], p[2]) def p_block_terminator(self, p): 'block : terminator' - p[0] = Block([], p[1]) + p[0] = 
ll.Block([], p[1]) def p_insns_single(self, p): 'insns : optionally_named_insn' @@ -395,51 +347,51 @@ class LLVMParser(object): def p_insn_bop(self, p): 'insn : bop ty operand COMMA operand' - p[0] = Binop(p[1], p[2], p[3], p[5]) + p[0] = ll.Binop(p[1], p[2], p[3], p[5]) def p_insn_alloca(self, p): 'insn : ALLOCA ty' - p[0] = Alloca(p[2]) + p[0] = ll.Alloca(p[2]) def p_insn_load(self, p): 'insn : LOAD ty COMMA ty operand' - p[0] = Load(p[2], p[5]) + p[0] = ll.Load(p[2], p[5]) def p_insn_store(self, p): 'insn : STORE ty operand COMMA ty operand' - p[0] = Store(p[2], p[3], p[6]) + p[0] = ll.Store(p[2], p[3], p[6]) def p_insn_icmp(self, p): 'insn : ICMP cnd ty operand COMMA operand' - p[0] = Icmp(p[2], p[3], p[4], p[6]) + p[0] = ll.Icmp(p[2], p[3], p[4], p[6]) def p_insn_call(self, p): 'insn : CALL ty operand LPAREN ty_operand_list RPAREN' - p[0] = Call(p[2], p[3], p[5]) + p[0] = ll.Call(p[2], p[3], p[5]) def p_insn_call_empty(self, p): 'insn : CALL ty operand LPAREN RPAREN' - p[0] = Call(p[2], p[3], []) + p[0] = ll.Call(p[2], p[3], []) def p_insn_bitcast(self, p): 'insn : BITCAST ty operand TO ty' - p[0] = Bitcast(p[2], p[3], p[5]) + p[0] = ll.Bitcast(p[2], p[3], p[5]) def p_insn_gep(self, p): 'insn : GETELEMENTPTR ty COMMA ty operand COMMA ty_operand_list' - p[0] = Gep(p[2], p[4], p[5], p[7]) + p[0] = ll.Gep(p[2], p[4], p[5], p[7]) def p_insn_gep_empty(self, p): 'insn : GETELEMENTPTR ty COMMA ty operand' - p[0] = Gep(p[2], p[4], p[5], []) + p[0] = ll.Gep(p[2], p[4], p[5], []) def p_insn_zext(self, p): 'insn : ZEXT ty operand TO ty' - p[0] = Zext(p[2], p[3], p[4]) + p[0] = ll.Zext(p[2], p[3], p[5]) def p_insn_ptrtoint(self, p): 'insn : PTRTOINT ty ASTERIX operand TO ty' - p[0] = Ptrtoint(p[2], p[4], p[6]) + p[0] = ll.Ptrtoint(p[2], p[4], p[6]) def p_bop(self, p): '''bop : ADD @@ -473,19 +425,19 @@ class LLVMParser(object): def p_terminator_ret_void(self, p): 'terminator : RET VOID' - p[0] = Ret(SimpleType.Void, None) + p[0] = ll.Ret(ll.SimpleType.Void, None) def 
p_terminator_ret_oper(self, p): 'terminator : RET ty operand' - p[0] = Ret(p[2], p[3]) + p[0] = ll.Ret(p[2], p[3]) def p_terminator_branch(self, p): 'terminator : BR LABEL PercentID' - p[0] = Br(p[3]) + p[0] = ll.Br(p[3]) def p_terminator_conditional_branch(self, p): 'terminator : BR ty operand COMMA LABEL PercentID COMMA LABEL PercentID' - p[0] = Cbr(p[2], p[3], p[6], p[9]) + p[0] = ll.Cbr(p[2], p[3], p[6], p[9]) def p_operand(self, p): '''operand : NULL