Move the LLVM AST into its own module.
This commit is contained in:
parent
34964c186b
commit
abebb940f6
51
ll.py
Normal file
51
ll.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
"""Data structure for LLVM AST."""
|
||||||
|
|
||||||
|
from collections import namedtuple
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class SimpleType(Enum):
    """Primitive LLVM types that need no further structure.

    Each member is a distinct tag; the integer values only serve to keep the
    members apart.
    """

    # The absence of a value.
    Void = 1
    # Integer types, named after their LLVM spelling.
    I1 = 2
    I8 = 3
    I32 = 4
    I64 = 5
|
||||||
|
|
||||||
|
|
||||||
|
# Root node of the AST: the type, global and function declarations of a program.
Program = namedtuple('Program', 'tdecls gdecls fdecls')
|
||||||
|
|
||||||
|
# A type declaration: binds `name` to the type given in `body`.
TypeDec = namedtuple('TypeDec', 'name body')

# Compound types. Fields ending in `_ty` hold another type node.
PointerType = namedtuple('PointerType', 'inner_ty')
StructType = namedtuple('StructType', 'fields')
ArrayType = namedtuple('ArrayType', 'length inner_ty')
FunctionType = namedtuple('FunctionType', 'return_ty parameters')

# Reference to a type by the name it was declared under.
NamedType = namedtuple('NamedType', 'other_name')
|
||||||
|
|
||||||
|
# A global declaration: `name` has type `ty` and is initialised by `body`,
# which is one of the G* initialiser nodes below.
GlobalDec = namedtuple('GlobalDec', 'name ty body')

# Global initialisers.
GNull = namedtuple('GNull', '')  # carries no data
GGid = namedtuple('GGid', 'val')
GInt = namedtuple('GInt', 'val')
GString = namedtuple('GString', 'val')
GArray = namedtuple('GArray', 'entries')
GStruct = namedtuple('GStruct', 'fields')
|
||||||
|
|
||||||
|
# A function definition.
# NOTE: the field is spelled `return_type` here but `return_ty` on
# FunctionType and Call; the spelling is part of the interface and is kept.
FunctionDec = namedtuple('FunctionDec', 'return_type name parameters body')

# A function body: the entry block followed by the remaining named blocks.
FunctionBody = namedtuple('FunctionBody', 'first_block named_blocks')

# A block: a sequence of instructions closed by exactly one terminator.
Block = namedtuple('Block', 'insns terminator')
|
||||||
|
|
||||||
|
# Instruction nodes. `ty`-like fields hold type nodes, `oper`/`left`/`right`
# hold operands.
Binop = namedtuple('Binop', 'bop ty left right')
Alloca = namedtuple('Alloca', 'ty')
Load = namedtuple('Load', 'ty oper')
Store = namedtuple('Store', 'ty value location')
Icmp = namedtuple('Icmp', 'cnd ty left right')
Call = namedtuple('Call', 'return_ty callee arguments')
Bitcast = namedtuple('Bitcast', 'from_ty oper to_ty')
Gep = namedtuple('Gep', 'base_ty oper_ty oper steps')
Zext = namedtuple('Zext', 'from_ty oper to_ty')
Ptrtoint = namedtuple('Ptrtoint', 'pointer_ty oper to_ty')
|
||||||
|
|
||||||
|
# Terminator nodes: the last element of every block.
Ret = namedtuple('Ret', 'ty oper')
Br = namedtuple('Br', 'label')
Cbr = namedtuple('Cbr', 'ty oper then_label else_label')
|
136
parser.py
136
parser.py
|
@ -1,57 +1,9 @@
|
||||||
"""Parser for LLVM--."""
|
"""Parser for LLVM--."""
|
||||||
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
import ply.lex as lex
|
import ply.lex as lex
|
||||||
import ply.yacc as yacc
|
import ply.yacc as yacc
|
||||||
|
|
||||||
from enum import Enum
|
import ll
|
||||||
|
|
||||||
|
|
||||||
class SimpleType(Enum):
|
|
||||||
Void = 1
|
|
||||||
I1 = 2
|
|
||||||
I8 = 3
|
|
||||||
I32 = 4
|
|
||||||
I64 = 5
|
|
||||||
|
|
||||||
|
|
||||||
# Namedtuples for storing AST
|
|
||||||
Program = namedtuple('Program', ['tdecls', 'gdecls', 'fdecls'])
|
|
||||||
|
|
||||||
TypeDec = namedtuple('TypeDec', ['name', 'body'])
|
|
||||||
PointerType = namedtuple('PointerType', ['inner_ty'])
|
|
||||||
StructType = namedtuple('StructType', ['fields'])
|
|
||||||
ArrayType = namedtuple('ArrayType', ['length', 'inner_ty'])
|
|
||||||
FunctionType = namedtuple('FunctionType', ['return_ty', 'parameters'])
|
|
||||||
NamedType = namedtuple('NamedType', ['other_name'])
|
|
||||||
|
|
||||||
GlobalDec = namedtuple('GlobalDec', ['name', 'ty', 'body'])
|
|
||||||
GNull = namedtuple('GNull', [])
|
|
||||||
GGid = namedtuple('GGid', ['val'])
|
|
||||||
GInt = namedtuple('GInt', ['val'])
|
|
||||||
GString = namedtuple('GString', ['val'])
|
|
||||||
GArray = namedtuple('GArray', ['entries'])
|
|
||||||
GStruct = namedtuple('GStruct', ['fields'])
|
|
||||||
|
|
||||||
FunctionDec = namedtuple('FunctionDec', ['return_type', 'name', 'parameters', 'body'])
|
|
||||||
FunctionBody = namedtuple('FunctionBody', ['first_block', 'named_blocks'])
|
|
||||||
Block = namedtuple('Block', ['insns', 'terminator'])
|
|
||||||
|
|
||||||
Binop = namedtuple('Binop', ['bop', 'ty', 'left', 'right'])
|
|
||||||
Alloca = namedtuple('Alloca', ['ty'])
|
|
||||||
Load = namedtuple('Load', ['ty', 'oper'])
|
|
||||||
Store = namedtuple('Store', ['ty', 'value', 'location'])
|
|
||||||
Icmp = namedtuple('Icmp', ['cnd', 'ty', 'left', 'right'])
|
|
||||||
Call = namedtuple('Call', ['return_ty', 'callee', 'arguments'])
|
|
||||||
Bitcast = namedtuple('Bitcast', ['from_ty', 'oper', 'to_ty'])
|
|
||||||
Gep = namedtuple('Gep', ['base_ty', 'oper_ty', 'oper', 'steps'])
|
|
||||||
Zext = namedtuple('Zext', ['from_ty', 'oper', 'to_ty'])
|
|
||||||
Ptrtoint = namedtuple('Ptrtoint', ['pointer_ty', 'oper', 'to_ty'])
|
|
||||||
|
|
||||||
Ret = namedtuple('Ret', ['ty', 'oper'])
|
|
||||||
Br = namedtuple('Br', ['label'])
|
|
||||||
Cbr = namedtuple('Cbr', ['ty', 'oper', 'then_label', 'else_label'])
|
|
||||||
|
|
||||||
|
|
||||||
class LLVMParser(object):
|
class LLVMParser(object):
|
||||||
|
@ -208,17 +160,17 @@ class LLVMParser(object):
|
||||||
fdecs = {}
|
fdecs = {}
|
||||||
for top_dec in smth:
|
for top_dec in smth:
|
||||||
name = top_dec.name
|
name = top_dec.name
|
||||||
if isinstance(top_dec, FunctionDec):
|
if isinstance(top_dec, ll.FunctionDec):
|
||||||
if name in fdecs.keys():
|
if name in fdecs.keys():
|
||||||
print('ERROR: Function {} is declared more than once'
|
print('ERROR: Function {} is declared more than once'
|
||||||
.format(name))
|
.format(name))
|
||||||
fdecs[name] = top_dec
|
fdecs[name] = top_dec
|
||||||
elif isinstance(top_dec, TypeDec):
|
elif isinstance(top_dec, ll.TypeDec):
|
||||||
if name in tdecs.keys():
|
if name in tdecs.keys():
|
||||||
print('ERROR: Type {} is declared more than once'
|
print('ERROR: Type {} is declared more than once'
|
||||||
.format(name))
|
.format(name))
|
||||||
tdecs[name] = top_dec
|
tdecs[name] = top_dec
|
||||||
elif isinstance(top_dec, GlobalDec):
|
elif isinstance(top_dec, ll.GlobalDec):
|
||||||
if name in gdecs.keys():
|
if name in gdecs.keys():
|
||||||
print('ERROR: Global {} is declared more than once'
|
print('ERROR: Global {} is declared more than once'
|
||||||
.format(name))
|
.format(name))
|
||||||
|
@ -231,7 +183,7 @@ class LLVMParser(object):
|
||||||
def p_program(self, p):
    'program : top_decs'
    # handle_top_decs hands back the type, global and function declarations
    # separately; they become the three fields of the root node.
    type_decs, global_decs, function_decs = self.handle_top_decs(p[1])
    p[0] = ll.Program(tdecls=type_decs, gdecls=global_decs,
                      fdecls=function_decs)
|
||||||
|
|
||||||
def p_topdecs_some(self, p):
|
def p_topdecs_some(self, p):
|
||||||
'''top_decs : tdec top_decs
|
'''top_decs : tdec top_decs
|
||||||
|
@ -245,40 +197,40 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_tdec(self, p):
    'tdec : PercentID ASSIGN TYPE ty'
    # p[1] is the declared name and p[4] the type it stands for; the ASSIGN
    # and TYPE tokens in between are not stored.
    declared_name, type_body = p[1], p[4]
    p[0] = ll.TypeDec(name=declared_name, body=type_body)
|
||||||
|
|
||||||
def p_ty_void(self, p):
    'ty : VOID'
    # The VOID token maps directly onto the matching enum member.
    void_ty = ll.SimpleType.Void
    p[0] = void_ty
|
||||||
|
|
||||||
def p_ty_i1(self, p):
    'ty : I1'
    # The I1 token maps directly onto the matching enum member.
    i1_ty = ll.SimpleType.I1
    p[0] = i1_ty
|
||||||
|
|
||||||
def p_ty_i8(self, p):
    'ty : I8'
    # The I8 token maps directly onto the matching enum member.
    i8_ty = ll.SimpleType.I8
    p[0] = i8_ty
|
||||||
|
|
||||||
def p_ty_i32(self, p):
    'ty : I32'
    # The I32 token maps directly onto the matching enum member.
    i32_ty = ll.SimpleType.I32
    p[0] = i32_ty
|
||||||
|
|
||||||
def p_ty_i64(self, p):
    'ty : I64'
    # The I64 token maps directly onto the matching enum member.
    i64_ty = ll.SimpleType.I64
    p[0] = i64_ty
|
||||||
|
|
||||||
def p_ty_ptr(self, p):
    'ty : ty ASTERIX'
    # A type followed by an asterisk is a pointer to that type.
    pointee = p[1]
    p[0] = ll.PointerType(inner_ty=pointee)
|
||||||
|
|
||||||
def p_ty_struct(self, p):
    'ty : LBRACE ty_list RBRACE'
    # The braces only delimit the struct; the field types are p[2].
    field_types = p[2]
    p[0] = ll.StructType(fields=field_types)
|
||||||
|
|
||||||
def p_ty_array(self, p):
|
def p_ty_array(self, p):
|
||||||
'ty : LBRACK INT ID ty RBRACK'
|
'ty : LBRACK INT ID ty RBRACK'
|
||||||
if p[3] == 'x':
|
if p[3] == 'x':
|
||||||
p[0] = ArrayType(p[2], p[4])
|
p[0] = ll.ArrayType(p[2], p[4])
|
||||||
else:
|
else:
|
||||||
print('Invalid name in array definition: {}\n It should have been an x.'
|
print('Invalid name in array definition: {}\n It should have been an x.'
|
||||||
.format(p[3]))
|
.format(p[3]))
|
||||||
|
@ -286,11 +238,11 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_ty_fun(self, p):
    'ty : ty LPAREN ty_list RPAREN'
    # Return type first, then the parenthesised parameter types.
    result_ty, param_types = p[1], p[3]
    p[0] = ll.FunctionType(return_ty=result_ty, parameters=param_types)
|
||||||
|
|
||||||
def p_ty_id(self, p):
    'ty : PercentID'
    # A bare identifier in type position refers to a type by name.
    referenced_name = p[1]
    p[0] = ll.NamedType(other_name=referenced_name)
|
||||||
|
|
||||||
def p_ty_list_single(self, p):
|
def p_ty_list_single(self, p):
|
||||||
'ty_list : ty'
|
'ty_list : ty'
|
||||||
|
@ -306,32 +258,32 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_gdec(self, p):
    'gdec : AtID ASSIGN GLOBAL ty ginit'
    # Keep the name (p[1]), the type (p[4]) and the initialiser (p[5]); the
    # ASSIGN and GLOBAL tokens are not stored.
    global_name, global_ty, initialiser = p[1], p[4], p[5]
    p[0] = ll.GlobalDec(name=global_name, ty=global_ty, body=initialiser)
|
||||||
|
|
||||||
def p_ginit_null(self, p):
    'ginit : NULL'
    # The null initialiser carries no data.
    null_init = ll.GNull()
    p[0] = null_init
|
||||||
|
|
||||||
def p_ginit_id(self, p):
    'ginit : AtID'
    # Initialiser that names another global identifier.
    global_id = p[1]
    p[0] = ll.GGid(val=global_id)
|
||||||
|
|
||||||
def p_ginit_int(self, p):
    'ginit : INT'
    # Integer initialiser; the INT token's value is stored as-is.
    int_value = p[1]
    p[0] = ll.GInt(val=int_value)
|
||||||
|
|
||||||
def p_ginit_string(self, p):
    'ginit : STRING'
    # String initialiser; the STRING token's value is stored as-is.
    string_value = p[1]
    p[0] = ll.GString(val=string_value)
|
||||||
|
|
||||||
def p_ginit_array(self, p):
    'ginit : LBRACK ty_ginit_list RBRACK'
    # TODO This syntax seems weird
    # The brackets only delimit the array; the entries are p[2].
    array_entries = p[2]
    p[0] = ll.GArray(entries=array_entries)
|
||||||
|
|
||||||
def p_ginit_struct(self, p):
    'ginit : LBRACE ty_ginit_list RBRACE'
    # The braces only delimit the struct; the field initialisers are p[2].
    struct_fields = p[2]
    p[0] = ll.GStruct(fields=struct_fields)
|
||||||
|
|
||||||
def p_ty_ginit_list_single(self, p):
|
def p_ty_ginit_list_single(self, p):
|
||||||
'ty_ginit_list : ty ginit'
|
'ty_ginit_list : ty ginit'
|
||||||
|
@ -347,7 +299,7 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_fdec(self, p):
    'fdec : DEFINE ty AtID LPAREN ty_id_list RPAREN LBRACE fbody RBRACE'
    # Symbols kept: return type (p[2]), function name (p[3]), parameter list
    # (p[5]) and body (p[8]); all other symbols are punctuation or keywords.
    result_ty, function_name = p[2], p[3]
    params, body = p[5], p[8]
    p[0] = ll.FunctionDec(return_type=result_ty, name=function_name,
                          parameters=params, body=body)
|
||||||
|
|
||||||
def p_ty_id_list_single(self, p):
|
def p_ty_id_list_single(self, p):
|
||||||
'ty_id_list : ty PercentID'
|
'ty_id_list : ty PercentID'
|
||||||
|
@ -363,19 +315,19 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_fbody_multiple_blocks(self, p):
    'fbody : block named_block_list'
    # The first block comes first, followed by the named blocks.
    entry_block, later_blocks = p[1], p[2]
    p[0] = ll.FunctionBody(first_block=entry_block, named_blocks=later_blocks)
|
||||||
|
|
||||||
def p_fbody_one_block(self, p):
    'fbody : block'
    # A body with a single block has an empty list of named blocks.
    entry_block = p[1]
    p[0] = ll.FunctionBody(first_block=entry_block, named_blocks=[])
|
||||||
|
|
||||||
def p_block_insns_terminator(self, p):
    'block : insns terminator'
    # Instructions first, then the terminator that closes the block.
    instructions, closing = p[1], p[2]
    p[0] = ll.Block(insns=instructions, terminator=closing)
|
||||||
|
|
||||||
def p_block_terminator(self, p):
    'block : terminator'
    # A block consisting only of a terminator has no instructions.
    closing = p[1]
    p[0] = ll.Block(insns=[], terminator=closing)
|
||||||
|
|
||||||
def p_insns_single(self, p):
|
def p_insns_single(self, p):
|
||||||
'insns : optionally_named_insn'
|
'insns : optionally_named_insn'
|
||||||
|
@ -395,51 +347,51 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_insn_bop(self, p):
    'insn : bop ty operand COMMA operand'
    # p[4] is the COMMA between the two operands and is skipped.
    operator, operand_ty = p[1], p[2]
    lhs, rhs = p[3], p[5]
    p[0] = ll.Binop(bop=operator, ty=operand_ty, left=lhs, right=rhs)
|
||||||
|
|
||||||
def p_insn_alloca(self, p):
    'insn : ALLOCA ty'
    # Only the type after the ALLOCA keyword is stored.
    allocated_ty = p[2]
    p[0] = ll.Alloca(ty=allocated_ty)
|
||||||
|
|
||||||
def p_insn_load(self, p):
    'insn : LOAD ty COMMA ty operand'
    # The first type (p[2]) and the operand (p[5]) are stored; the second
    # type (p[4]) is parsed but not kept in the AST.
    loaded_ty, source = p[2], p[5]
    p[0] = ll.Load(ty=loaded_ty, oper=source)
|
||||||
|
|
||||||
def p_insn_store(self, p):
    'insn : STORE ty operand COMMA ty operand'
    # The first type (p[2]), the stored value (p[3]) and the destination
    # operand (p[6]) are stored; the second type (p[5]) is parsed but not kept.
    stored_ty, stored_value, destination = p[2], p[3], p[6]
    p[0] = ll.Store(ty=stored_ty, value=stored_value, location=destination)
|
||||||
|
|
||||||
def p_insn_icmp(self, p):
    'insn : ICMP cnd ty operand COMMA operand'
    # p[5] is the COMMA between the two operands and is skipped.
    condition, operand_ty = p[2], p[3]
    lhs, rhs = p[4], p[6]
    p[0] = ll.Icmp(cnd=condition, ty=operand_ty, left=lhs, right=rhs)
|
||||||
|
|
||||||
def p_insn_call(self, p):
    'insn : CALL ty operand LPAREN ty_operand_list RPAREN'
    # Return type, callee operand, then the parenthesised argument list.
    result_ty, target, args = p[2], p[3], p[5]
    p[0] = ll.Call(return_ty=result_ty, callee=target, arguments=args)
|
||||||
|
|
||||||
def p_insn_call_empty(self, p):
    'insn : CALL ty operand LPAREN RPAREN'
    # A call with empty parentheses gets an empty argument list.
    result_ty, target = p[2], p[3]
    p[0] = ll.Call(return_ty=result_ty, callee=target, arguments=[])
|
||||||
|
|
||||||
def p_insn_bitcast(self, p):
    'insn : BITCAST ty operand TO ty'
    # p[4] is the TO keyword; the target type is the symbol after it.
    source_ty, value, target_ty = p[2], p[3], p[5]
    p[0] = ll.Bitcast(from_ty=source_ty, oper=value, to_ty=target_ty)
|
||||||
|
|
||||||
def p_insn_gep(self, p):
    'insn : GETELEMENTPTR ty COMMA ty operand COMMA ty_operand_list'
    # Symbols kept: first type (p[2]), operand type (p[4]), operand (p[5])
    # and the trailing typed-operand list (p[7]); the COMMAs are skipped.
    base_ty, operand_ty = p[2], p[4]
    base_operand, index_steps = p[5], p[7]
    p[0] = ll.Gep(base_ty=base_ty, oper_ty=operand_ty, oper=base_operand,
                  steps=index_steps)
|
||||||
|
|
||||||
def p_insn_gep_empty(self, p):
    'insn : GETELEMENTPTR ty COMMA ty operand'
    # Same as p_insn_gep but without a trailing operand list, so `steps` is
    # an empty list.
    base_ty, operand_ty, base_operand = p[2], p[4], p[5]
    p[0] = ll.Gep(base_ty=base_ty, oper_ty=operand_ty, oper=base_operand,
                  steps=[])
|
||||||
|
|
||||||
def p_insn_zext(self, p):
    'insn : ZEXT ty operand TO ty'
    # Symbol positions for this rule:
    #   p[1]=ZEXT  p[2]=source type  p[3]=operand  p[4]=TO  p[5]=target type
    # The target type is p[5]. Using p[4] would store the TO token as
    # `to_ty`; p_insn_bitcast, which has the same rule shape, also reads the
    # target type from p[5].
    source_ty, value, target_ty = p[2], p[3], p[5]
    p[0] = ll.Zext(from_ty=source_ty, oper=value, to_ty=target_ty)
|
||||||
|
|
||||||
def p_insn_ptrtoint(self, p):
    'insn : PTRTOINT ty ASTERIX operand TO ty'
    # p[2] is the type written before the asterisk, p[4] the operand and
    # p[6] the type after TO; ASTERIX (p[3]) and TO (p[5]) are skipped.
    pointer_ty, value, target_ty = p[2], p[4], p[6]
    p[0] = ll.Ptrtoint(pointer_ty=pointer_ty, oper=value, to_ty=target_ty)
|
||||||
|
|
||||||
def p_bop(self, p):
|
def p_bop(self, p):
|
||||||
'''bop : ADD
|
'''bop : ADD
|
||||||
|
@ -473,19 +425,19 @@ class LLVMParser(object):
|
||||||
|
|
||||||
def p_terminator_ret_void(self, p):
    'terminator : RET VOID'
    # A void return has no operand, so `oper` is None.
    void_ty = ll.SimpleType.Void
    p[0] = ll.Ret(ty=void_ty, oper=None)
|
||||||
|
|
||||||
def p_terminator_ret_oper(self, p):
    'terminator : RET ty operand'
    # Return of a value: its type followed by the operand.
    returned_ty, returned_value = p[2], p[3]
    p[0] = ll.Ret(ty=returned_ty, oper=returned_value)
|
||||||
|
|
||||||
def p_terminator_branch(self, p):
    'terminator : BR LABEL PercentID'
    # Unconditional branch; only the target identifier (p[3]) is stored.
    target = p[3]
    p[0] = ll.Br(label=target)
|
||||||
|
|
||||||
def p_terminator_conditional_branch(self, p):
    'terminator : BR ty operand COMMA LABEL PercentID COMMA LABEL PercentID'
    # Symbols kept: condition type (p[2]), condition operand (p[3]) and the
    # two target identifiers (p[6] and p[9]); COMMA and LABEL tokens are
    # skipped.
    condition_ty, condition = p[2], p[3]
    on_true, on_false = p[6], p[9]
    p[0] = ll.Cbr(ty=condition_ty, oper=condition, then_label=on_true,
                  else_label=on_false)
|
||||||
|
|
||||||
def p_operand(self, p):
|
def p_operand(self, p):
|
||||||
'''operand : NULL
|
'''operand : NULL
|
||||||
|
|
Loading…
Reference in New Issue
Block a user