Update parser.
This commit is contained in:
parent
b6a7eefc94
commit
34964c186b
341
parser.py
341
parser.py
|
@ -5,12 +5,55 @@ from collections import namedtuple
|
|||
import ply.lex as lex
|
||||
import ply.yacc as yacc
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class SimpleType(Enum):
    """Simple (non-composite) types, produced by the ``p_ty_*`` grammar actions."""

    Void = 1
    I1 = 2
    I8 = 3
    I32 = 4
    I64 = 5
|
||||
|
||||
|
||||
# Namedtuples for storing AST

# Whole program: the three groups of top-level declarations.
Program = namedtuple('Program', ['tdecls', 'gdecls', 'fdecls'])

# Type declarations and type expressions.
TypeDec = namedtuple('TypeDec', ['name', 'body'])
PointerType = namedtuple('PointerType', ['inner_ty'])
StructType = namedtuple('StructType', ['fields'])
ArrayType = namedtuple('ArrayType', ['length', 'inner_ty'])
FunctionType = namedtuple('FunctionType', ['return_ty', 'parameters'])
NamedType = namedtuple('NamedType', ['other_name'])

# Global declarations and their initializers.
GlobalDec = namedtuple('GlobalDec', ['name', 'ty', 'body'])
GNull = namedtuple('GNull', [])
GGid = namedtuple('GGid', ['val'])
GInt = namedtuple('GInt', ['val'])
GString = namedtuple('GString', ['val'])
GArray = namedtuple('GArray', ['entries'])
GStruct = namedtuple('GStruct', ['fields'])

# Function declarations, bodies and basic blocks.
FunctionDec = namedtuple('FunctionDec', ['return_type', 'name', 'parameters', 'body'])
FunctionBody = namedtuple('FunctionBody', ['first_block', 'named_blocks'])
Block = namedtuple('Block', ['insns', 'terminator'])

# Instructions.
Binop = namedtuple('Binop', ['bop', 'ty', 'left', 'right'])
Alloca = namedtuple('Alloca', ['ty'])
Load = namedtuple('Load', ['ty', 'oper'])
Store = namedtuple('Store', ['ty', 'value', 'location'])
Icmp = namedtuple('Icmp', ['cnd', 'ty', 'left', 'right'])
Call = namedtuple('Call', ['return_ty', 'callee', 'arguments'])
Bitcast = namedtuple('Bitcast', ['from_ty', 'oper', 'to_ty'])
Gep = namedtuple('Gep', ['base_ty', 'oper_ty', 'oper', 'steps'])
Zext = namedtuple('Zext', ['from_ty', 'oper', 'to_ty'])
Ptrtoint = namedtuple('Ptrtoint', ['pointer_ty', 'oper', 'to_ty'])

# Block terminators.
Ret = namedtuple('Ret', ['ty', 'oper'])
Br = namedtuple('Br', ['label'])
Cbr = namedtuple('Cbr', ['ty', 'oper', 'then_label', 'else_label'])
|
||||
|
||||
|
||||
class LLVMParser(object):
|
||||
# Lexer
|
||||
|
||||
|
@ -22,10 +65,6 @@ class LLVMParser(object):
|
|||
]
|
||||
|
||||
reserved = {
|
||||
'if': 'IF',
|
||||
'then': 'THEN',
|
||||
'else': 'ELSE',
|
||||
'while': 'WHILE',
|
||||
'add': 'ADD',
|
||||
'sub': 'SUB',
|
||||
'mul': 'MUL',
|
||||
|
@ -70,8 +109,7 @@ class LLVMParser(object):
|
|||
'INT', 'STRING',
|
||||
'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
|
||||
'ASSIGN', 'COLON', 'COMMA',
|
||||
'PercentID', 'AtID', 'ID',
|
||||
'COMMENT'
|
||||
'PercentID', 'AtID', 'ID'
|
||||
]
|
||||
|
||||
t_ignore = ' \t'
|
||||
|
@ -87,18 +125,15 @@ class LLVMParser(object):
|
|||
t_COLON = r':'
|
||||
t_COMMA = r','
|
||||
|
||||
|
||||
def t_COMMENT(self, t):
    r'(;|declare|target).*'
    # Text starting with ';', 'declare' or 'target' is matched up to the end
    # of the line and dropped: nothing is returned, so no token is emitted.
    pass
|
||||
|
||||
|
||||
def t_newline(self, t):
    r'\n+'
    # One match may cover several consecutive newlines.
    t.lexer.lineno += len(t.value)
    # Remember the offset of the LAST newline in the run, so that
    # `lexpos - self.line_begin` in the error messages is a column on the
    # current line even after blank lines (t.lexpos alone is the offset of
    # the first newline of the run).
    self.line_begin = t.lexpos + len(t.value) - 1
|
||||
|
||||
|
||||
def t_string_newline(self, t):
|
||||
r'\n'
|
||||
print("{}:{}: Newline is not allowed inside strings."
|
||||
|
@ -106,7 +141,6 @@ class LLVMParser(object):
|
|||
t.lexer.lineno += 1
|
||||
self.line_begin = t.lexpos
|
||||
|
||||
|
||||
def t_INT(self, t):
|
||||
r'\d+'
|
||||
t.value = int(t.value)
|
||||
|
@ -121,24 +155,20 @@ class LLVMParser(object):
|
|||
r'\\"'
|
||||
self.current_string += '"'
|
||||
|
||||
|
||||
def t_string_backslash(self, t):
    r'\\\\'
    # An escaped backslash (two characters) adds one literal backslash to
    # the string being accumulated; no token is returned.
    self.current_string += '\\'
|
||||
|
||||
|
||||
def t_string_hex(self, t):
    r'\\[0-9a-fA-F][0-9a-fA-F]'
    # Backslash followed by two hex digits. Upper-case digits are accepted
    # too, since LLVM writes escapes such as \0A; previously only lower-case
    # digits matched and \0A fell through to the single-backslash error rule.
    code = int(t.value[1:], 16)  # drop the leading backslash
    self.current_string += chr(code)
|
||||
|
||||
|
||||
def t_string_singlebackslash(self, t):
    r'\\'
    # A lone backslash: report it with line and column; the character is
    # not added to the string and no token is returned.
    print("{}:{}: Single backslash is not allowed inside strings."
          .format(t.lineno, t.lexpos - self.line_begin))
|
||||
|
||||
|
||||
def t_string_end(self, t):
|
||||
r'"'
|
||||
t.value = self.current_string
|
||||
|
@ -147,18 +177,15 @@ class LLVMParser(object):
|
|||
t.lexer.begin('INITIAL')
|
||||
return t
|
||||
|
||||
|
||||
def t_string_meat(self, t):
    r'.'
    # Any other single character is appended verbatim to the string being
    # accumulated; no token is returned.
    self.current_string += t.value
|
||||
|
||||
|
||||
def t_ID(self, t):
    r'[a-zA-Z0-9_-]+'
    # Words listed in self.reserved get their keyword token type; every
    # other match is a plain ID.
    t.type = self.reserved.get(t.value, 'ID')
    return t
|
||||
|
||||
|
||||
def t_PercentID(self, t):
|
||||
r'%[a-zA-Z0-9_-]+'
|
||||
t.value = t.value[1:]
|
||||
|
@ -169,43 +196,159 @@ class LLVMParser(object):
|
|||
t.value = t.value[1:]
|
||||
return t
|
||||
|
||||
|
||||
def t_ANY_error(self, t):
    # Report the offending character with line and column, then skip that
    # one character so lexing can continue.
    print("{}:{}: Illegal character '{}'"
          .format(t.lineno, t.lexpos - self.line_begin, t.value[0]))
    t.lexer.skip(1)
|
||||
|
||||
|
||||
# Parser
|
||||
def handle_top_decs(self, smth):
    # Split the flat list of top-level declarations into three dicts keyed
    # by declaration name: (type decls, global decls, function decls).
    #
    # A repeated name is reported and the later declaration replaces the
    # earlier one. Objects that are none of the three declaration kinds are
    # reported and dropped.
    #
    # The stale placeholder `return [], [], smth` that made this body
    # unreachable has been removed.
    tdecs = {}
    gdecs = {}
    fdecs = {}
    for top_dec in smth:
        # `.name` is read inside each branch so that an unknown object
        # reaches the final `else` instead of raising AttributeError.
        if isinstance(top_dec, FunctionDec):
            name = top_dec.name
            if name in fdecs:
                print('ERROR: Function {} is declared more than once'
                      .format(name))
            fdecs[name] = top_dec
        elif isinstance(top_dec, TypeDec):
            name = top_dec.name
            if name in tdecs:
                print('ERROR: Type {} is declared more than once'
                      .format(name))
            tdecs[name] = top_dec
        elif isinstance(top_dec, GlobalDec):
            name = top_dec.name
            if name in gdecs:
                print('ERROR: Global {} is declared more than once'
                      .format(name))
            gdecs[name] = top_dec
        else:
            print('Parser Error: Unknown top level declaration {}'
                  .format(top_dec))
    return tdecs, gdecs, fdecs
|
||||
|
||||
def p_program(self, p):
    'program : top_decs'
    # Group the flat declaration list by kind and wrap it in a Program.
    tdecls, gdecls, fdecls = self.handle_top_decs(p[1])
    p[0] = Program(tdecls, gdecls, fdecls)
|
||||
|
||||
def p_topdecs_some(self, p):
    '''top_decs : tdec top_decs
                | gdec top_decs
                | fdec top_decs'''
    # Prepend this declaration to the declarations that follow it. The
    # leftover `p_topdecs_fdec` header and docstring are dropped: the fdec
    # alternative is already covered by the grammar above.
    p[0] = [p[1]] + p[2]
|
||||
|
||||
def p_topdecs_empty(self, p):
    'top_decs : '
    # Empty production: end of the declaration list.
    p[0] = []
|
||||
|
||||
def p_tdec(self, p):
    'tdec : PercentID ASSIGN TYPE ty'
    # TypeDec(name, body): the declared name and the type it stands for.
    p[0] = TypeDec(p[1], p[4])
|
||||
|
||||
def p_ty_void(self, p):
    'ty : VOID'
    # The VOID keyword maps to the SimpleType.Void enum member.
    p[0] = SimpleType.Void
|
||||
|
||||
def p_ty_i1(self, p):
    'ty : I1'
    # The I1 keyword maps to the SimpleType.I1 enum member.
    p[0] = SimpleType.I1
|
||||
|
||||
def p_ty_i8(self, p):
    'ty : I8'
    # The I8 keyword maps to the SimpleType.I8 enum member.
    p[0] = SimpleType.I8
|
||||
|
||||
def p_ty_i32(self, p):
    'ty : I32'
    # The I32 keyword maps to the SimpleType.I32 enum member.
    p[0] = SimpleType.I32
|
||||
|
||||
def p_ty_i64(self, p):
    'ty : I64'
    # The I64 keyword maps to the SimpleType.I64 enum member.
    p[0] = SimpleType.I64
|
||||
|
||||
def p_ty_ptr(self, p):
    'ty : ty ASTERIX'
    # A type followed by '*' is a pointer to that type.
    p[0] = PointerType(p[1])
|
||||
|
||||
def p_ty_struct(self, p):
    'ty : LBRACE ty_list RBRACE'
    # A brace-enclosed type list is a struct with those field types.
    p[0] = StructType(p[2])
|
||||
|
||||
def p_ty_array(self, p):
    'ty : LBRACK INT ID ty RBRACK'
    # Array type written as "[<length> x <type>]". The separator arrives as
    # a generic ID token, so its text is checked here; anything other than
    # 'x' is reported and raises SyntaxError.
    if p[3] != 'x':
        print('Invalid name in array definition: {}\n  It should have been an x.'
              .format(p[3]))
        raise SyntaxError
    p[0] = ArrayType(p[2], p[4])
|
||||
|
||||
def p_ty_fun(self, p):
    'ty : ty LPAREN ty_list RPAREN'
    # FunctionType(return_ty, parameters).
    p[0] = FunctionType(p[1], p[3])
|
||||
|
||||
def p_ty_id(self, p):
    'ty : PercentID'
    # A %-identifier used as a type refers to another type by name.
    p[0] = NamedType(p[1])
|
||||
|
||||
def p_ty_list_single(self, p):
    'ty_list : ty'
    # A single type becomes a one-element list.
    p[0] = [p[1]]
|
||||
|
||||
def p_ty_list_multiple(self, p):
    'ty_list : ty COMMA ty_list'
    # Prepend the first type to the rest of the list; the comma (p[2]) is
    # discarded.
    p[0] = [p[1]] + p[3]
|
||||
|
||||
def p_ty_list_empty(self, p):
    'ty_list : '
    # Empty production: no types.
    p[0] = []
|
||||
|
||||
def p_gdec(self, p):
    'gdec : AtID ASSIGN GLOBAL ty ginit'
    # GlobalDec(name, ty, body): the body is the parsed initializer.
    p[0] = GlobalDec(p[1], p[4], p[5])
|
||||
|
||||
def p_ginit_null(self, p):
    'ginit : NULL'
    # Null initializer; GNull has no fields.
    p[0] = GNull()
|
||||
|
||||
def p_ginit_id(self, p):
    'ginit : AtID'
    # Initializer that names another @-identifier.
    p[0] = GGid(p[1])
|
||||
|
||||
def p_ginit_int(self, p):
    'ginit : INT'
    # Integer initializer.
    p[0] = GInt(p[1])
|
||||
|
||||
def p_ginit_string(self, p):
    'ginit : STRING'
    # String initializer.
    p[0] = GString(p[1])
|
||||
|
||||
def p_ginit_array(self, p):
    'ginit : LBRACK ty_ginit_list RBRACK'
    # TODO This syntax seems weird
    # Array initializer: a bracketed list of (type, initializer) pairs.
    p[0] = GArray(p[2])
|
||||
|
||||
def p_ginit_struct(self, p):
    'ginit : LBRACE ty_ginit_list RBRACE'
    # Struct initializer: a braced list of (type, initializer) pairs.
    p[0] = GStruct(p[2])
|
||||
|
||||
def p_ty_ginit_list_single(self, p):
    'ty_ginit_list : ty ginit'
    # One (type, initializer) pair as a one-element list.
    p[0] = [(p[1], p[2])]
|
||||
|
||||
def p_ty_ginit_list_multiple(self, p):
    'ty_ginit_list : ty ginit COMMA ty_ginit_list'
    # Prepend this (type, initializer) pair to the rest of the list.
    p[0] = [(p[1], p[2])] + p[4]
|
||||
|
||||
def p_ty_ginit_list_empty(self, p):
    'ty_ginit_list : '
    # Empty production: no entries.
    p[0] = []
|
||||
|
||||
def p_fdec(self, p):
    'fdec : DEFINE ty AtID LPAREN ty_id_list RPAREN LBRACE fbody RBRACE'
    # FunctionDec(return_type, name, parameters, body).
    p[0] = FunctionDec(p[2], p[3], p[5], p[8])
|
||||
|
||||
# NOTE(review): this looks like the pre-change side of the diff. Its
# productions (ty : VOID | I1 | I8 | I32 | I64) are also declared one by one
# by p_ty_void, p_ty_i1, p_ty_i8, p_ty_i32 and p_ty_i64, which return
# SimpleType members, whereas this action passes the raw token value through.
# Confirm it is meant to be removed rather than kept alongside them.
def p_ty_simple(self, p):
|
||||
'''ty : VOID
|
||||
| I1
|
||||
||||
| I8
|
||||
||||
| I32
|
||||
||||
| I64'''
|
||||
p[0] = p[1]
|
||||
|
||||
def p_ty_id_list_single(self, p):
    'ty_id_list : ty PercentID'
    # One (type, %-identifier) pair as a one-element list.
    p[0] = [(p[1], p[2])]
|
||||
|
@ -218,25 +361,155 @@ class LLVMParser(object):
|
|||
'ty_id_list : '
|
||||
p[0] = []
|
||||
|
||||
def p_fbody_multiple_blocks(self, p):
    'fbody : block named_block_list'
    # FunctionBody(first_block, named_blocks).
    p[0] = FunctionBody(p[1], p[2])
|
||||
|
||||
def p_fbody_one_block(self, p):
    'fbody : block'
    # A body with only the unnamed first block has no named blocks.
    p[0] = FunctionBody(p[1], [])
|
||||
|
||||
def p_block_insns_terminator(self, p):
    'block : insns terminator'
    # Block(insns, terminator).
    p[0] = Block(p[1], p[2])
|
||||
|
||||
def p_block_terminator(self, p):
    'block : terminator'
    # A block made of a terminator alone has an empty instruction list.
    p[0] = Block([], p[1])
|
||||
|
||||
def p_insns_single(self, p):
    'insns : optionally_named_insn'
    # A single instruction becomes a one-element list.
    p[0] = [p[1]]
|
||||
|
||||
def p_insns_multiple(self, p):
    'insns : optionally_named_insn insns'
    # Prepend this instruction to the instructions that follow it.
    p[0] = [p[1]] + p[2]
|
||||
|
||||
def p_optionally_named_insn_some(self, p):
    'optionally_named_insn : PercentID ASSIGN insn'
    # (result name, instruction) for an instruction whose result is named.
    p[0] = (p[1], p[3])
|
||||
|
||||
def p_optionally_named_insn_none(self, p):
    'optionally_named_insn : insn'
    # Same pair shape as the named case, with None in place of the name.
    p[0] = (None, p[1])
|
||||
|
||||
def p_insn_bop(self, p):
    'insn : bop ty operand COMMA operand'
    # Binop(bop, ty, left, right); the comma (p[4]) is discarded.
    p[0] = Binop(p[1], p[2], p[3], p[5])
|
||||
|
||||
def p_insn_alloca(self, p):
    'insn : ALLOCA ty'
    # Alloca(ty).
    p[0] = Alloca(p[2])
|
||||
|
||||
def p_insn_load(self, p):
    'insn : LOAD ty COMMA ty operand'
    # Load(ty, oper): keeps the first type (p[2]) and the operand (p[5]);
    # the second type (p[4]) is parsed but not stored.
    p[0] = Load(p[2], p[5])
|
||||
|
||||
def p_insn_store(self, p):
    'insn : STORE ty operand COMMA ty operand'
    # Store(ty, value, location): keeps the first type (p[2]), the value
    # operand (p[3]) and the location operand (p[6]); the second type (p[5])
    # is parsed but not stored.
    p[0] = Store(p[2], p[3], p[6])
|
||||
|
||||
def p_insn_icmp(self, p):
    'insn : ICMP cnd ty operand COMMA operand'
    # Icmp(cnd, ty, left, right); the comma (p[5]) is discarded.
    p[0] = Icmp(p[2], p[3], p[4], p[6])
|
||||
|
||||
def p_insn_call(self, p):
    'insn : CALL ty operand LPAREN ty_operand_list RPAREN'
    # Call(return_ty, callee, arguments), where arguments is a list of
    # (type, operand) pairs.
    p[0] = Call(p[2], p[3], p[5])
|
||||
|
||||
def p_insn_call_empty(self, p):
    'insn : CALL ty operand LPAREN RPAREN'
    # Call with no arguments: the argument list is empty.
    p[0] = Call(p[2], p[3], [])
|
||||
|
||||
def p_insn_bitcast(self, p):
    'insn : BITCAST ty operand TO ty'
    # Bitcast(from_ty, oper, to_ty); p[4] is the TO keyword.
    p[0] = Bitcast(p[2], p[3], p[5])
|
||||
|
||||
def p_insn_gep(self, p):
    'insn : GETELEMENTPTR ty COMMA ty operand COMMA ty_operand_list'
    # Gep(base_ty, oper_ty, oper, steps), where steps is a list of
    # (type, operand) pairs.
    p[0] = Gep(p[2], p[4], p[5], p[7])
|
||||
|
||||
def p_insn_gep_empty(self, p):
    'insn : GETELEMENTPTR ty COMMA ty operand'
    # getelementptr without index steps: the steps list is empty.
    p[0] = Gep(p[2], p[4], p[5], [])
|
||||
|
||||
def p_insn_zext(self, p):
    'insn : ZEXT ty operand TO ty'
    # Zext(from_ty, oper, to_ty). The destination type is p[5]; p[4] is the
    # TO keyword, which was previously stored as to_ty by mistake. This now
    # matches the indexing used by p_insn_bitcast for the same rule shape.
    p[0] = Zext(p[2], p[3], p[5])
|
||||
|
||||
def p_insn_ptrtoint(self, p):
    'insn : PTRTOINT ty ASTERIX operand TO ty'
    # Ptrtoint(pointer_ty, oper, to_ty): stores the type written before the
    # '*' (p[2]), the operand (p[4]) and the destination type (p[6]).
    p[0] = Ptrtoint(p[2], p[4], p[6])
|
||||
|
||||
def p_bop(self, p):
    '''bop : ADD
           | SUB
           | MUL
           | SHL
           | LSHR
           | ASHR
           | AND
           | OR
           | XOR
           | SDIV'''
    # The operator's token value is passed through unchanged.
    p[0] = p[1]
|
||||
|
||||
def p_cnd(self, p):
    '''cnd : EQ
           | NE
           | SLT
           | SLE
           | SGT
           | SGE'''
    # The comparison's token value is passed through unchanged.
    p[0] = p[1]
|
||||
|
||||
def p_ty_operand_list_single(self, p):
    'ty_operand_list : ty operand'
    # One (type, operand) pair as a one-element list.
    p[0] = [(p[1], p[2])]
|
||||
|
||||
def p_ty_operand_list_multiple(self, p):
    'ty_operand_list : ty operand COMMA ty_operand_list'
    # Prepend this (type, operand) pair to the rest of the list.
    p[0] = [(p[1], p[2])] + p[4]
|
||||
|
||||
def p_terminator_ret_void(self, p):
    'terminator : RET VOID'
    # "ret void" carries no operand.
    p[0] = Ret(SimpleType.Void, None)
|
||||
|
||||
def p_terminator_ret_oper(self, p):
    'terminator : RET ty operand'
    # Ret(ty, oper). The leftover assignment of a bare (ty, operand) tuple
    # was dead code, immediately overwritten, and has been removed.
    p[0] = Ret(p[2], p[3])
|
||||
|
||||
def p_terminator_branch(self, p):
    'terminator : BR LABEL PercentID'
    # Unconditional branch: Br(label).
    p[0] = Br(p[3])
|
||||
|
||||
def p_terminator_conditional_branch(self, p):
    'terminator : BR ty operand COMMA LABEL PercentID COMMA LABEL PercentID'
    # Cbr(ty, oper, then_label, else_label): p[6] and p[9] are the two
    # %-identifiers that follow the LABEL keywords.
    p[0] = Cbr(p[2], p[3], p[6], p[9])
|
||||
|
||||
def p_operand(self, p):
    '''operand : NULL
               | INT
               | AtID
               | PercentID'''
    # TODO: distinguish ids
    # The token value is passed through unchanged, so the result does not
    # record which of the four alternatives matched.
    p[0] = p[1]
|
||||
|
||||
def p_named_block_list_single(self, p):
    'named_block_list : ID COLON block'
    # One (label, block) pair as a one-element list.
    p[0] = [(p[1], p[3])]
|
||||
|
||||
def p_named_block_list_multiple(self, p):
    'named_block_list : ID COLON block named_block_list'
    # Prepend this (label, block) pair to the blocks that follow it.
    p[0] = [(p[1], p[3])] + p[4]
|
||||
|
||||
def p_error(self, t):
    # Syntax-error callback: `t` is the offending token, or None when the
    # input ended unexpectedly.
    if t is None:
        print('Syntax error at end of file')
    else:
        print('Syntax error at token {}'
              .format(t))
|
||||
|
||||
def __init__(self):
|
||||
self.tokens += self.reserved.values()
|
||||
|
||||
|
@ -248,12 +521,14 @@ class LLVMParser(object):
|
|||
result = self.parser.parse(data, lexer=self.lexer)
|
||||
print(result)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: build the parser and print the AST of a small function.
    p = LLVMParser()
    p.build()
    # The leftover `ret i64 8` line was dropped: it put a terminator ahead
    # of the instructions, which the `block` grammar does not accept.
    data = r'''
define void @tigermain (i64 %U_mainSL_8, i64 %U_mainDummy_9) {
 %a = add i64 3, 5 ; please be 8
 ret i64 %a
}
    '''
    p.test(data)
|
||||
|
|
Loading…
Reference in New Issue
Block a user