llvm--emulator/parser.py
2017-10-29 02:56:26 +02:00

260 lines
5.8 KiB
Python

"""Parser for LLVM--."""
from collections import namedtuple
import ply.lex as lex
import ply.yacc as yacc
# Namedtuples for storing the AST.

# Root of the AST: three collections of top-level declarations.
Program = namedtuple(
    'Program',
    ('tdecls', 'gdecls', 'fdecls'),
)

# One function definition: its return type, name, parameters and body.
FunctionDec = namedtuple(
    'FunctionDec',
    ('return_type', 'name', 'parameters', 'body'),
)

# Function body: the entry block plus any further named blocks.
FunctionBody = namedtuple(
    'FunctionBody',
    ('first_block', 'named_blocks'),
)

# Basic block: a sequence of instructions closed by one terminator.
Block = namedtuple(
    'Block',
    ('insns', 'terminator'),
)
class LLVMParser(object):
    """Lexer and parser for LLVM--, built on PLY.

    PLY collects its rules from this object by name: ``t_*`` attributes and
    methods are lexer rules, ``p_*`` methods are grammar productions.  The
    docstring of every such method *is* the rule (a regular expression or a
    production), so those methods are documented with ``#`` comments only.

    Usage: create an instance, call :meth:`build` once, then :meth:`test`.
    """

    # ------------------------------------------------------------------
    # Lexer
    # ------------------------------------------------------------------

    # Text of the string literal currently being scanned (state 'string').
    current_string = ""

    # Offset of the newline that ended the previous line.  Columns in error
    # messages are computed as ``lexpos - line_begin``; starting at -1 makes
    # them 1-based on the first line as well as on every following line.
    line_begin = -1

    # Extra lexer state used while inside a ``c"..."`` string literal.
    states = [
        ('string', 'exclusive')
    ]

    # Keyword text -> token type.  Looked up in t_ID.
    reserved = {
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'while': 'WHILE',
        'add': 'ADD',
        'sub': 'SUB',
        'mul': 'MUL',
        'shl': 'SHL',
        'lshr': 'LSHR',
        'ashr': 'ASHR',
        'and': 'AND',
        'or': 'OR',
        'xor': 'XOR',
        'sdiv': 'SDIV',
        'eq': 'EQ',
        'ne': 'NE',
        'slt': 'SLT',
        'sle': 'SLE',
        'sgt': 'SGT',
        'sge': 'SGE',
        'alloca': 'ALLOCA',
        'load': 'LOAD',
        'store': 'STORE',
        'icmp': 'ICMP',
        'call': 'CALL',
        'bitcast': 'BITCAST',
        'getelementptr': 'GETELEMENTPTR',
        'zext': 'ZEXT',
        'ptrtoint': 'PTRTOINT',
        'ret': 'RET',
        'br': 'BR',
        'label': 'LABEL',
        'define': 'DEFINE',
        'null': 'NULL',
        'global': 'GLOBAL',
        'type': 'TYPE',
        'to': 'TO',
        'void': 'VOID',
        'i1': 'I1',
        'i8': 'I8',
        'i32': 'I32',
        'i64': 'I64',
    }

    # Non-keyword token types.  __init__ gives each instance its own list
    # that additionally contains every value of ``reserved``.
    tokens = [
        'INT', 'STRING',
        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
        'ASSIGN', 'COLON', 'COMMA',
        'PercentID', 'AtID', 'ID',
        'COMMENT'
    ]

    # Characters skipped between tokens; nothing is skipped inside strings.
    t_ignore = ' \t'
    t_string_ignore = ''

    # Single-character punctuation.
    t_ASTERIX = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACK = r'\['
    t_RBRACK = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_ASSIGN = r'='
    t_COLON = r':'
    t_COMMA = r','

    # Discards the rest of a line starting with ';', 'declare' or 'target'
    # (nothing is returned, so no token is produced).
    def t_COMMENT(self, t):
        r'(;|declare|target).*'
        pass

    # Counts line breaks and remembers where the current line starts.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        # The match may span several newlines; the current line starts after
        # the LAST one, so record that newline's offset, not the first one's.
        self.line_begin = t.lexpos + len(t.value) - 1

    # A raw newline inside a string literal is reported, then tracked like
    # any other line break.
    def t_string_newline(self, t):
        r'\n'
        print("{}:{}: Newline is not allowed inside strings."
              .format(t.lineno, t.lexpos - self.line_begin))
        t.lexer.lineno += 1
        self.line_begin = t.lexpos

    # Decimal integer literal; the token value becomes a Python int.
    def t_INT(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    # 'c"' opens a string literal: switch to the 'string' state and start
    # collecting characters from scratch.
    def t_begin_string(self, t):
        r'c\"'
        t.lexer.begin('string')
        self.current_string = ""

    # Escape \" -> a literal double quote.
    def t_string_doublequote(self, t):
        r'\\"'
        self.current_string += '"'

    # Escape \\ -> a literal backslash.
    def t_string_backslash(self, t):
        r'\\\\'
        self.current_string += '\\'

    # Escape \xx (two hex digits) -> the character with that code.  Both
    # upper- and lower-case digits are accepted, e.g. \0A as well as \0a.
    def t_string_hex(self, t):
        r'\\[0-9a-fA-F][0-9a-fA-F]'
        code = int(t.value[1:], 16)
        self.current_string += chr(code)

    # Any other backslash is reported and dropped from the string.
    def t_string_singlebackslash(self, t):
        r'\\'
        print("{}:{}: Single backslash is not allowed inside strings."
              .format(t.lineno, t.lexpos - self.line_begin))

    # Closing quote: emit the collected text as one STRING token and go back
    # to the default lexer state.
    def t_string_end(self, t):
        r'"'
        t.value = self.current_string
        self.current_string = ""
        t.type = "STRING"
        t.lexer.begin('INITIAL')
        return t

    # Any other character inside a string is taken verbatim.
    def t_string_meat(self, t):
        r'.'
        self.current_string += t.value

    # Bare word: a keyword from ``reserved`` if listed there, otherwise ID.
    def t_ID(self, t):
        r'[a-zA-Z0-9_-]+'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # %name identifier; the leading '%' is stripped from the value.
    def t_PercentID(self, t):
        r'%[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # @name identifier; the leading '@' is stripped from the value.
    def t_AtID(self, t):
        r'@[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # Error rule for every lexer state: report the offending character and
    # skip it so that scanning can continue.
    def t_ANY_error(self, t):
        print("{}:{}: Illegal character '{}'"
              .format(t.lineno, t.lexpos - self.line_begin, t.value[0]))
        t.lexer.skip(1)

    # ------------------------------------------------------------------
    # Parser
    # ------------------------------------------------------------------

    def handle_top_decs(self, smth):
        """Split top-level declarations into (tdecls, gdecls, fdecls).

        Currently a stub: everything in ``smth`` is returned as the third
        element and the first two lists are always empty.
        """
        # TODO: Implement
        return [], [], smth

    # Start symbol: wraps the top-level declarations in a Program.
    def p_program(self, p):
        'program : top_decs'
        tdecls, gdecls, fdecls = self.handle_top_decs(p[1])
        p[0] = Program(tdecls, gdecls, fdecls)

    # A function declaration followed by more top-level declarations.
    def p_topdecs_fdec(self, p):
        'top_decs : fdec top_decs'
        p[0] = [p[1]] + p[2]

    # No (more) top-level declarations.
    def p_topdecs_empty(self, p):
        'top_decs : '
        p[0] = []

    # Function definition -> FunctionDec(return type, name, params, body).
    def p_fdec(self, p):
        'fdec : DEFINE ty AtID LPAREN ty_id_list RPAREN LBRACE fbody RBRACE'
        p[0] = FunctionDec(p[2], p[3], p[5], p[8])

    # Simple types; the value is the keyword text itself.
    def p_ty_simple(self, p):
        '''ty : VOID
              | I1
              | I8
              | I32
              | I64'''
        p[0] = p[1]

    # Parameter lists are Python lists of (type, name) pairs.
    def p_ty_id_list_single(self, p):
        'ty_id_list : ty PercentID'
        p[0] = [(p[1], p[2])]

    # NOTE(review): ty_id_list may itself be empty, so this production also
    # accepts a trailing comma after the last parameter.
    def p_ty_id_list_multiple(self, p):
        'ty_id_list : ty PercentID COMMA ty_id_list'
        p[0] = [(p[1], p[2])] + p[4]

    def p_ty_id_list_empty(self, p):
        'ty_id_list : '
        p[0] = []

    # A body made of a single block has no named blocks.
    def p_fbody_one_block(self, p):
        'fbody : block'
        p[0] = FunctionBody(p[1], [])

    # A block consisting only of its terminator (no instructions).
    def p_block_terminator(self, p):
        'block : terminator'
        p[0] = Block([], p[1])

    # 'ret <ty> <operand>' is represented as the pair (type, operand).
    def p_terminator_ret_oper(self, p):
        'terminator : RET ty operand'
        p[0] = (p[2], p[3])

    # Operands carry the value of the matched token unchanged.
    def p_operand(self, p):
        '''operand : NULL
                   | INT
                   | AtID
                   | PercentID'''
        p[0] = p[1]

    # ------------------------------------------------------------------
    # Driver
    # ------------------------------------------------------------------

    def __init__(self):
        """Give this instance a token list that includes all keywords."""
        # A fresh list is built rather than extending the class-level list in
        # place; an in-place extension would append the reserved tokens to
        # the shared list again for every instance that is created.
        self.tokens = list(type(self).tokens) + list(self.reserved.values())

    def build(self, **kwargs):
        """Create the PLY lexer and parser from the rules on this object.

        The same keyword arguments are forwarded to both ``lex.lex`` and
        ``yacc.yacc``.
        """
        self.lexer = lex.lex(module=self, **kwargs)
        self.parser = yacc.yacc(module=self, **kwargs)

    def test(self, data):
        """Parse ``data`` and print the result.  Requires build() first."""
        result = self.parser.parse(data, lexer=self.lexer)
        print(result)
if __name__ == '__main__':
    # Smoke test: parse one tiny function definition and print its AST.
    sample_program = r'''
define void @tigermain (i64 %U_mainSL_8, i64 %U_mainDummy_9) {
ret i64 8
}
'''
    llvm_parser = LLVMParser()
    llvm_parser.build()
    llvm_parser.test(sample_program)