# llvm--emulator/parser.py

"""Parser for LLVM--."""

from collections import namedtuple

import ply.lex as lex
import ply.yacc as yacc

# AST node types: lightweight immutable records produced by the grammar
# actions of the parser.
Program = namedtuple('Program', 'tdecls gdecls fdecls')
FunctionDec = namedtuple('FunctionDec', 'return_type name parameters body')
FunctionBody = namedtuple('FunctionBody', 'first_block named_blocks')
Block = namedtuple('Block', 'insns terminator')

class LLVMParser(object):
    """Lexer and parser for LLVM--, built on PLY (lex/yacc).

    PLY discovers token rules (``t_*``) and grammar rules (``p_*``) by name
    and reads each rule's docstring as its regular expression or grammar
    production. For that reason explanatory notes on those rules are written
    as ``#`` comments, never as docstrings.

    Typical use: create an instance, call ``build()`` once, then ``test()``.
    """

    # ------------------------------------------------------------------
    # Lexer
    # ------------------------------------------------------------------

    # Contents of the string literal currently being lexed ('string' state).
    current_string = ""
    # Offset subtracted from a token's lexpos to report a column in messages.
    line_begin = 0

    # The exclusive 'string' state is active between c" and the closing ".
    states = [
        ('string', 'exclusive')
    ]

    # Keyword text -> token type; consulted by t_ID.
    reserved = {
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'while': 'WHILE',
        'add': 'ADD',
        'sub': 'SUB',
        'mul': 'MUL',
        'shl': 'SHL',
        'lshr': 'LSHR',
        'ashr': 'ASHR',
        'and': 'AND',
        'or': 'OR',
        'xor': 'XOR',
        'sdiv': 'SDIV',
        'eq': 'EQ',
        'ne': 'NE',
        'slt': 'SLT',
        'sle': 'SLE',
        'sgt': 'SGT',
        'sge': 'SGE',
        'alloca': 'ALLOCA',
        'load': 'LOAD',
        'store': 'STORE',
        'icmp': 'ICMP',
        'call': 'CALL',
        'bitcast': 'BITCAST',
        'getelementptr': 'GETELEMENTPTR',
        'zext': 'ZEXT',
        'ptrtoint': 'PTRTOINT',
        'ret': 'RET',
        'br': 'BR',
        'label': 'LABEL',
        'define': 'DEFINE',
        'null': 'NULL',
        'global': 'GLOBAL',
        'type': 'TYPE',
        'to': 'TO',
        'void': 'VOID',
        'i1': 'I1',
        'i8': 'I8',
        'i32': 'I32',
        'i64': 'I64',
    }

    # Non-keyword token types. The keyword token types from `reserved` are
    # appended per instance in __init__.
    tokens = [
        'INT', 'STRING',
        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
        'ASSIGN', 'COLON', 'COMMA',
        'PercentID', 'AtID', 'ID',
        'COMMENT'
    ]

    t_ignore = ' \t'
    # Nothing is skipped inside string literals; whitespace is content there.
    t_string_ignore = ''
    t_ASTERIX = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACK = r'\['
    t_RBRACK = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_ASSIGN = r'='
    t_COLON = r':'
    t_COMMA = r','

    # Comments, and whole lines starting with 'declare' or 'target', are
    # matched to end of line and discarded (no token is returned).
    def t_COMMENT(self, t):
        r'(;|declare|target).*'
        pass

    # Track line numbers and the column origin for diagnostics.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        # Anchor on the LAST newline of the run, not the first, so that
        # `lexpos - line_begin` stays a correct column after blank lines.
        self.line_begin = t.lexpos + len(t.value) - 1

    # A raw newline inside a string literal is reported and dropped.
    def t_string_newline(self, t):
        r'\n'
        print("{}:{}: Newline is not allowed inside strings."
              .format(t.lineno, t.lexpos - self.line_begin))
        t.lexer.lineno += 1
        self.line_begin = t.lexpos

    def t_INT(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    # c" opens a string literal: switch to the 'string' state and reset the
    # accumulator. No token is emitted until the closing quote.
    def t_begin_string(self, t):
        r'c\"'
        t.lexer.begin('string')
        self.current_string = ""

    # Escaped double quote inside a string.
    def t_string_doublequote(self, t):
        r'\\"'
        self.current_string += '"'

    # Escaped backslash inside a string.
    def t_string_backslash(self, t):
        r'\\\\'
        self.current_string += '\\'

    # Two-digit hex escape (\xx). Both digit cases are accepted: LLVM itself
    # prints such escapes in uppercase (e.g. \0A), which a lowercase-only
    # pattern would misreport as a stray backslash.
    def t_string_hex(self, t):
        r'\\[0-9a-fA-F][0-9a-fA-F]'
        code = int(t.value[1:], 16)
        self.current_string += chr(code)

    # Any backslash not consumed by the escape rules above is an error; it is
    # reported and dropped from the string.
    def t_string_singlebackslash(self, t):
        r'\\'
        print("{}:{}: Single backslash is not allowed inside strings."
              .format(t.lineno, t.lexpos - self.line_begin))

    # Closing quote: emit the accumulated text as one STRING token and return
    # to the INITIAL state.
    def t_string_end(self, t):
        r'"'
        t.value = self.current_string
        self.current_string = ""
        t.type = "STRING"
        t.lexer.begin('INITIAL')
        return t

    # Any other character is ordinary string content.
    def t_string_meat(self, t):
        r'.'
        self.current_string += t.value

    # Bare word: a keyword if listed in `reserved`, otherwise a plain ID.
    def t_ID(self, t):
        r'[a-zA-Z0-9_-]+'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # %name identifier; the leading '%' is stripped from the token value.
    def t_PercentID(self, t):
        r'%[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # @name identifier; the leading '@' is stripped from the token value.
    def t_AtID(self, t):
        r'@[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # Shared error rule for every lexer state: report and skip one character.
    def t_ANY_error(self, t):
        print("{}:{}: Illegal character '{}'"
              .format(t.lineno, t.lexpos - self.line_begin, t.value[0]))
        t.lexer.skip(1)

    # ------------------------------------------------------------------
    # Parser
    # ------------------------------------------------------------------

    def handle_top_decs(self, smth):
        """Split top-level declarations into (tdecls, gdecls, fdecls).

        Placeholder: type and global declarations are not handled yet, so
        two empty lists are returned and `smth` is passed through unchanged
        as the function declarations.
        """
        # TODO: Implement
        return [], [], smth

    def p_program(self, p):
        'program : top_decs'
        tdecls, gdecls, fdecls = self.handle_top_decs(p[1])
        p[0] = Program(tdecls, gdecls, fdecls)

    def p_topdecs_fdec(self, p):
        'top_decs : fdec top_decs'
        p[0] = [p[1]] + p[2]

    def p_topdecs_empty(self, p):
        'top_decs : '
        p[0] = []

    # p[2] = return type, p[3] = name, p[5] = parameter list, p[8] = body.
    def p_fdec(self, p):
        'fdec : DEFINE ty AtID LPAREN ty_id_list RPAREN LBRACE fbody RBRACE'
        p[0] = FunctionDec(p[2], p[3], p[5], p[8])

    def p_ty_simple(self, p):
        '''ty : VOID
            | I1
            | I8
            | I32
            | I64'''
        p[0] = p[1]

    # Parameter lists are built as lists of (type, name) pairs.
    def p_ty_id_list_single(self, p):
        'ty_id_list : ty PercentID'
        p[0] = [(p[1], p[2])]

    def p_ty_id_list_multiple(self, p):
        'ty_id_list : ty PercentID COMMA ty_id_list'
        p[0] = [(p[1], p[2])] + p[4]

    def p_ty_id_list_empty(self, p):
        'ty_id_list : '
        p[0] = []

    # Only single-block function bodies are supported so far.
    def p_fbody_one_block(self, p):
        'fbody : block'
        p[0] = FunctionBody(p[1], [])

    # A block currently consists of a terminator only (no instructions).
    def p_block_terminator(self, p):
        'block : terminator'
        p[0] = Block([], p[1])

    # The terminator is represented as a (type, operand) pair.
    def p_terminator_ret_oper(self, p):
        'terminator : RET ty operand'
        p[0] = (p[2], p[3])

    def p_operand(self, p):
        '''operand : NULL
            | INT
            | AtID
            | PercentID'''
        p[0] = p[1]

    def __init__(self):
        """Give this instance its full token list (base tokens + keywords).

        A fresh list is built instead of extending the class-level `tokens`
        in place; in-place extension would append the keyword tokens again
        for every new instance and leak duplicates into all of them.
        """
        self.tokens = list(type(self).tokens) + list(self.reserved.values())

    def build(self, **kwargs):
        """Create the PLY lexer and parser from the rules on this instance.

        `kwargs` are forwarded unchanged to both `lex.lex` and `yacc.yacc`.
        """
        self.lexer = lex.lex(module=self, **kwargs)
        self.parser = yacc.yacc(module=self, **kwargs)

    def test(self, data):
        """Parse `data` and print the resulting value. Requires `build()`."""
        result = self.parser.parse(data, lexer=self.lexer)
        print(result)

if __name__ == '__main__':
    # Smoke test: parse a tiny one-function program and print the result.
    sample_program = r'''
define void @tigermain (i64 %U_mainSL_8, i64 %U_mainDummy_9) {
 ret i64 8
}
    '''
    llvm_parser = LLVMParser()
    llvm_parser.build()
    llvm_parser.test(sample_program)