llvm--emulator/llvm_emulator/stepper.py

from enum import Enum

import llvm_emulator.ll as ll


def TODO(msg):
    print('TODO: not implemented yet at {}'
          .format(msg))


def err(msg):
    print('ERROR: {}'
          .format(msg))


def warn(msg):
    print('WARNING: {}'
          .format(msg))


class Garbage(Enum):
    GARBAGE = '<<Unitialized memory>>'

    def __repr__(self):
        return '<<Garbage>>'


def step(insns, terminator, blocks, stack_frames, ssa_env, global_env, heap,
         tdecs, fdecs, call_res):
    if len(insns) == 0:
        return terminate(terminator, blocks, stack_frames, ssa_env, global_env, heap,
                         call_res)
    ssa_target, next_insn = insns[0]
    insns_rest = insns[1:]

    # TODO
    print('Evaluating {}'
          .format(ll.insn2s(next_insn)))

    res = None
    if isinstance(next_insn, ll.Binop):
        bop = next_insn.bop
        left = next_insn.left
        right = next_insn.right
        left_v = eval_oper(left, ssa_env, global_env)
        right_v = eval_oper(right, ssa_env, global_env)
        res = eval_binop(bop, left_v, right_v)

        # TODO
        print('{} {}, {}'
              .format(bop, left_v, right_v))
    elif isinstance(next_insn, ll.Alloca):
        ty = next_insn.ty
        base_ty = ty2base_ty(ty, tdecs)
        size = base_ty2size(base_ty)

        # TODO
        print('alloca {}  -->  allocating {} cells'
              .format(ll.ty2s(base_ty), size))

        ptr = len(heap)
        for i in range(max(size, 1)):
            heap.append(Garbage.GARBAGE)

        res = ptr
    elif isinstance(next_insn, ll.Load):
        ty = next_insn.ty
        base_ty = ty2base_ty(ty, tdecs)
        size = base_ty2size(base_ty)
        location = next_insn.location
        location_v = eval_oper(location, ssa_env, global_env)

        # TODO
        print('load heap[{}]'
              .format(location_v))

        if size != 1:
            err(('This emulator cannot load objects larger than 1 cell.'
                 ' Current size is {}')
                .format(size))

        if location_v == 0:
            err('You are not allowed to read from location 0')
            res = 0
        else:
            res = heap[location_v]
    elif isinstance(next_insn, ll.Store):
        ty = next_insn.ty
        base_ty = ty2base_ty(ty, tdecs)
        size = base_ty2size(base_ty)
        value = next_insn.value
        location = next_insn.location

        value_v = eval_oper(value, ssa_env, global_env)
        location_v = eval_oper(location, ssa_env, global_env)

        # TODO
        print('heap[{}] <- {}'
              .format(location_v, value_v))
        if location_v == 0:
            err('You are not allowed to store at location 0 (Null)')
        elif size == 1:
            heap[location_v] = value_v
        else:
            err(('This emulator cannot store objects larger than 1 cell.'
                 ' Current size is {}')
                .format(size))
    elif isinstance(next_insn, ll.Icmp):
        cnd = next_insn.cnd
        left = next_insn.left
        right = next_insn.right
        left_v = eval_oper(left, ssa_env, global_env)
        right_v = eval_oper(right, ssa_env, global_env)
        res = eval_icmp(cnd, left_v, right_v)

        # TODO
        print('icmp {} {}, {}'
              .format(cnd,  left_v, right_v))
    elif isinstance(next_insn, ll.Call):
        callee = next_insn.callee
        arguments = next_insn.arguments

        if not isinstance(callee, ll.Gid):
            err('Cannot call anything but global identifiers: {}'
                .format(ll.oper2s(callee)))
            return insns_rest, terminator, blocks, stack_frames, ssa_env, heap, call_res

        arguments_v = [eval_oper(oper, ssa_env, global_env)
                       for ty, oper in arguments]

        try:
            function = fdecs[callee.val]
        except KeyError:
            err('Could not find function {} in environment:\n{}'
                .format(callee.val, fdecs.keys()))
            return insns_rest, terminator, blocks, stack_frames, ssa_env, heap, call_res

        parameters = function.parameters
        print('call @{} ({})'
              .format(callee.val,
                      ', '.join('%{} <- {}'.format(par[1], arg)
                                for par, arg in zip(parameters, arguments_v))))
        child_insns = function.body.first_block.insns
        child_terminator = function.body.first_block.terminator
        child_blocks = function.body.named_blocks
        child_stack_frames = [(insns_rest, terminator, blocks, ssa_env)] + stack_frames
        child_ssa_env = {par[1]: arg for par, arg in zip(parameters, arguments_v)}
        child_heap = heap
        child_call_res = [ssa_target] + call_res
        return (child_insns, child_terminator, child_blocks, child_stack_frames,
                child_ssa_env, child_heap, child_call_res)

    elif isinstance(next_insn, ll.Bitcast):
        oper = next_insn.oper
        from_ty = next_insn.from_ty
        to_ty = next_insn.to_ty
        oper_v = eval_oper(oper, ssa_env, global_env)
        res = oper_v

        # TODO
        print('bitcast {} {} to {}'
              .format(ll.ty2s(from_ty), oper_v, ll.ty2s(to_ty)))
    elif isinstance(next_insn, ll.Gep):
        res = 0
        base_ty = next_insn.base_ty
        oper_ty = next_insn.oper_ty
        oper = next_insn.oper
        steps = next_insn.steps

        actual_base_ty = ty2base_ty(base_ty, tdecs)
        actual_oper_ty = ty2base_ty(oper_ty, tdecs)

        if not isinstance(actual_oper_ty, ll.PointerType):
            err('Type of main operand to getelementptr must be a pointer type. It was {}'
                .format(ll.ty2s(actual_oper_ty)))
        elif actual_base_ty != actual_oper_ty.inner_ty:
            err(('Type of the main operand does not match the type getelementptr'
                 ' navigates through.\n'
                 '  Getelementptr type: {}\n'
                 '  Operand type:       {}')
                .format(ll.ty2s(actual_base_ty), ll.ty2s(actual_oper_ty.inner_ty)))
        else:
            oper_v = eval_oper(oper, ssa_env, global_env)
            gep_res, formula = handle_gep(oper_v, actual_base_ty, steps, ssa_env,
                                          global_env)
            res = gep_res

            # TODO
            print('Gep formula: {}'
                  .format(formula))
    elif isinstance(next_insn, ll.Zext):
        oper = next_insn.oper
        from_ty = next_insn.from_ty
        to_ty = next_insn.to_ty
        oper_v = eval_oper(oper, ssa_env, global_env)
        res = oper_v

        # TODO
        print('zext {} {} to {}'
              .format(ll.ty2s(from_ty), oper_v, ll.ty2s(to_ty)))
    elif isinstance(next_insn, ll.Ptrtoint):
        oper = next_insn.oper
        pointer_ty = next_insn.pointer_ty
        to_ty = next_insn.to_ty
        oper_v = eval_oper(oper, ssa_env, global_env)
        res = oper_v

        # TODO
        print('ptrtoint {}* {} to {}'
              .format(ll.ty2s(pointer_ty), oper_v, ll.ty2s(to_ty)))
    elif isinstance(next_insn, ll.CallResult):
        res = next_insn.val
    else:
        err('Unknown LLVM instruction: {}'
            .format(next_insn))

    if ssa_target is not None:
        if ssa_target in ssa_env:
            err('Cannot assign to variable twice: {}'
                .format(ssa_target))
        elif res is None:
            err('Cannot assign empty value to %{}'
                .format(ssa_target))
        else:
            # TODO
            print('%{} <- {}'
                  .format(ssa_target, res))
            ssa_env[ssa_target] = res

    return insns_rest, terminator, blocks, stack_frames, ssa_env, heap, call_res


def terminate(terminator, blocks, stack_frames, ssa_env, global_env, heap, call_res):
    def clear_block_from_ssa_env(insns, ssa_env):
        for (id, insn) in insns:
            if id is not None and id in ssa_env:
                del ssa_env[id]

    print('Evaluating {}'
          .format(ll.terminator2s(terminator)))

    if isinstance(terminator, ll.Ret):
        oper = terminator.oper
        if oper is None:
            oper_v = None
        else:
            oper_v = eval_oper(oper, ssa_env, global_env)

        # TODO
        print('Returning {}'
              .format(oper_v))

        if len(stack_frames) == 0:
            new_insns = [(None, ll.CallResult(oper_v))]
            new_terminator = None
            new_blocks = {}
            new_ssa_env = ssa_env
            new_stack_frames = []
            new_call_res = []
        else:
            new_insns, new_terminator, new_blocks, new_ssa_env = stack_frames[0]
            new_insns = [(call_res[0], ll.CallResult(oper_v))] + new_insns
            new_stack_frames = stack_frames[1:]
            new_call_res = call_res[1:]
        return (new_insns, new_terminator, new_blocks, new_stack_frames,
                new_ssa_env, heap, new_call_res)
    elif isinstance(terminator, ll.Br):
        label = terminator.label
        next_block = blocks[label]
        new_insns = next_block.insns
        new_terminator = next_block.terminator

        # TODO: Might need to find a better solution as we will ignore
        # multiple assignments, if they are spread over multiple
        # blocks.
        clear_block_from_ssa_env(new_insns, ssa_env)

        # TODO
        print('Jumping unconditionally to {}'
              .format(label))

        return (new_insns, new_terminator, blocks, stack_frames,
                ssa_env, heap, call_res)
    elif isinstance(terminator, ll.Cbr):
        ty = terminator.ty
        if ty != ll.SimpleType.I1:
            warn('Branching based on value of type {}. You ought to branch on {}'
                 .format(ll.ty2s(ty), ll.ty2s(ll.SimpleType.I1)))
        operand = terminator.oper
        operand_v = eval_oper(operand, ssa_env, global_env)

        if operand_v:
            label = terminator.then_label
        else:
            label = terminator.else_label

        next_block = blocks[label]
        new_insns = next_block.insns
        new_terminator = next_block.terminator
        clear_block_from_ssa_env(new_insns, ssa_env)

        # TODO
        print('Operand was {}. Branching to {}'
              .format(operand_v, label))

        return (new_insns, new_terminator, blocks, stack_frames,
                ssa_env, heap, call_res)
    else:
        err('Unknown LLVM terminator: {}'
            .format(terminator))


def eval_oper(operand, ssa_env, global_env):
    if isinstance(operand, ll.Null):
        return 0
    elif isinstance(operand, ll.Const):
        return operand.val
    elif isinstance(operand, ll.Gid):
        gid = operand.val
        try:
            return global_env[gid]
        except KeyError:
            err('Unable to find @{} in environment:\n{}'
                .format(global_env))
            return 0
    elif isinstance(operand, ll.Id):
        id = operand.val
        try:
            return ssa_env[id]
        except KeyError:
            err('Unable to find %{} in environment:\n{}'
                .format(id, ssa_env))
            return 0


def eval_binop(bop, left, right):
    if bop == 'add':
        return left + right
    elif bop == 'sub':
        return left - right
    elif bop == 'mul':
        return left * right
    elif bop == 'sdiv':
        return left // right
    elif bop == 'shl':
        return left << right
    elif bop == 'ashr':
        return left >> right
    elif bop == 'lshr':
        return (left >> right) % 0x10000000000000000
    elif bop == 'and':
        return left & right
    elif bop == 'or':
        return left | right
    elif bop == 'xor':
        return left ^ right
    else:
        err('Unknown LLVM Binary operator: {}'
            .format(bop))


def eval_icmp(cnd, left, right):
    if cnd == 'eq':
        return left == right
    elif cnd == 'ne':
        return left != right
    elif cnd == 'slt':
        return left < right
    elif cnd == 'sle':
        return left <= right
    elif cnd == 'sgt':
        return left > right
    elif cnd == 'sge':
        return left >= right
    else:
        err('eval_icmp: Unknown cnd: {}'
            .format(cnd))
        return 0


def handle_gep(starting_location, starting_type, starting_steps, ssa_env, global_env):
    def visit(current_location, current_type, current_steps, current_formula):
        if len(current_steps) == 0:
            return current_location, current_formula
        else:
            s_ty, s_oper = current_steps[0]
            s_oper_v = eval_oper(s_oper, ssa_env, global_env)
            next_steps = current_steps[1:]
            if isinstance(current_type, ll.StructType):
                if not isinstance(s_oper, ll.Const):
                    err('Index into struct must be a constant. It was: {}'
                        .format(ll.oper2s(s_oper)))
                    return current_location, current_formula + ' + ???'
                jumps = [base_ty2size(current_type.fields[i])
                         for i in range(s_oper_v)]
                next_location = current_location + sum(jumps)
                next_type = current_type.fields[s_oper_v]
                if len(jumps) > 0:
                    next_formula = ('{} + ({})'
                                    .format(current_formula,
                                            ' + '.join(map(str, jumps))))
                else:
                    next_formula = ('{} + 0'
                                    .format(current_formula))

                return visit(next_location, next_type, next_steps, next_formula)
            elif isinstance(current_type, ll.ArrayType):
                TODO('gep arrays')
                return current_location, current_formula + ' + ???'
            elif isinstance(current_type, ll.PointerType):
                err(('Cannot use getelementptr to traverse pointers.'
                     ' Use Load, and getelementptr on the result from that'
                     ' to go through a pointer.'))
                return current_location, current_formula + ' + ???'
            else:
                err('Unknown type to getelementptr on: {}'
                    .format(ll.ty2s(current_type)))
                return current_location, current_formula + ' + ???'

    if len(starting_steps) == 0:
        err('There must be at least one stepping argument to a getelementptr instruction')
        return 0, ''
    s_ty, s_oper = starting_steps[0]
    s_oper_v = eval_oper(s_oper, ssa_env, global_env)
    size = base_ty2size(starting_type)
    next_location = starting_location + s_oper_v * size
    formula = ('{} + {} * {}'
               .format(starting_location, s_oper_v, size))
    return visit(next_location, starting_type, starting_steps[1:], formula)


def ty2base_ty(ty, tdecs, seen=[]):
    if isinstance(ty, ll.SimpleType):
        return ty
    elif isinstance(ty, ll.PointerType):
        # TODO: Consider if types behind pointers should not be
        # expanded. They might be allowed for cyclic types
        return ll.PointerType(ty2base_ty(ty.inner_ty, tdecs, seen))
    if isinstance(ty, ll.StructType):
        return ll.StructType([ty2base_ty(t, tdecs, seen)
                              for t in ty.fields])
    if isinstance(ty, ll.NamedType):
        other_name = ty.other_name
        if other_name in seen:
            err('Cyclic type definition, offender: {}. Seen: {}'
                .format(other_name, seen))
        elif other_name in tdecs:
            return ty2base_ty(tdecs[other_name].body, tdecs, [other_name] + seen)
        else:
            err('Could not find type {} in global type environment:\n{}'
                .format(ll.ty2s(ty), list(tdecs.keys())))
            return ll.SimpleType.Void
    else:
        # TODO
        err('ty2base_ty: Unknown type: {}'
            .format(ll.ty2s(ty)))
        return ty


def base_ty2size(base_ty):
    if isinstance(base_ty, ll.SimpleType):
        return 1
    elif isinstance(base_ty, ll.PointerType):
        return 1
    elif isinstance(base_ty, ll.StructType):
        return sum(map(base_ty2size, base_ty.fields))
    else:
        # TODO
        err('base_ty2size: Unknown type or illegal type: {}'
            .format(ll.ty2s(base_ty)))
        return 1


def alloc_globals(gdecls, heap):
    def alloc_global(ginit):
        next_idx = len(heap)
        if isinstance(ginit, ll.GNull):
            TODO('alloc_global: GNull')
        elif isinstance(ginit, ll.GGid):
            TODO('alloc_global: GGid')
        elif isinstance(ginit, ll.GInt):
            heap.append(ginit.val)
        elif isinstance(ginit, ll.GString):
            heap.append(ginit.val)
        elif isinstance(ginit, ll.GArray):
            TODO('alloc_global: GArray')
        elif isinstance(ginit, ll.GStruct):
            for ty, field in ginit.fields:
                alloc_global(field)
        else:
            err('alloc_global: Unknown global init value: {}'
                .format(ll.ginit2s(ginit)))
        return next_idx

    global_env = {}

    for gdecl in gdecls.values():
        location = alloc_global(gdecl.body)
        print('heap[{}] <- {}'
              .format(location, ll.gdecl2s(gdecl)))
        global_env[gdecl.name] = location

    return global_env


def auto_step(ast, function_name='tigermain', function_args=[1234, 5678]):
    tdecls = ast.tdecls
    fdecls = ast.fdecls
    gdecls = ast.gdecls

    function = fdecls[function_name]
    body = function.body
    first_block = body.first_block
    blocks = body.named_blocks
    insns = first_block.insns
    terminator = first_block.terminator
    stack_frames = []
    ssa_env = {par[1]: arg for par, arg in zip(function.parameters, function_args)}
    heap = [None]
    call_res = []

    global_env = alloc_globals(gdecls, heap)
    print('Heap after globals are allocated:')
    print(heap)
    print()

    step_cnt = 0
    while True:
        (insns, terminator, blocks,
         stack_frames, ssa_env, heap, call_res) = step(insns, terminator, blocks,
                                                       stack_frames, ssa_env,
                                                       global_env, heap, tdecls,
                                                       fdecls, call_res)
        print()
        step_cnt += 1
        if terminator is None:
            print('Stepping done!\nFinal ssa_env: {}'
                  .format(ssa_env))
            print('Final heap: {}'
                  .format(heap))
            print('Program resulted in {} after {} steps'.
                  format(insns[0][1].val, step_cnt))
            break

        if step_cnt % 100 == 0:
            while True:
                stop_q = input('We have now done {} steps. Continue? [Y/n]: '
                               .format(step_cnt)).lower()
                if stop_q in ['y', 'yes', 'n', 'no', '']:
                    break
            if stop_q in ['n', 'no']:
                break