# A small step-wise emulator for a subset of LLVM IR.
#
# The machine state threaded through `step` / `terminate` is the 7-tuple
#   (insns, terminator, blocks, stack_frames, ssa_env, heap, call_res)
# Errors are reported by printing (see `err`) and execution continues on a
# best-effort basis rather than raising.
#
# NOTE(review): the bare `# TODO` markers in front of trace output are kept
# from the original; their intent is not stated there (presumably a
# placeholder for proper logging -- confirm).
import ll
import parser
from enum import Enum


# Width used by `lshr` to reinterpret a value as unsigned (the emulator
# treats integer cells as 64-bit for this operation).
_WORD_MASK = 0xFFFFFFFFFFFFFFFF


def TODO(msg):
    """Print a notice that the feature named by *msg* is not implemented."""
    print('TODO: not implemented yet at {}'.format(msg))


def err(msg):
    """Print *msg* as an error; does not raise or stop execution."""
    print('ERROR: {}'.format(msg))


def warn(msg):
    """Print *msg* as a warning."""
    print('WARNING: {}'.format(msg))


class Garbage(Enum):
    """Marker stored in heap cells that were allocated but never written."""

    GARBAGE = '<>'


def step(insns, terminator, blocks, stack_frames, ssa_env, global_env, heap,
         tdecs, fdecs, call_res):
    """Execute one instruction (or the block terminator) and return the state.

    insns        -- list of (ssa_target, instruction) pairs left in the
                    current block; ssa_target is None when nothing is bound.
    terminator   -- terminator of the current block.
    blocks       -- mapping from label to block for the current function.
    stack_frames -- list of saved (insns, terminator, blocks, ssa_env)
                    caller frames, innermost first.
    ssa_env      -- dict from SSA name to value for the current function.
    global_env   -- global declarations, passed through to `eval_oper`.
    heap         -- list of cells; pointers are indices into it.
    tdecs, fdecs -- named type and function declarations.
    call_res     -- SSA targets awaiting results of pending calls,
                    innermost first.

    Returns the new (insns, terminator, blocks, stack_frames, ssa_env, heap,
    call_res) tuple. `ssa_env` and `heap` are mutated in place.
    """
    if not insns:
        return terminate(terminator, blocks, stack_frames, ssa_env,
                         global_env, heap, call_res)

    ssa_target, next_insn = insns[0]
    insns_rest = insns[1:]
    # TODO
    print('Evaluating {}'.format(ll.insn2s(next_insn)))

    res = None
    if isinstance(next_insn, ll.Binop):
        bop = next_insn.bop
        left_v = eval_oper(next_insn.left, ssa_env, global_env)
        right_v = eval_oper(next_insn.right, ssa_env, global_env)
        res = eval_binop(bop, left_v, right_v)
        # TODO
        print('{} {}, {}'.format(bop, left_v, right_v))
    elif isinstance(next_insn, ll.Alloca):
        base_ty = ty2base_ty(next_insn.ty, tdecs)
        size = base_ty2size(base_ty)
        # TODO
        print('alloca {} --> allocating {} cells'
              .format(ll.ty2s(base_ty), size))
        # The new object starts at the current end of the heap; at least one
        # cell is always reserved so every alloca gets a distinct address.
        ptr = len(heap)
        heap.extend([Garbage.GARBAGE] * max(size, 1))
        res = ptr
    elif isinstance(next_insn, ll.Load):
        base_ty = ty2base_ty(next_insn.ty, tdecs)
        size = base_ty2size(base_ty)
        location_v = eval_oper(next_insn.location, ssa_env, global_env)
        # TODO
        print('load heap[{}]'.format(location_v))
        if size != 1:
            # Reported, but the load below is still attempted.
            err(('This emulator cannot load objects larger than 1 cell.'
                 ' Current size is {}').format(size))
        if location_v == 0:
            err('You are not allowed to read from location 0')
            res = 0
        else:
            res = heap[location_v]
    elif isinstance(next_insn, ll.Store):
        base_ty = ty2base_ty(next_insn.ty, tdecs)
        size = base_ty2size(base_ty)
        value_v = eval_oper(next_insn.value, ssa_env, global_env)
        location_v = eval_oper(next_insn.location, ssa_env, global_env)
        # TODO
        print('heap[{}] <- {}'.format(location_v, value_v))
        if location_v == 0:
            err('You are not allowed to store at location 0 (Null)')
        elif size == 1:
            heap[location_v] = value_v
        else:
            err(('This emulator cannot store objects larger than 1 cell.'
                 ' Current size is {}').format(size))
    elif isinstance(next_insn, ll.Icmp):
        cnd = next_insn.cnd
        left_v = eval_oper(next_insn.left, ssa_env, global_env)
        right_v = eval_oper(next_insn.right, ssa_env, global_env)
        res = eval_icmp(cnd, left_v, right_v)
        # TODO
        print('icmp {} {}, {}'.format(cnd, left_v, right_v))
    elif isinstance(next_insn, ll.Call):
        callee = next_insn.callee
        arguments = next_insn.arguments
        if not isinstance(callee, ll.Gid):
            err('Cannot call anything but global identifiers: {}'
                .format(ll.oper2s(callee)))
            return (insns_rest, terminator, blocks, stack_frames, ssa_env,
                    heap, call_res)
        arguments_v = [eval_oper(oper, ssa_env, global_env)
                       for _ty, oper in arguments]
        try:
            function = fdecs[callee.val]
        except KeyError:
            err('Could not find function {} in environment:\n{}'
                .format(callee.val, fdecs.keys()))
            return (insns_rest, terminator, blocks, stack_frames, ssa_env,
                    heap, call_res)
        # Each parameter is indexed as par[1] for its name (presumably a
        # (type, name) pair -- confirm against the parser).
        parameters = function.parameters
        print('call @{} ({})'.format(
            callee.val,
            ', '.join('%{} <- {}'.format(par[1], arg)
                      for par, arg in zip(parameters, arguments_v))))
        entry = function.body.first_block
        # Save the caller's continuation and remember where the result goes;
        # `terminate` pops both when the callee returns.
        child_stack_frames = (
            [(insns_rest, terminator, blocks, ssa_env)] + stack_frames)
        child_ssa_env = {par[1]: arg
                         for par, arg in zip(parameters, arguments_v)}
        child_call_res = [ssa_target] + call_res
        return (entry.insns, entry.terminator, function.body.named_blocks,
                child_stack_frames, child_ssa_env, heap, child_call_res)
    elif isinstance(next_insn, ll.Bitcast):
        oper_v = eval_oper(next_insn.oper, ssa_env, global_env)
        res = oper_v
        # TODO
        print('bitcast {} {} to {}'.format(ll.ty2s(next_insn.from_ty),
                                           oper_v,
                                           ll.ty2s(next_insn.to_ty)))
    elif isinstance(next_insn, ll.Gep):
        TODO('Gep')
    elif isinstance(next_insn, ll.Zext):
        oper_v = eval_oper(next_insn.oper, ssa_env, global_env)
        res = oper_v
        # TODO
        print('zext {} {} to {}'.format(ll.ty2s(next_insn.from_ty),
                                        oper_v,
                                        ll.ty2s(next_insn.to_ty)))
    elif isinstance(next_insn, ll.Ptrtoint):
        oper_v = eval_oper(next_insn.oper, ssa_env, global_env)
        res = oper_v
        # TODO
        print('ptrtoint {}* {} to {}'.format(ll.ty2s(next_insn.pointer_ty),
                                             oper_v,
                                             ll.ty2s(next_insn.to_ty)))
    elif isinstance(next_insn, ll.CallResult):
        # Pseudo-instruction injected by `terminate` to deliver a return
        # value to the caller.
        res = next_insn.val
    else:
        err('Unknown LLVM instruction: {}'.format(next_insn))

    if ssa_target is not None:
        if ssa_target in ssa_env:
            err('Cannot assign to variable twice: {}'.format(ssa_target))
        elif res is None:
            err('Cannot assign empty value to %{}'.format(ssa_target))
        else:
            # TODO
            print('%{} <- {}'.format(ssa_target, res))
            ssa_env[ssa_target] = res

    return (insns_rest, terminator, blocks, stack_frames, ssa_env, heap,
            call_res)


def terminate(terminator, blocks, stack_frames, ssa_env, global_env, heap,
              call_res):
    """Execute a block terminator and return the resulting machine state.

    Handles `ll.Ret`, `ll.Br` and `ll.Cbr`. A return with no saved frames
    yields a state whose terminator is None and whose only instruction is a
    `ll.CallResult` holding the program result. An unknown terminator is
    reported and yields the same kind of halting state with a None result.

    Returns (insns, terminator, blocks, stack_frames, ssa_env, heap,
    call_res).
    """
    def clear_block_from_ssa_env(block_insns, env):
        # Forget bindings made by the block about to be (re-)entered so that
        # loops do not trip the "assign twice" check in `step`.
        for target, _insn in block_insns:
            if target is not None and target in env:
                del env[target]

    print('Evaluating {}'.format(ll.terminator2s(terminator)))

    if isinstance(terminator, ll.Ret):
        oper = terminator.oper
        if oper is None:
            oper_v = None
        else:
            oper_v = eval_oper(oper, ssa_env, global_env)
        # TODO
        print('Returning {}'.format(oper_v))
        if not stack_frames:
            # Returning from the outermost function: leave only the result.
            new_insns = [(None, ll.CallResult(oper_v))]
            new_terminator = None
            new_blocks = {}
            new_ssa_env = ssa_env
            new_stack_frames = []
            new_call_res = []
        else:
            # Resume the caller, feeding it the result first.
            caller_insns, new_terminator, new_blocks, new_ssa_env = (
                stack_frames[0])
            new_insns = [(call_res[0], ll.CallResult(oper_v))] + caller_insns
            new_stack_frames = stack_frames[1:]
            new_call_res = call_res[1:]
        return (new_insns, new_terminator, new_blocks, new_stack_frames,
                new_ssa_env, heap, new_call_res)
    elif isinstance(terminator, ll.Br):
        label = terminator.label
        next_block = blocks[label]
        new_insns = next_block.insns
        new_terminator = next_block.terminator
        # TODO: Might need to find a better solution as we will ignore
        #       multiple assignments, if they are spread over multiple
        #       blocks.
        clear_block_from_ssa_env(new_insns, ssa_env)
        # TODO
        print('Jumping unconditionally to {}'.format(label))
        return (new_insns, new_terminator, blocks, stack_frames, ssa_env,
                heap, call_res)
    elif isinstance(terminator, ll.Cbr):
        ty = terminator.ty
        if ty != ll.SimpleType.I1:
            warn('Branching based on value of type {}. '
                 'You ought to branch on {}'
                 .format(ll.ty2s(ty), ll.ty2s(ll.SimpleType.I1)))
        operand_v = eval_oper(terminator.oper, ssa_env, global_env)
        if operand_v:
            label = terminator.then_label
        else:
            label = terminator.else_label
        next_block = blocks[label]
        new_insns = next_block.insns
        new_terminator = next_block.terminator
        clear_block_from_ssa_env(new_insns, ssa_env)
        # TODO
        print('Operand was {}. Branching to {}'.format(operand_v, label))
        return (new_insns, new_terminator, blocks, stack_frames, ssa_env,
                heap, call_res)
    else:
        err('Unknown LLVM terminator: {}'.format(terminator))
        # Halt instead of implicitly returning None, which the driver loop
        # would fail to unpack.
        return ([(None, ll.CallResult(None))], None, {}, [], ssa_env, heap,
                [])


def eval_oper(operand, ssa_env, global_env):
    """Evaluate an operand to a value.

    `ll.Null` is 0, `ll.Const` is its `val`, `ll.Id` is looked up in
    *ssa_env*. Global identifiers are not implemented. Returns None (after
    printing a message) when no value can be produced.
    """
    if isinstance(operand, ll.Null):
        return 0
    elif isinstance(operand, ll.Const):
        return operand.val
    elif isinstance(operand, ll.Gid):
        TODO('eval_oper Gid')
    elif isinstance(operand, ll.Id):
        name = operand.val
        try:
            return ssa_env[name]
        except KeyError:
            err('Unable to find %{} in environment:\n{}'
                .format(name, ssa_env))
    return None


def eval_binop(bop, left, right):
    """Apply the binary operator named *bop* to two integer values.

    Returns None (after printing an error) for an unknown operator.
    Division by zero raises ZeroDivisionError.
    """
    if bop == 'add':
        return left + right
    elif bop == 'sub':
        return left - right
    elif bop == 'mul':
        return left * right
    elif bop == 'sdiv':
        # LLVM sdiv rounds toward zero, whereas Python's // rounds toward
        # negative infinity; divide magnitudes and restore the sign.
        quotient = abs(left) // abs(right)
        return quotient if (left < 0) == (right < 0) else -quotient
    elif bop == 'shl':
        return left << right
    elif bop == 'ashr':
        return left >> right
    elif bop == 'lshr':
        # Logical shift: reinterpret as unsigned 64-bit *before* shifting so
        # zeros, not sign bits, are shifted in.
        return (left & _WORD_MASK) >> right
    elif bop == 'and':
        return left & right
    elif bop == 'or':
        return left | right
    elif bop == 'xor':
        return left ^ right
    else:
        err('Unknown LLVM Binary operator: {}'.format(bop))
        return None


def eval_icmp(cnd, left, right):
    """Evaluate the signed/equality comparison named *cnd*.

    Returns a bool, or 0 (after printing an error) for an unknown condition.
    """
    if cnd == 'eq':
        return left == right
    elif cnd == 'ne':
        return left != right
    elif cnd == 'slt':
        return left < right
    elif cnd == 'sle':
        return left <= right
    elif cnd == 'sgt':
        return left > right
    elif cnd == 'sge':
        return left >= right
    else:
        err('eval_icmp: Unknown cnd: {}'.format(cnd))
        return 0


def ty2base_ty(ty, tdecs, seen=None):
    """Expand named types in *ty* using the declarations in *tdecs*.

    seen -- names already being expanded on the current path, used to detect
            cyclic definitions; callers normally omit it.

    Returns the expanded type. Cyclic or undeclared named types are reported
    and replaced by `ll.SimpleType.Void`; unrecognised types are reported and
    returned unchanged.
    """
    if seen is None:
        seen = []

    if isinstance(ty, ll.SimpleType):
        return ty
    if isinstance(ty, ll.PointerType):
        return ll.PointerType(ty2base_ty(ty.inner_ty, tdecs, seen))
    if isinstance(ty, ll.StructType):
        return ll.StructType([ty2base_ty(t, tdecs, seen) for t in ty.fields])
    if isinstance(ty, ll.NamedType):
        other_name = ty.other_name
        if other_name in seen:
            err('Cyclic type definition, offender: {}. Seen: {}'
                .format(other_name, seen))
        elif other_name in tdecs:
            return ty2base_ty(tdecs[other_name].body, tdecs,
                              [other_name] + seen)
        else:
            err('Could not find type {} in gloval type environment:\n{}'
                .format(ll.ty2s(ty), tdecs.keys()))
        # Both error paths fall back to Void so callers always get a type.
        return ll.SimpleType.Void

    # TODO
    err('ty2base_ty: Unknown type: {}'.format(ll.ty2s(ty)))
    return ty


def base_ty2size(base_ty):
    """Return the number of heap cells occupied by an expanded type.

    Simple and pointer types take one cell; a struct takes the sum of its
    fields. Any other type is reported and counted as one cell.
    """
    if isinstance(base_ty, (ll.SimpleType, ll.PointerType)):
        return 1
    elif isinstance(base_ty, ll.StructType):
        return sum(base_ty2size(field) for field in base_ty.fields)
    else:
        # TODO
        err('base_ty2size: Unknown type or illegal type: {}'
            .format(ll.ty2s(base_ty)))
        return 1


def gogo():
    """Parse a built-in sample program and run `tigermain` to completion."""
    p = parser.LLVMParser()
    p.build()
    data = r'''
%T_tigermain = type { i64, i64 }

define i64 @tigermain (i64 %U_mainSL_8, i64 %U_mainDummy_9) {
 %a = alloca i64
 %t = alloca %T_tigermain
 store i64 9, i64* %a
 %b = load i64, i64* %a
 ret i64 %b
}
    '''
    print(data)
    ast = p.parse(data)
    tdecs = ast.tdecls
    fdecs = ast.fdecls
    global_env = ast.gdecls

    tigermain = ast.fdecls['tigermain']
    first_block = tigermain.body.first_block
    blocks = tigermain.body.named_blocks
    insns = first_block.insns
    terminator = first_block.terminator
    stack_frames = []
    ssa_env = {}
    # Cell 0 is reserved: address 0 is the null pointer.
    heap = [None]
    call_res = []

    while True:
        (insns, terminator, blocks, stack_frames, ssa_env, heap,
         call_res) = step(insns, terminator, blocks, stack_frames, ssa_env,
                          global_env, heap, tdecs, fdecs, call_res)
        # `terminate` signals the end of the program with a None terminator
        # and a single CallResult instruction carrying the result.
        if terminator is None:
            print('Stepping done! Final ssa_env:\n{}'.format(ssa_env))
            print('Program resulted in {}'.format(insns[0][1].val))
            break


if __name__ == '__main__':
    gogo()