llvm--emulator/stepper.py

435 lines
13 KiB
Python
Raw Normal View History

2017-10-29 14:18:14 +00:00
import ll
import parser
from enum import Enum
2017-10-29 17:57:13 +00:00
2017-10-29 14:18:14 +00:00
def TODO(msg):
    """Print a notice that the feature identified by ``msg`` is not implemented.

    The notice goes to standard output; nothing is returned.
    """
    notice = 'TODO: not implemented yet at {}'.format(msg)
    print(notice)
2017-10-29 17:57:13 +00:00
2017-10-29 14:18:14 +00:00
def err(msg):
    """Print ``msg`` to standard output with an ``ERROR:`` prefix.

    This only reports the problem; it does not raise or stop execution.
    """
    report = 'ERROR: {}'.format(msg)
    print(report)
2017-10-29 17:57:13 +00:00
2017-10-29 18:28:19 +00:00
def warn(msg):
    """Print ``msg`` to standard output with a ``WARNING:`` prefix."""
    report = 'WARNING: {}'.format(msg)
    print(report)
class Garbage(Enum):
    """Sentinel stored in heap cells that have been allocated but never written."""

    # The alloca handling in step() fills freshly allocated cells with this
    # member, so a load of such a cell yields a recognisable marker.
    GARBAGE = '<<Unitialized memory>>'
2017-10-29 20:08:14 +00:00
2017-10-29 20:21:25 +00:00
def step(insns, terminator, blocks, stack_frames, ssa_env, global_env, heap,
         tdecs, fdecs, call_res):
    """Execute one instruction (or the block terminator) and return the new state.

    Parameters:
        insns: list of ``(ssa_target, instruction)`` pairs still to run in the
            current block; ``ssa_target`` is ``None`` when the instruction
            does not bind a name.
        terminator: terminator of the current block; handed to ``terminate``
            once ``insns`` is empty.
        blocks: mapping from label to the named blocks of the current function.
        stack_frames: list of ``(insns, terminator, blocks, ssa_env)`` tuples
            for suspended callers, innermost caller first.
        ssa_env: dict from SSA name to value for the current function;
            updated in place.
        global_env: global declarations, forwarded to ``eval_oper``.
        heap: list of memory cells indexed by address; updated in place.
            Address 0 is treated as null and may be neither read nor written.
        tdecs: named type declarations, used to resolve types to base types.
        fdecs: mapping from function name to function declaration.
        call_res: list of SSA targets waiting for the results of the calls
            currently in progress, innermost call first.

    Returns:
        The 7-tuple ``(insns, terminator, blocks, stack_frames, ssa_env, heap,
        call_res)`` describing the state after this step.

    Problems in the emulated program are reported through ``err`` and the
    emulator keeps going where it can.
    """
    if not insns:
        return terminate(terminator, blocks, stack_frames, ssa_env, global_env, heap,
                         call_res)

    ssa_target, next_insn = insns[0]
    insns_rest = insns[1:]

    # TODO
    print('Evaluating {}'
          .format(ll.insn2s(next_insn)))

    res = None

    if isinstance(next_insn, ll.Binop):
        bop = next_insn.bop
        left_v = eval_oper(next_insn.left, ssa_env, global_env)
        right_v = eval_oper(next_insn.right, ssa_env, global_env)
        res = eval_binop(bop, left_v, right_v)
        # TODO
        print('{} {}, {}'
              .format(bop, left_v, right_v))

    elif isinstance(next_insn, ll.Alloca):
        base_ty = ty2base_ty(next_insn.ty, tdecs)
        size = base_ty2size(base_ty)
        # TODO
        print('alloca {} --> allocating {} cells'
              .format(ll.ty2s(base_ty), size))
        # The new object starts at the current end of the heap. At least one
        # cell is always reserved so every alloca yields a distinct address.
        ptr = len(heap)
        heap.extend([Garbage.GARBAGE] * max(size, 1))
        res = ptr

    elif isinstance(next_insn, ll.Load):
        base_ty = ty2base_ty(next_insn.ty, tdecs)
        size = base_ty2size(base_ty)
        location_v = eval_oper(next_insn.location, ssa_env, global_env)
        # TODO
        print('load heap[{}]'
              .format(location_v))
        if size != 1:
            err(('This emulator cannot load objects larger than 1 cell.'
                 ' Current size is {}')
                .format(size))
        if location_v == 0:
            err('You are not allowed to read from location 0')
            res = 0
        elif not _is_valid_heap_address(heap, location_v):
            # Without this check a negative address would silently wrap
            # around to the end of the heap and any other bad address would
            # crash the emulator itself.
            err('Cannot read from invalid heap location {}. The heap has {} cells'
                .format(location_v, len(heap)))
            res = 0
        else:
            res = heap[location_v]

    elif isinstance(next_insn, ll.Store):
        base_ty = ty2base_ty(next_insn.ty, tdecs)
        size = base_ty2size(base_ty)
        value_v = eval_oper(next_insn.value, ssa_env, global_env)
        location_v = eval_oper(next_insn.location, ssa_env, global_env)
        # TODO
        print('heap[{}] <- {}'
              .format(location_v, value_v))
        if location_v == 0:
            err('You are not allowed to store at location 0 (Null)')
        elif size == 1:
            if _is_valid_heap_address(heap, location_v):
                heap[location_v] = value_v
            else:
                err('Cannot store at invalid heap location {}. The heap has {} cells'
                    .format(location_v, len(heap)))
        else:
            err(('This emulator cannot store objects larger than 1 cell.'
                 ' Current size is {}')
                .format(size))

    elif isinstance(next_insn, ll.Icmp):
        cnd = next_insn.cnd
        left_v = eval_oper(next_insn.left, ssa_env, global_env)
        right_v = eval_oper(next_insn.right, ssa_env, global_env)
        res = eval_icmp(cnd, left_v, right_v)
        # TODO
        print('icmp {} {}, {}'
              .format(cnd, left_v, right_v))

    elif isinstance(next_insn, ll.Call):
        callee = next_insn.callee
        arguments = next_insn.arguments
        if not isinstance(callee, ll.Gid):
            err('Cannot call anything but global identifiers: {}'
                .format(ll.oper2s(callee)))
            return insns_rest, terminator, blocks, stack_frames, ssa_env, heap, call_res
        arguments_v = [eval_oper(oper, ssa_env, global_env)
                       for ty, oper in arguments]
        try:
            function = fdecs[callee.val]
        except KeyError:
            err('Could not find function {} in environment:\n{}'
                .format(callee.val, fdecs.keys()))
            return insns_rest, terminator, blocks, stack_frames, ssa_env, heap, call_res
        parameters = function.parameters
        print('call @{} ({})'
              .format(callee.val,
                      ', '.join('%{} <- {}'.format(par[1], arg)
                                for par, arg in zip(parameters, arguments_v))))
        # Suspend the caller: its remaining work goes on the frame stack and
        # the name that should receive the result goes on call_res. The
        # callee starts at its first block with only its parameters bound.
        child_stack_frames = [(insns_rest, terminator, blocks, ssa_env)] + stack_frames
        child_ssa_env = {par[1]: arg for par, arg in zip(parameters, arguments_v)}
        child_call_res = [ssa_target] + call_res
        return (function.body.first_block.insns,
                function.body.first_block.terminator,
                function.body.named_blocks,
                child_stack_frames,
                child_ssa_env, heap, child_call_res)

    elif isinstance(next_insn, ll.Bitcast):
        # Values carry no type information here, so a cast is the identity.
        oper_v = eval_oper(next_insn.oper, ssa_env, global_env)
        res = oper_v
        # TODO
        print('bitcast {} {} to {}'
              .format(ll.ty2s(next_insn.from_ty), oper_v, ll.ty2s(next_insn.to_ty)))

    elif isinstance(next_insn, ll.Gep):
        TODO('Gep')

    elif isinstance(next_insn, ll.Zext):
        oper_v = eval_oper(next_insn.oper, ssa_env, global_env)
        res = oper_v
        # TODO
        print('zext {} {} to {}'
              .format(ll.ty2s(next_insn.from_ty), oper_v, ll.ty2s(next_insn.to_ty)))

    elif isinstance(next_insn, ll.Ptrtoint):
        oper_v = eval_oper(next_insn.oper, ssa_env, global_env)
        res = oper_v
        # TODO
        print('ptrtoint {}* {} to {}'
              .format(ll.ty2s(next_insn.pointer_ty), oper_v, ll.ty2s(next_insn.to_ty)))

    elif isinstance(next_insn, ll.CallResult):
        # Pseudo-instruction inserted by terminate() to deliver a return value.
        res = next_insn.val

    else:
        err('Unknown LLVM instruction: {}'
            .format(next_insn))

    if ssa_target is not None:
        if ssa_target in ssa_env:
            err('Cannot assign to variable twice: {}'
                .format(ssa_target))
        elif res is None:
            err('Cannot assign empty value to %{}'
                .format(ssa_target))
        else:
            # TODO
            print('%{} <- {}'
                  .format(ssa_target, res))
            ssa_env[ssa_target] = res

    return insns_rest, terminator, blocks, stack_frames, ssa_env, heap, call_res


def _is_valid_heap_address(heap, address):
    """Return True if ``address`` is an int naming an existing, non-null heap cell."""
    return isinstance(address, int) and 0 < address < len(heap)
2017-10-29 14:18:14 +00:00
2017-10-29 20:21:25 +00:00
def terminate(terminator, blocks, stack_frames, ssa_env, global_env, heap, call_res):
    """Execute a block terminator and return the resulting emulator state.

    Parameters:
        terminator: the ``ll.Ret``, ``ll.Br`` or ``ll.Cbr`` to execute.
        blocks: mapping from label to the named blocks of the current function.
        stack_frames: list of ``(insns, terminator, blocks, ssa_env)`` tuples
            for suspended callers, innermost caller first.
        ssa_env: dict from SSA name to value; names defined by the block
            being jumped to are removed from it in place.
        global_env: global declarations, forwarded to ``eval_oper``.
        heap: list of memory cells; passed through unchanged.
        call_res: list of SSA targets waiting for call results, innermost
            call first.

    Returns:
        The 7-tuple ``(insns, terminator, blocks, stack_frames, ssa_env, heap,
        call_res)``. When execution is over the returned terminator is
        ``None`` and ``insns`` holds a single ``ll.CallResult`` carrying the
        final value. The same halted state (with value ``None``) is returned
        after reporting an unknown terminator or an unknown branch label, so
        callers can always unpack the result.
    """
    def clear_block_from_ssa_env(insns, ssa_env):
        # Forget the names a block defines so that re-entering it (for
        # example in a loop) is not reported as a double assignment.
        for (name, _insn) in insns:
            if name is not None and name in ssa_env:
                del ssa_env[name]

    def halted(value):
        # State that signals "nothing left to run" to the driver loop.
        return ([(None, ll.CallResult(value))], None, {}, [],
                ssa_env, heap, [])

    print('Evaluating {}'
          .format(ll.terminator2s(terminator)))

    if isinstance(terminator, ll.Ret):
        oper = terminator.oper
        oper_v = None if oper is None else eval_oper(oper, ssa_env, global_env)
        # TODO
        print('Returning {}'
              .format(oper_v))

        if not stack_frames:
            # Returning from the outermost function ends the program.
            return halted(oper_v)

        # Resume the innermost caller and hand it the return value through a
        # CallResult pseudo-instruction bound to the waiting SSA target.
        new_insns, new_terminator, new_blocks, new_ssa_env = stack_frames[0]
        new_insns = [(call_res[0], ll.CallResult(oper_v))] + new_insns
        return (new_insns, new_terminator, new_blocks, stack_frames[1:],
                new_ssa_env, heap, call_res[1:])

    if isinstance(terminator, ll.Br):
        label = terminator.label
        try:
            next_block = blocks[label]
        except KeyError:
            err('Cannot branch to unknown label: {}'
                .format(label))
            return halted(None)
        new_insns = next_block.insns
        # TODO: Might need to find a better solution as we will ignore
        # multiple assignments, if they are spread over multiple
        # blocks.
        clear_block_from_ssa_env(new_insns, ssa_env)
        # TODO
        print('Jumping unconditionally to {}'
              .format(label))
        return (new_insns, next_block.terminator, blocks, stack_frames,
                ssa_env, heap, call_res)

    if isinstance(terminator, ll.Cbr):
        ty = terminator.ty
        if ty != ll.SimpleType.I1:
            warn('Branching based on value of type {}. You ought to branch on {}'
                 .format(ll.ty2s(ty), ll.ty2s(ll.SimpleType.I1)))
        operand_v = eval_oper(terminator.oper, ssa_env, global_env)
        label = terminator.then_label if operand_v else terminator.else_label
        try:
            next_block = blocks[label]
        except KeyError:
            err('Cannot branch to unknown label: {}'
                .format(label))
            return halted(None)
        new_insns = next_block.insns
        clear_block_from_ssa_env(new_insns, ssa_env)
        # TODO
        print('Operand was {}. Branching to {}'
              .format(operand_v, label))
        return (new_insns, next_block.terminator, blocks, stack_frames,
                ssa_env, heap, call_res)

    err('Unknown LLVM terminator: {}'
        .format(terminator))
    return halted(None)
def eval_oper(operand, ssa_env, global_env):
    """Evaluate an operand to the value it denotes.

    Parameters:
        operand: an ``ll.Null``, ``ll.Const``, ``ll.Gid`` or ``ll.Id``.
        ssa_env: dict from SSA name to value, used to resolve ``ll.Id``.
        global_env: global declarations; currently unused because global
            identifiers are not implemented yet.

    Returns:
        0 for null, the literal for a constant, and the bound value for a
        local identifier. ``None`` is returned, after printing a diagnostic,
        for global identifiers, unbound local identifiers and unrecognised
        operand kinds.
    """
    if isinstance(operand, ll.Null):
        return 0
    if isinstance(operand, ll.Const):
        return operand.val
    if isinstance(operand, ll.Gid):
        TODO('eval_oper Gid')
        return None
    if isinstance(operand, ll.Id):
        name = operand.val
        try:
            return ssa_env[name]
        except KeyError:
            err('Unable to find %{} in environment:\n{}'
                .format(name, ssa_env))
            return None
    # Previously an unrecognised operand fell through without any message.
    err('eval_oper: Unknown operand: {}'
        .format(operand))
    return None
def eval_binop(bop, left, right):
    """Apply the LLVM binary operator named ``bop`` to two integer values.

    Parameters:
        bop: operator name: 'add', 'sub', 'mul', 'sdiv', 'shl', 'ashr',
            'lshr', 'and', 'or' or 'xor'.
        left: left operand, a Python int interpreted as a signed value.
        right: right operand, a Python int interpreted as a signed value.

    Returns:
        The result as a Python int. A division by zero is reported through
        ``err`` and yields 0. An unknown operator is reported through ``err``
        and yields ``None``.
    """
    if bop == 'add':
        return left + right
    if bop == 'sub':
        return left - right
    if bop == 'mul':
        return left * right
    if bop == 'sdiv':
        if right == 0:
            err('Division by zero: sdiv {}, {}'
                .format(left, right))
            return 0
        # LLVM's sdiv rounds toward zero, whereas Python's // rounds toward
        # negative infinity; divide the magnitudes and restore the sign.
        quotient = abs(left) // abs(right)
        return -quotient if (left < 0) != (right < 0) else quotient
    if bop == 'shl':
        return left << right
    if bop == 'ashr':
        # Python's >> on ints is already an arithmetic (sign-preserving) shift.
        return left >> right
    if bop == 'lshr':
        # A logical shift fills with zeros, so the operand has to be viewed
        # as an unsigned 64-bit pattern *before* shifting. The result is then
        # mapped back to the signed range used for every other value.
        shifted = (left % 0x10000000000000000) >> right
        if shifted >= 0x8000000000000000:
            shifted -= 0x10000000000000000
        return shifted
    if bop == 'and':
        return left & right
    if bop == 'or':
        return left | right
    if bop == 'xor':
        return left ^ right
    err('Unknown LLVM Binary operator: {}'
        .format(bop))
    return None
2017-10-29 17:57:13 +00:00
def eval_icmp(cnd, left, right):
    """Evaluate the integer comparison named ``cnd`` on ``left`` and ``right``.

    Supported conditions are 'eq', 'ne', 'slt', 'sle', 'sgt' and 'sge'. The
    result is the Python boolean of the comparison. Any other condition is
    reported through ``err`` and yields 0.
    """
    comparisons = {
        'eq': lambda a, b: a == b,
        'ne': lambda a, b: a != b,
        'slt': lambda a, b: a < b,
        'sle': lambda a, b: a <= b,
        'sgt': lambda a, b: a > b,
        'sge': lambda a, b: a >= b,
    }
    compare = comparisons.get(cnd)
    if compare is None:
        err('eval_icmp: Unknown cnd: {}'
            .format(cnd))
        return 0
    return compare(left, right)
2017-10-29 14:18:14 +00:00
2017-10-29 18:01:29 +00:00
2017-10-29 21:00:18 +00:00
def ty2base_ty(ty, tdecs, seen=None):
    """Resolve ``ty`` to a type that contains no named types.

    Parameters:
        ty: an ``ll.SimpleType``, ``ll.PointerType``, ``ll.StructType`` or
            ``ll.NamedType``.
        tdecs: mapping from type name to its declaration; the declaration's
            ``body`` is the type the name stands for.
        seen: names already being expanded on the current path, used to
            detect cyclic definitions. Callers normally omit it.

    Returns:
        The resolved type. ``ll.SimpleType.Void`` is returned, after
        reporting through ``err``, for a cyclic or undeclared named type. An
        unrecognised type is reported and returned unchanged.
    """
    # A fresh list per top-level call instead of a shared mutable default.
    seen = [] if seen is None else seen

    if isinstance(ty, ll.SimpleType):
        return ty
    if isinstance(ty, ll.PointerType):
        # NOTE(review): the pointee is expanded too, so a struct that refers
        # to itself through a pointer is reported as cyclic - confirm whether
        # that is intended.
        return ll.PointerType(ty2base_ty(ty.inner_ty, tdecs, seen))
    if isinstance(ty, ll.StructType):
        return ll.StructType([ty2base_ty(t, tdecs, seen)
                              for t in ty.fields])
    if isinstance(ty, ll.NamedType):
        other_name = ty.other_name
        if other_name in seen:
            err('Cyclic type definition, offender: {}. Seen: {}'
                .format(other_name, seen))
        elif other_name in tdecs:
            return ty2base_ty(tdecs[other_name].body, tdecs, [other_name] + seen)
        else:
            # ll.ty2s, not the bare name ty2s, which is undefined in this
            # module and raised NameError on this path.
            err('Could not find type {} in gloval type environment:\n{}'
                .format(ll.ty2s(ty), tdecs.keys()))
        # Both error paths fall back to Void so callers always get a type.
        return ll.SimpleType.Void

    # TODO
    err('ty2base_ty: Unknown type: {}'
        .format(ll.ty2s(ty)))
    return ty
def base_ty2size(base_ty):
    """Return how many heap cells a value of the resolved type ``base_ty`` needs.

    Simple types and pointers take one cell; a struct takes the sum of its
    fields' sizes. Any other type is reported through ``err`` and counted as
    one cell.
    """
    single_cell_kinds = (ll.SimpleType, ll.PointerType)
    if isinstance(base_ty, single_cell_kinds):
        return 1

    if isinstance(base_ty, ll.StructType):
        total = 0
        for field_ty in base_ty.fields:
            total += base_ty2size(field_ty)
        return total

    # TODO
    err('base_ty2size: Unknown type or illegal type: {}'
        .format(ll.ty2s(base_ty)))
    return 1
2017-10-29 14:18:14 +00:00
def gogo():
    """Parse a built-in LLVM program and step through ``@tigermain`` to completion.

    Every step prints its own trace; when stepping is done the final SSA
    environment and the program's result are printed.
    """
    llvm_parser = parser.LLVMParser()
    llvm_parser.build()

    data = r'''
%T_tigermain = type { i64, i64 }
define i64 @tigermain (i64 %U_mainSL_8, i64 %U_mainDummy_9) {
%a = alloca i64
%t = alloca %T_tigermain
store i64 9, i64* %a
%b = load i64, i64* %a
ret i64 %b
}
'''

    print(data)
    ast = llvm_parser.parse(data)
    tdecs = ast.tdecls
    fdecs = ast.fdecls
    global_env = ast.gdecls

    # Initial state: start of tigermain, no callers, no bindings.
    entry_function = ast.fdecls['tigermain']
    entry_block = entry_function.body.first_block
    blocks = entry_function.body.named_blocks
    insns = entry_block.insns
    terminator = entry_block.terminator
    stack_frames = []
    ssa_env = {}
    # Cell 0 is a placeholder: address 0 is the null pointer, which step()
    # refuses to load from or store to.
    heap = [None]
    call_res = []

    # step() signals the end of the program by returning a None terminator.
    finished = False
    while not finished:
        (insns, terminator, blocks, stack_frames,
         ssa_env, heap, call_res) = step(insns, terminator, blocks,
                                         stack_frames, ssa_env,
                                         global_env, heap, tdecs,
                                         fdecs, call_res)
        finished = terminator is None

    print('Stepping done! Final ssa_env:\n{}'
          .format(ssa_env))
    # The sole remaining pseudo-instruction carries the returned value.
    print('Program resulted in {}'.
          format(insns[0][1].val))
# Run the built-in demo program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    gogo()