backgammon/main.py

81 lines
2.8 KiB
Python

import argparse
import sys
import time
def print_train_outcome(outcome, trained_eps=0):
    """Print a one-line, semicolon-separated summary of a training batch.

    The emitted line has the form
    ``train;<time>;<trained_eps>;<count>;<sum>;<mean>`` where ``<time>`` is
    the current Unix time truncated to whole seconds.

    Args:
        outcome: Collection of numbers supporting ``len()`` and ``sum()``.
        trained_eps: Value reported in the ``<trained_eps>`` field.
    """
    # Summarise the argument itself, not a same-named global of the caller.
    count = len(outcome)
    total = sum(outcome)
    format_vars = {
        'trained_eps': trained_eps,
        'count': count,
        'sum': total,
        # An empty batch has no meaningful average; report NaN rather than
        # raising ZeroDivisionError.
        'mean': total / count if count else float('nan'),
        'time': int(time.time()),
    }
    print("train;{time};{trained_eps};{count};{sum};{mean}".format(**format_vars))
def print_eval_outcomes(outcomes, trained_eps=0):
    """Print one semicolon-separated summary line per evaluation outcome.

    Each emitted line has the form
    ``eval;<time>;<method>;<trained_eps>;<count>;<sum>;<mean>`` where
    ``<time>`` is the current Unix time truncated to whole seconds.

    Args:
        outcomes: Iterable of indexable entries; element 0 is reported as the
            method and element 1 is a collection of numbers supporting
            ``len()`` and ``sum()``.
        trained_eps: Value reported in the ``<trained_eps>`` field.
    """
    for outcome in outcomes:
        method = outcome[0]
        scores = outcome[1]
        count = len(scores)
        total = sum(scores)
        format_vars = {
            'trained_eps': trained_eps,
            'method': method,
            'count': count,
            'sum': total,
            # A method with no scores must not abort reporting for the
            # remaining methods; report NaN instead of dividing by zero.
            'mean': total / count if count else float('nan'),
            'time': int(time.time()),
        }
        print("eval;{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars))
# Command-line interface: training batch size, model location, evaluation
# methods, and the three mode switches.
parser = argparse.ArgumentParser(description="Backgammon games")

# Value-carrying options ('store' is argparse's default action).
parser.add_argument(
    '--episodes',
    dest='episode_count',
    type=int,
    default=1000,
    help='number of episodes to train',
)
parser.add_argument(
    '--model-path',
    dest='model_path',
    default='./model',
    help='path to Tensorflow model',
)
parser.add_argument(
    '--eval-methods',
    nargs='*',
    default=['random'],
    help='specifies evaluation methods',
)

# Boolean mode switches; each defaults to False when absent.
parser.add_argument(
    '--eval',
    action='store_true',
    help='whether to evaluate the neural network with a random choice bot',
)
parser.add_argument(
    '--train',
    action='store_true',
    help='whether to train the neural network',
)
parser.add_argument(
    '--play',
    action='store_true',
    help='whether to play with the neural network',
)
# Parse the process's command line (argparse exits on invalid input).
args = parser.parse_args()

# Plain-dict copy of the parsed options, keyed by option name.
config = {
    key: getattr(args, key)
    for key in (
        'model_path',
        'episode_count',
        'eval_methods',
        'train',
        'play',
        'eval',
    )
}
#print("-"*30)
#print(type(args.eval_methods))
#print(args.eval_methods)
#print("-"*30)
# Imported here, after the command line has been parsed, rather than at the
# top of the file.
# NOTE(review): presumably deliberate so that --help and argument errors do
# not pay for loading the game module — confirm before moving it.
import game
g = game.Game(config = config)
g.set_up_bots()
episode_count = args.episode_count
if args.train:
    # Running total of episodes trained so far in this process.
    eps = 0
    # Train in batches of `episode_count` with no exit condition: the loop
    # only ends when the process is stopped or an exception propagates.
    while True:
        train_outcome = g.train_model(episodes = episode_count, trained_eps = eps)
        eps += episode_count
        print_train_outcome(train_outcome, trained_eps = eps)
        if args.eval:
            # With --eval also given, evaluate after every training batch.
            eval_outcomes = g.eval(trained_eps = eps)
            print_eval_outcomes(eval_outcomes, trained_eps = eps)
        # Push the summary lines out once per batch.
        # NOTE(review): nesting level reconstructed from de-indented source;
        # assumed to run every iteration, not only when --eval is set — confirm.
        sys.stdout.flush()
elif args.eval:
    # Evaluation only (no --train): a single pass, reported with
    # trained_eps fixed at 0.
    outcomes = g.eval()
    print_eval_outcomes(outcomes, trained_eps = 0)
# The --play branch is disabled: no code in this section acts on args.play.
#elif args.play:
# g.play(episodes = episode_count)