2018-03-08 15:27:16 +00:00
|
|
|
import argparse
|
2018-03-08 16:13:25 +00:00
|
|
|
import sys
|
2018-03-09 23:39:55 +00:00
|
|
|
import os
|
2018-03-09 20:05:38 +00:00
|
|
|
import time
|
2018-03-08 15:27:16 +00:00
|
|
|
|
2018-03-09 23:39:55 +00:00
|
|
|
# Command-line interface. Every option below becomes an attribute on `args`;
# the attribute name is the long flag with dashes turned into underscores
# unless `dest` says otherwise.
parser = argparse.ArgumentParser(description="Backgammon games")

parser.add_argument(
    '--episodes',
    dest='episode_count',
    type=int,
    default=1000,
    help='number of episodes to train',
)
parser.add_argument(
    '--model',
    default='default',
    help='name of Tensorflow model to use',
)
parser.add_argument(
    '--eval-methods',
    nargs='*',
    default=['random'],
    help='specifies evaluation methods',
)
parser.add_argument(
    '--eval',
    action='store_true',
    help='evaluate the neural network with a random choice bot',
)
parser.add_argument(
    '--bench-eval-scores',
    action='store_true',
    help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.',
)
parser.add_argument(
    '--train',
    action='store_true',
    help='train the neural network',
)
parser.add_argument(
    '--eval-after-train',
    action='store_true',
    help='evaluate after each training session',
)
parser.add_argument(
    '--play',
    action='store_true',
    help='play with the neural network',
)
parser.add_argument(
    '--start-episode',
    type=int,
    default=0,
    help='episode count to start at; purely for display purposes',
)
parser.add_argument(
    '--train-perpetually',
    action='store_true',
    help='start new training session as soon as the previous is finished',
)
parser.add_argument(
    '--list-models',
    action='store_true',
    help='list all known models',
)
parser.add_argument(
    '--board-rep',
    help='name of board representation to use as input to neural network',
)
parser.add_argument(
    '--verbose',
    action='store_true',
    help='If set, a lot of stuff will be printed',
)
# No `type` is given for the next two options, so their values (including
# the defaults) are strings.
parser.add_argument(
    '--ply',
    default='0',
    help='defines the amount of ply used when deciding what move to make',
)
parser.add_argument(
    '--repeat-eval',
    default='1',
    help='the amount of times the evaluation method should be repeated',
)

# Parses the arguments of the running process as soon as this module executes.
args = parser.parse_args()
|
|
|
|
|
2018-04-24 19:16:54 +00:00
|
|
|
|
2018-03-08 15:27:16 +00:00
|
|
|
# Settings dictionary used by the rest of this script and handed to the
# network. Most entries mirror a command-line option one-to-one; the storage
# paths and `global_step` are fixed starting values.
config = dict(
    model=args.model,
    episode_count=args.episode_count,
    eval_methods=args.eval_methods,
    train=args.train,
    play=args.play,
    eval=args.eval,
    bench_eval_scores=args.bench_eval_scores,
    eval_after_train=args.eval_after_train,
    start_episode=args.start_episode,
    train_perpetually=args.train_perpetually,
    # Directory names, relative to the working directory.
    model_storage_path='models',
    bench_storage_path='bench',
    board_representation=args.board_rep,
    global_step=0,
    verbose=args.verbose,
    # Passed through exactly as parsed (strings, see the parser definition).
    ply=args.ply,
    repeat_eval=args.repeat_eval,
)
|
|
|
|
|
2018-05-12 10:14:47 +00:00
|
|
|
|
2018-03-26 14:45:26 +00:00
|
|
|
# Create models folder. `exist_ok=True` makes this a no-op when the directory
# is already there and removes the window between "check" and "create".
os.makedirs(config['model_storage_path'], exist_ok=True)


def model_path():
    """Return the directory of the currently configured model.

    The path is rebuilt from the module-level ``config`` on every call, so it
    reflects the values of ``config['model_storage_path']`` and
    ``config['model']`` at call time.
    """
    return os.path.join(config['model_storage_path'], config['model'])


# Make sure directories exist: creating the log directory with `makedirs`
# also creates the model directory above it when it is missing.
log_path = os.path.join(model_path(), 'logs')
os.makedirs(log_path, exist_ok=True)
|
2018-03-26 14:45:26 +00:00
|
|
|
|
|
|
|
# Define helper functions
|
2018-05-06 18:41:07 +00:00
|
|
|
def log_train_outcome(outcome, diff_in_values, trained_eps=0, log_path=None):
    """Append one summary line for a training session to the training log.

    The line has the form
    ``time;trained_eps;count;sum;mean;average_diff_in_vals`` where ``time``
    is the current Unix time in whole seconds.

    :param outcome: sized collection of numeric results; its length, sum and
        mean are logged
    :param diff_in_values: value written as the last field of the line
    :param trained_eps: episode counter written to the line
    :param log_path: file to append to; when ``None`` the path
        ``<model_path()>/logs/train.log`` is used
    :return: None
    """
    if log_path is None:
        # Resolved per call rather than in the signature, so the default is
        # not frozen to whatever the model path was when this was defined.
        log_path = os.path.join(model_path(), 'logs', "train.log")

    count = len(outcome)
    total = sum(outcome)
    format_vars = {
        'trained_eps': trained_eps,
        'count': count,
        'sum': total,
        # An empty outcome is logged with mean 0.0 instead of raising
        # ZeroDivisionError.
        'mean': total / count if count else 0.0,
        'time': int(time.time()),
        'average_diff_in_vals': diff_in_values,
    }

    with open(log_path, 'a') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
|
2018-03-09 23:39:55 +00:00
|
|
|
|
2018-03-08 15:27:16 +00:00
|
|
|
|
2018-03-26 14:45:26 +00:00
|
|
|
def log_eval_outcomes(outcomes, trained_eps=0, log_path=None):
    """Append one line per evaluation outcome to the evaluation log.

    Each line has the form ``time;method;trained_eps;count;sum;mean`` where
    ``time`` is the current Unix time in whole seconds.

    :param outcomes: iterable of pairs; item 0 is written as the method
        field, item 1 is a sized collection of numeric scores
    :param trained_eps: episode counter written to every line
    :param log_path: file to append to; when ``None`` the path
        ``<model_path()>/logs/eval.log`` is used
    :return: None
    """
    lines = []
    for outcome in outcomes:
        scores = outcome[1]
        count = len(scores)
        total = sum(scores)
        format_vars = {
            'trained_eps': trained_eps,
            'method': outcome[0],
            'count': count,
            'sum': total,
            # Empty score lists are logged with mean 0.0 instead of raising
            # ZeroDivisionError.
            'mean': total / count if count else 0.0,
            'time': int(time.time()),
        }
        lines.append("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")

    if not lines:
        # Nothing to log: leave the file untouched (and uncreated).
        return

    if log_path is None:
        # Resolved per call rather than in the signature, so the default is
        # not frozen to whatever the model path was when this was defined.
        log_path = os.path.join(model_path(), 'logs', "eval.log")

    # Open the log once for the whole batch instead of once per outcome.
    with open(log_path, 'a') as f:
        f.writelines(lines)
|
|
|
|
|
|
|
|
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps=0):
    """Append one benchmark line per evaluation outcome to ``log_path``.

    Each line has the form ``method;count;index;time;sum;mean``.

    :param outcomes: iterable of pairs; item 0 is written as the method
        field, item 1 is a sized collection of numeric scores
    :param log_path: file to append to
    :param index: value written to the ``index`` field of every line
    :param time: value written to the ``time`` field of every line; this
        parameter shadows the ``time`` module inside the function
    :param trained_eps: accepted for call compatibility; it is not part of
        the written line
    :return: None
    """
    lines = []
    for outcome in outcomes:
        scores = outcome[1]
        count = len(scores)
        total = sum(scores)
        lines.append("{method};{count};{index};{time};{sum};{mean}".format(
            method=outcome[0],
            count=count,
            index=index,
            time=time,
            sum=total,
            # Empty score lists are logged with mean 0.0 instead of raising
            # ZeroDivisionError.
            mean=total / count if count else 0.0,
        ) + "\n")

    if not lines:
        # Nothing to log: leave the file untouched (and uncreated).
        return

    # Open the log once for the whole batch instead of once per outcome.
    with open(log_path, 'a') as f:
        f.writelines(lines)
|
|
|
|
|
2018-05-12 10:14:47 +00:00
|
|
|
def find_board_rep():
    """Return the full text of the configured model's ``board_representation`` file.

    The file is looked up under
    ``config['model_storage_path']/config['model']``.
    """
    rep_file = os.path.join(config['model_storage_path'],
                            config['model'],
                            "board_representation")
    with open(rep_file, 'r') as handle:
        contents = handle.read()
    return contents
|
|
|
|
|
|
|
|
|
|
|
|
def board_rep_file_exists():
    """Return True when the configured model has a ``board_representation`` file.

    The file is looked up under
    ``config['model_storage_path']/config['model']``.
    """
    rep_file = os.path.join(config['model_storage_path'],
                            config['model'],
                            "board_representation")
    return os.path.isfile(rep_file)
|
|
|
|
|
|
|
|
def create_board_rep():
    """Write ``config['board_representation']`` to the model's ``board_representation`` file.

    The file lives under ``config['model_storage_path']/config['model']`` and
    is opened in append mode, so existing content is kept and the value is
    added after it.
    """
    rep_file = os.path.join(config['model_storage_path'],
                            config['model'],
                            "board_representation")
    board_rep = config['board_representation']
    with open(rep_file, 'a+') as handle:
        handle.write(board_rep)
|
|
|
|
|
2018-03-09 23:39:55 +00:00
|
|
|
# Do actions specified by command-line
|
2018-03-12 14:18:44 +00:00
|
|
|
if args.list_models:
    def get_eps_trained(folder):
        """Return the episode count stored in ``folder``'s ``episodes_trained`` file.

        A folder without that file is reported as 0 episodes instead of
        aborting the listing. Such a folder can exist because this script
        creates the selected model's directory before it gets here.
        """
        try:
            with open(os.path.join(folder, 'episodes_trained'), 'r') as f:
                return int(f.read())
        except FileNotFoundError:
            return 0

    # Every sub-directory of the model storage path counts as a model.
    model_folders = [entry.path
                     for entry in os.scandir(config['model_storage_path'])
                     if entry.is_dir()]
    models = [(folder, get_eps_trained(folder)) for folder in model_folders]

    sys.stderr.write("Found {} model(s)\n".format(len(models)))
    for name, eps_trained in models:
        sys.stderr.write(" {name}: {eps_trained}\n".format(name=name, eps_trained=eps_trained))

    # `sys.exit` rather than the `exit` helper, which only exists when the
    # `site` module has been loaded.
    sys.exit()
|
|
|
|
|
2018-03-22 14:30:47 +00:00
|
|
|
if __name__ == "__main__":
    # Entry point: reconcile the board representation with the one stored for
    # the model, then run exactly one of train / play / eval / bench,
    # checked in that order.

    # Set up network
    from network import Network

    # Set up variables
    episode_count = config['episode_count']

    # The board representation given on the command line must agree with the
    # one recorded in the model's board_representation file (if any).
    if config['board_representation'] is None:
        if board_rep_file_exists():
            config['board_representation'] = find_board_rep()
        else:
            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
            # Non-zero status so callers can tell the run failed.
            sys.exit(1)
    elif not board_rep_file_exists():
        create_board_rep()
    else:
        # Read the stored value once and reuse it for the error message.
        stored_board_rep = find_board_rep()
        if config['board_representation'] != stored_board_rep:
            sys.stderr.write('Board representation "{given}", does not match one in board_rep file, "{board_rep}"\n'.
                             format(given=config['board_representation'], board_rep=stored_board_rep))
            sys.exit(1)

    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        # One pass per training session; repeats only with --train-perpetually.
        while True:
            train_outcome, diff_in_values = network.train_model(episodes=episode_count, trained_eps=start_episode)
            start_episode += episode_count
            log_train_outcome(train_outcome, diff_in_values, trained_eps=start_episode)
            if config['eval_after_train']:
                eval_outcomes = network.eval(trained_eps=start_episode)
                log_eval_outcomes(eval_outcomes, trained_eps=start_episode)
            if not config['train_perpetually']:
                break

    elif args.play:
        network = Network(config, config['model'])
        network.play_against_network()

    elif args.eval:
        network = Network(config, config['model'])
        # `repeat_eval` arrives from the command line as a string.
        for _ in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
            outcomes = network.eval(config['episode_count'])
            log_eval_outcomes(outcomes, trained_eps=start_episode)

    elif args.bench_eval_scores:
        # Make sure benchmark directory exists
        os.makedirs(config['bench_storage_path'], exist_ok=True)

        # Benchmarks always run against the model named 'bench'; work on a
        # copy so the dictionary built from the command line is not mutated.
        config = config.copy()
        config['model'] = 'bench'

        network = Network(config, config['model'])
        start_episode = network.episodes_trained

        if start_episode == 0:
            print("Model not trained! Beware of using non-existing models!")
            sys.exit(1)

        sample_count = 20
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]

        def do_eval():
            """Run the benchmark grid and log every sample.

            For each configured evaluation method a result file named
            ``<method>-<unix time>.log`` is used; for each episode count,
            ``sample_count`` timed evaluations are run and appended to it.
            """
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
                for n in episode_counts:
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measure to be benchmarked are described in `config`
                        outcomes = network.eval(episode_count=n)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time=time_diff,
                                                index=i,
                                                trained_eps=start_episode,
                                                log_path=result_path)

        # CMM: oh no
        # NOTE(review): `tf` is not used below; kept in case the import is
        # needed for its side effects - confirm before removing.
        import tensorflow as tf  # noqa: F401

        network.restore_model()
        do_eval()
|
2018-03-26 14:45:26 +00:00
|
|
|
|
|
|
|
|