import argparse
import sys
import os
import time

# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
parser.add_argument('--episodes', action='store', dest='episode_count',
                    type=int, default=1000,
                    help='number of episodes to train')
parser.add_argument('--model', action='store', dest='model',
                    default='default',
                    help='name of the TensorFlow model to use')
parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
parser.add_argument('--eval', action='store_true',
                    help='evaluate the neural network with a random choice bot')
parser.add_argument('--bench-eval-scores', action='store_true',
                    help='benchmark scores of evaluation measures; the episode count and model specified as options are ignored')
parser.add_argument('--train', action='store_true',
                    help='train the neural network')
parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
                    help='evaluate after each training session')
parser.add_argument('--play', action='store_true',
                    help='play with the neural network')
parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
parser.add_argument('--train-perpetually', action='store_true',
                    help='start a new training session as soon as the previous one finishes')
parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
parser.add_argument('--board-rep', action='store', dest='board_rep',
                    help='name of the board representation to use as input to the neural network')
parser.add_argument('--verbose', action='store_true',
                    help='print verbose output')
parser.add_argument('--ply', action='store', dest='ply', default='0',
                    help='the amount of ply (look-ahead) used when deciding which move to make')
parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1',
                    help='the number of times the evaluation method should be repeated')

args = parser.parse_args()
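
# Example invocations (assuming this script is saved as main.py; the model and
# board representation names below are placeholders, not names defined in this project):
#   python main.py --train --model my-model --board-rep <representation-name> --episodes 5000
#   python main.py --eval --model my-model --eval-methods random
#   python main.py --list-models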

config = {
    'model': args.model,
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
    'model_storage_path': 'models',
    'bench_storage_path': 'bench',
    'board_representation': args.board_rep,
    'global_step': 0,
    'verbose': args.verbose,
    'ply': args.ply,
    'repeat_eval': args.repeat_eval
}


# Create models folder
if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])


def model_path():
    return os.path.join(config['model_storage_path'], config['model'])


# Make sure directories exist
log_path = os.path.join(model_path(), 'logs')
if not os.path.isdir(model_path()):
    os.mkdir(model_path())
if not os.path.isdir(log_path):
    os.mkdir(log_path)


def save_config():
    # For now the configuration is only dumped to stdout as YAML.
    import yaml
    print(yaml.dump(config))
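

# A possible next step, sketched here only and not called anywhere: persist the
# configuration next to the model checkpoints instead of just printing it.
# The file name 'config.yaml' is an assumption; nothing in this file reads it back.
def write_config():
    import yaml
    with open(os.path.join(model_path(), 'config.yaml'), 'w') as f:
        yaml.dump(config, f)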


# Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    # Append one semicolon-separated record per training session:
    # time;trained_eps;count;sum;mean;average_diff_in_vals
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time()),
                    'average_diff_in_vals': diff_in_values/len(outcome)
                    }

    with open(log_path, 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")


def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
    """
    Append one semicolon-separated record per evaluation method to the evaluation log.

    :param outcomes: list of (method, scores) tuples as returned by the evaluation
    :param trained_eps: number of episodes the model had been trained for
    :param log_path: path of the log file to append to
    :return: None
    """
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': int(time.time())
                        }
        with open(log_path, 'a+') as f:
            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")


def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': time,
                        'index': index,
                        }
        with open(log_path, 'a+') as f:
            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")


def find_board_rep():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    board_rep_path = os.path.join(checkpoint_path, "board_representation")
    with open(board_rep_path, 'r') as f:
        return f.read()


def board_rep_file_exists():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    board_rep_path = os.path.join(checkpoint_path, "board_representation")
    return os.path.isfile(board_rep_path)


def create_board_rep():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    board_rep_path = os.path.join(checkpoint_path, "board_representation")
    with open(board_rep_path, 'a+') as f:
        f.write(config['board_representation'])


# Do actions specified by command-line
if args.list_models:
    def get_eps_trained(folder):
        with open(os.path.join(folder, 'episodes_trained'), 'r') as f:
            return int(f.read())

    model_folders = [ f.path
                      for f
                      in os.scandir(config['model_storage_path'])
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
    sys.stderr.write("Found {} model(s)\n".format(len(models)))
    for model in models:
        sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))

    exit()


if __name__ == "__main__":
    # Set up network
    from network import Network

    save_config()

    # Set up variables
    episode_count = config['episode_count']

    if config['board_representation'] is None:
        if board_rep_file_exists():
            config['board_representation'] = find_board_rep()
        else:
            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
            exit()
    else:
        if not board_rep_file_exists():
            create_board_rep()
        else:
            if config['board_representation'] != find_board_rep():
                sys.stderr.write("Board representation \"{given}\" does not match the one in the board_rep file, \"{board_rep}\"\n".
                                 format(given = config['board_representation'], board_rep = find_board_rep()))
                exit()

    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        while True:
            train_outcome, diff_in_values = network.train_model(episodes = episode_count, trained_eps = start_episode)
            start_episode += episode_count
            log_train_outcome(train_outcome, diff_in_values, trained_eps = start_episode)
            if config['eval_after_train']:
                eval_outcomes = network.eval(trained_eps = start_episode)
                log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
            if not config['train_perpetually']:
                break

    elif args.eval:
        network = Network(config, config['model'])
        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
            outcomes = network.eval(config['episode_count'])
            log_eval_outcomes(outcomes, trained_eps = start_episode)

    # elif args.play:
    #     g.play(episodes = episode_count)
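
    # A sketch of how a --play branch could look, given the commented-out stub
    # above. It assumes the Network class exposes a play() method that takes an
    # episode count, which is not confirmed anywhere in this file:
    # elif args.play:
    #     network = Network(config, config['model'])
    #     network.play(episodes = episode_count)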

    elif args.bench_eval_scores:
        # Make sure benchmark directory exists
        if not os.path.isdir(config['bench_storage_path']):
            os.mkdir(config['bench_storage_path'])

        config = config.copy()
        config['model'] = 'bench'

        network = Network(config, config['model'])
        start_episode = network.episodes_trained

        if start_episode == 0:
            print("Model not trained! Beware of using non-existing models!")
            exit()

        sample_count = 20
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]

        def do_eval():
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
                for n in episode_counts:
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measures to be benchmarked are described in `config`
                        outcomes = network.eval(episode_count = n)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
                                                index = i,
                                                trained_eps = start_episode,
                                                log_path = result_path)

        # CMM: oh no
        import tensorflow as tf

        network.restore_model()
        do_eval()