backgammon/main.py

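"""Command-line entry point for training, evaluating, and benchmarking a
backgammon-playing neural network.

Example invocations (illustrative only; the flags are defined below):

    python main.py --train --model default --episodes 1000
    python main.py --eval --model default --eval-methods random
    python main.py --list-models
"""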
import argparse
import sys
import os
import time
# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
parser.add_argument('--episodes', action='store', dest='episode_count',
                    type=int, default=1000,
                    help='number of episodes to train')
parser.add_argument('--model', action='store', dest='model',
                    default='default',
                    help='name of Tensorflow model to use')
parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
parser.add_argument('--eval', action='store_true',
                    help='evaluate the neural network with a random choice bot')
parser.add_argument('--bench-eval-scores', action='store_true',
                    help='benchmark scores of evaluation measures; episode counts and model specified as options are ignored')
parser.add_argument('--train', action='store_true',
                    help='train the neural network')
parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
                    help='evaluate after each training session')
parser.add_argument('--play', action='store_true',
                    help='play with the neural network')
parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
parser.add_argument('--train-perpetually', action='store_true',
                    help='start a new training session as soon as the previous one is finished')
parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
parser.add_argument('--force-creation', action='store_true',
                    help='force model creation if the model does not exist')

args = parser.parse_args()

if args.model == "baseline_model":
    print("Model name 'baseline_model' not allowed")
    sys.exit()

config = {
    'model': args.model,
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
    'model_storage_path': 'models',
    'bench_storage_path': 'bench',
    'board_representation': 'quack',
    'force_creation': args.force_creation
}

# Create models folder
if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])

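# model_path is a function rather than a precomputed string because
# config['model'] may be reassigned later (the --bench-eval-scores path
# switches it to 'bench').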
def model_path():
    return os.path.join(config['model_storage_path'], config['model'])

# Make sure directories exist
log_path = os.path.join(model_path(), 'logs')
if not os.path.isdir(model_path()):
    os.mkdir(model_path())
if not os.path.isdir(log_path):
    os.mkdir(log_path)


# Define helper functions
def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time())
                    }
    with open(log_path, 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")

def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': int(time.time())
                        }
        with open(log_path, 'a+') as f:
            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")

def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': time,
                        'index': index,
                        }
        with open(log_path, 'a+') as f:
            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")

# Do actions specified by command-line
if args.list_models:
    def get_eps_trained(folder):
        with open(os.path.join(folder, 'episodes_trained'), 'r') as f:
            return int(f.read())

    model_folders = [ f.path
                      for f
                      in os.scandir(config['model_storage_path'])
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
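    # Example output on stderr (illustrative):
    #   Found 2 model(s)
    #    models/default: 5000
    #    models/bench: 0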
sys.stderr.write("Found {} model(s)\n".format(len(models)))
for model in models:
sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))
2018-03-20 12:17:38 +00:00
exit()

if __name__ == "__main__":
    # Set up network
    from network import Network

    # Set up variables
    episode_count = config['episode_count']

    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained

        while True:
            train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
            start_episode += episode_count
            log_train_outcome(train_outcome, trained_eps = start_episode)
            if config['eval_after_train']:
                eval_outcomes = network.eval(trained_eps = start_episode)
                log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
            if not config['train_perpetually']:
                break

    elif args.eval:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        # Evaluation measures are described in `config`
        outcomes = network.eval(config['episode_count'])
        log_eval_outcomes(outcomes, trained_eps = start_episode)

    # elif args.play:
    #     g.play(episodes = episode_count)

    elif args.bench_eval_scores:
        # Make sure benchmark directory exists
        if not os.path.isdir(config['bench_storage_path']):
            os.mkdir(config['bench_storage_path'])

        config = config.copy()
        config['model'] = 'bench'
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        if start_episode == 0:
            print("Model not trained! Beware of using non-existent models!")
            sys.exit()

        sample_count = 20
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]
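
        # For each evaluation method, run every episode count `sample_count`
        # times, timing each run and appending one line per run to that
        # method's log file.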
        def do_eval(sess):
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
                for n in episode_counts:
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measures to be benchmarked are described in `config`
                        outcomes = network.eval(episode_count = n,
                                                tf_session = sess)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
                                                index = i,
                                                trained_eps = start_episode,
                                                log_path = result_path)
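
        # TensorFlow is imported only at this point, presumably so that the
        # lighter code paths above (e.g. --list-models) do not pay the
        # import cost.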
        # CMM: oh no
        import tensorflow as tf

        with tf.Session() as session:
            network.restore_model(session)
            do_eval(session)