import argparse import sys import os import time import subprocess # Parse command line arguments parser = argparse.ArgumentParser(description="Backgammon games") parser.add_argument('--episodes', action='store', dest='episode_count', type=int, default=1000, help='number of episodes to train') parser.add_argument('--model', action='store', dest='model', default='default', help='name of Tensorflow model to use') parser.add_argument('--eval-methods', action='store', default=['random'], nargs='*', help='specifies evaluation methods') parser.add_argument('--eval', action='store_true', help='evaluate the neural network with a random choice bot') parser.add_argument('--bench-eval-scores', action='store_true', help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.') parser.add_argument('--train', action='store_true', help='train the neural network') parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train', help='evaluate after each training session') parser.add_argument('--play', action='store_true', help='play with the neural network') parser.add_argument('--start-episode', action='store', dest='start_episode', type=int, default=0, help='episode count to start at; purely for display purposes') parser.add_argument('--train-perpetually', action='store_true', help='start new training session as soon as the previous is finished') parser.add_argument('--list-models', action='store_true', help='list all known models') parser.add_argument('--board-rep', action='store', dest='board_rep', help='name of board representation to use as input to neural network') parser.add_argument('--verbose', action='store_true', help='If set, a lot of stuff will be printed') parser.add_argument('--ply', action='store', dest='ply', default='0', help='defines the amount of ply used when deciding what move to make') parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1', help='the amount of times the evaluation method should be repeated') args = parser.parse_args() config = { 'model': args.model, 'episode_count': args.episode_count, 'eval_methods': args.eval_methods, 'train': args.train, 'play': args.play, 'eval': args.eval, 'bench_eval_scores': args.bench_eval_scores, 'eval_after_train': args.eval_after_train, 'start_episode': args.start_episode, 'train_perpetually': args.train_perpetually, 'model_storage_path': 'models', 'bench_storage_path': 'bench', 'board_representation': args.board_rep, 'global_step': 0, 'verbose': args.verbose, 'ply': args.ply, 'repeat_eval': args.repeat_eval } # Create models folder if not os.path.exists(config['model_storage_path']): os.makedirs(config['model_storage_path']) model_path = lambda: os.path.join(config['model_storage_path'], config['model']) # Make sure directories exist log_path = os.path.join(model_path(), 'logs') if not os.path.isdir(model_path()): os.mkdir(model_path()) if not os.path.isdir(log_path): os.mkdir(log_path) # Define helper functions def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")): commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip() format_vars = { 'trained_eps': trained_eps, 'count': len(outcome), 'sum': sum(outcome), 'mean': sum(outcome) / len(outcome), 'time': int(time.time()), 'average_diff_in_vals': diff_in_values, 'commit': commit } with open(log_path, 'a+') as f: f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n") def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")): """ :param outcomes: :param average_diff_in_value: :param trained_eps: :param log_path: :return: """ commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip() for outcome in outcomes: scores = outcome[1] format_vars = { 'commit': commit, 'trained_eps': trained_eps, 'method': outcome[0], 'count': len(scores), 'sum': sum(scores), 'mean': sum(scores) / len(scores), 'time': int(time.time()) } with open(log_path, 'a+') as f: f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n") def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0): commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip() for outcome in outcomes: scores = outcome[1] format_vars = { 'trained_eps': trained_eps, 'method': outcome[0], 'count': len(scores), 'sum': sum(scores), 'mean': sum(scores) / len(scores), 'time': time, 'index': index, 'commit': commit } with open(log_path, 'a+') as f: f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n") def find_board_rep(): checkpoint_path = os.path.join(config['model_storage_path'], config['model']) board_rep_path = os.path.join(checkpoint_path, "board_representation") with open(board_rep_path, 'r') as f: return f.read() def board_rep_file_exists(): checkpoint_path = os.path.join(config['model_storage_path'], config['model']) board_rep_path = os.path.join(checkpoint_path, "board_representation") return os.path.isfile(board_rep_path) def create_board_rep(): checkpoint_path = os.path.join(config['model_storage_path'], config['model']) board_rep_path = os.path.join(checkpoint_path, "board_representation") with open(board_rep_path, 'a+') as f: f.write(config['board_representation']) # Do actions specified by command-line if args.list_models: def get_eps_trained(folder): with open(os.path.join(folder, 'episodes_trained'), 'r') as f: return int(f.read()) model_folders = [ f.path for f in os.scandir(config['model_storage_path']) if f.is_dir() ] models = [ (folder, get_eps_trained(folder)) for folder in model_folders ] sys.stderr.write("Found {} model(s)\n".format(len(models))) for model in models: sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1])) exit() if __name__ == "__main__": # Set up network from network import Network # Set up variables episode_count = config['episode_count'] if config['board_representation'] is None: if board_rep_file_exists(): config['board_representation'] = find_board_rep() else: sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n") exit() else: if not board_rep_file_exists(): create_board_rep() else: if config['board_representation'] != find_board_rep(): sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n". format(given = config['board_representation'], board_rep = find_board_rep())) exit() if args.train: network = Network(config, config['model']) start_episode = network.episodes_trained while True: train_outcome, diff_in_values = network.train_model(episodes = episode_count, trained_eps = start_episode) start_episode += episode_count log_train_outcome(train_outcome, diff_in_values, trained_eps = start_episode) if config['eval_after_train']: eval_outcomes = network.eval(trained_eps = start_episode) log_eval_outcomes(eval_outcomes, trained_eps = start_episode) if not config['train_perpetually']: break elif args.play: network = Network(config, config['model']) network.play_against_network() elif args.eval: network = Network(config, config['model']) network.restore_model() for i in range(int(config['repeat_eval'])): start_episode = network.episodes_trained # Evaluation measures are described in `config` outcomes = network.eval(config['episode_count']) log_eval_outcomes(outcomes, trained_eps = start_episode) # elif args.play: # g.play(episodes = episode_count) elif args.bench_eval_scores: # Make sure benchmark directory exists if not os.path.isdir(config['bench_storage_path']): os.mkdir(config['bench_storage_path']) config = config.copy() config['model'] = 'bench' network = Network(config, config['model']) start_episode = network.episodes_trained if start_episode == 0: print("Model not trained! Beware of using non-existing models!") exit() sample_count = 20 episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 20000] def do_eval(): for eval_method in config['eval_methods']: result_path = os.path.join(config['bench_storage_path'], eval_method) + "-{}.log".format(int(time.time())) for n in episode_counts: for i in range(sample_count): start_time = time.time() # Evaluation measure to be benchmarked are described in `config` outcomes = network.eval(episode_count = n) time_diff = time.time() - start_time log_bench_eval_outcomes(outcomes, time = time_diff, index = i, trained_eps = start_episode, log_path = result_path) # CMM: oh no import tensorflow as tf network.restore_model() do_eval()