diff --git a/.gitignore b/.gitignore
index 08bc86a..03ee050 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,6 @@ venv.bak/
 README.*
 !README.org
 models/
+.DS_Store
+bench/
+
diff --git a/main.py b/main.py
index bc8de09..b5a8ad0 100644
--- a/main.py
+++ b/main.py
@@ -3,38 +3,6 @@ import sys
 import os
 import time
 
-model_storage_path = 'models'
-
-# Create models folder
-if not os.path.exists(model_storage_path):
-    os.makedirs(model_storage_path)
-
-# Define helper functions
-def log_train_outcome(outcome, trained_eps = 0):
-    format_vars = { 'trained_eps': trained_eps,
-                    'count': len(train_outcome),
-                    'sum': sum(train_outcome),
-                    'mean': sum(train_outcome) / len(train_outcome),
-                    'time': int(time.time())
-    }
-    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
-
-def log_eval_outcomes(outcomes, trained_eps = 0):
-    for outcome in outcomes:
-        scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
-                        'method': outcome[0],
-                        'count': len(scores),
-                        'sum': sum(scores),
-                        'mean': sum(scores) / len(scores),
-                        'time': int(time.time())
-        }
-        with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
-
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
 parser.add_argument('--episodes', action='store', dest='episode_count',
@@ -47,13 +15,15 @@ parser.add_argument('--eval-methods', action='store',
                     default=['random'], nargs='*',
                     help='specifies evaluation methods')
 parser.add_argument('--eval', action='store_true',
-                    help='whether to evaluate the neural network with a random choice bot')
+                    help='evaluate the neural network with a random choice bot')
+parser.add_argument('--bench-eval-scores', action='store_true',
+                    help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.')
 parser.add_argument('--train', action='store_true',
-                    help='whether to train the neural network')
+                    help='train the neural network')
 parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
-                    help='whether to evaluate after each training session')
+                    help='evaluate after each training session')
 parser.add_argument('--play', action='store_true',
-                    help='whether to play with the neural network')
+                    help='play with the neural network')
 parser.add_argument('--start-episode', action='store', dest='start_episode',
                     type=int, default=0,
                     help='episode count to start at; purely for display purposes')
@@ -66,27 +36,73 @@ args = parser.parse_args()
 
 config = {
     'model': args.model,
-    'model_path': os.path.join(model_storage_path, args.model),
     'episode_count': args.episode_count,
     'eval_methods': args.eval_methods,
     'train': args.train,
     'play': args.play,
     'eval': args.eval,
+    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
-    'model_storage_path': model_storage_path
+    'model_storage_path': 'models',
+    'bench_storage_path': 'bench'
 }
 
+# Create models folder
+if not os.path.exists(config['model_storage_path']):
+    os.makedirs(config['model_storage_path'])
+
+model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
+
 # Make sure directories exist
-model_path = os.path.join(config['model_path'])
-log_path = os.path.join(model_path, 'logs')
-if not os.path.isdir(model_path):
-    os.mkdir(model_path)
+log_path = os.path.join(model_path(), 'logs')
+if not os.path.isdir(model_path()):
+    os.mkdir(model_path())
 if not os.path.isdir(log_path):
     os.mkdir(log_path)
+
+
+
+# Define helper functions
+def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+    format_vars = { 'trained_eps': trained_eps,
+                    'count': len(train_outcome),
+                    'sum': sum(train_outcome),
+                    'mean': sum(train_outcome) / len(train_outcome),
+                    'time': int(time.time())
+    }
+    with open(log_path, 'a+') as f:
+        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'trained_eps': trained_eps,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores),
+                        'time': int(time.time())
+        }
+        with open(log_path, 'a+') as f:
+            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+
+def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'trained_eps': trained_eps,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores),
+                        'time': time,
+                        'index': index,
+        }
+        with open(log_path, 'a+') as f:
+            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
+
 
 # Do actions specified by command-line
 if args.list_models:
     def get_eps_trained(folder):
@@ -94,7 +110,7 @@ if args.list_models:
             return int(f.read())
     model_folders = [ f.path
                       for f
-                      in os.scandir(model_storage_path)
+                      in os.scandir(config['model_storage_path'])
                       if f.is_dir() ]
     models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
     sys.stderr.write("Found {} model(s)\n".format(len(models)))
model(s)\n".format(len(models))) @@ -106,13 +122,13 @@ if args.list_models: if __name__ == "__main__": # Set up network from network import Network - network = Network(config, config['model']) - start_episode = network.episodes_trained # Set up variables episode_count = config['episode_count'] if args.train: + network = Network(config, config['model']) + start_episode = network.episodes_trained while True: train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode) start_episode += episode_count @@ -122,9 +138,58 @@ if __name__ == "__main__": log_eval_outcomes(eval_outcomes, trained_eps = start_episode) if not config['train_perpetually']: break + + elif args.eval: - outcomes = network.eval() + network = Network(config, config['model']) + start_episode = network.episodes_trained + # Evaluation measures are described in `config` + outcomes = network.eval(config['episode_count']) log_eval_outcomes(outcomes, trained_eps = start_episode) # elif args.play: # g.play(episodes = episode_count) - + + + elif args.bench_eval_scores: + # Make sure benchmark directory exists + if not os.path.isdir(config['bench_storage_path']): + os.mkdir(config['bench_storage_path']) + + config = config.copy() + config['model'] = 'bench' + + network = Network(config, config['model']) + start_episode = network.episodes_trained + + if start_episode == 0: + print("Model not trained! Beware of using non-existing models!") + exit() + + sample_count = 20 + episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000, + 10000, 20000] + + def do_eval(sess): + for eval_method in config['eval_methods']: + result_path = os.path.join(config['bench_storage_path'], + eval_method) + "-{}.log".format(int(time.time())) + for n in episode_counts: + for i in range(sample_count): + start_time = time.time() + # Evaluation measure to be benchmarked are described in `config` + outcomes = network.eval(episode_count = n, + tf_session = sess) + time_diff = time.time() - start_time + log_bench_eval_outcomes(outcomes, + time = time_diff, + index = i, + trained_eps = start_episode, + log_path = result_path) + + # CMM: oh no + import tensorflow as tf + with tf.Session() as session: + network.restore_model(session) + do_eval(session) + + diff --git a/network.py b/network.py index 1dc4b62..6358761 100644 --- a/network.py +++ b/network.py @@ -22,7 +22,7 @@ class Network: def __init__(self, config, name): self.config = config - self.checkpoint_path = config['model_path'] + self.checkpoint_path = os.path.join(config['model_storage_path'], config['model']) self.name = name @@ -388,7 +388,25 @@ class Network: print_time_estimate(episode) sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(sess, episode + trained_eps) + self.save_model(sess, episode+trained_eps) + + writer.close() + + return outcomes + + + # take turn, which finds the best state and picks it, based on the current network + # save current state + # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn + # save the current state again, so we can continue running backprop based on the "previous" turn. + + # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. 
Right now, our game just continues in a while loop without nothing to stop it! + + + + def eval(self, episode_count, trained_eps = 0, tf_session = None): + def do_eval(sess, method, episodes = 1000, trained_eps = 0): + start_time = time.time() writer.close() @@ -403,3 +421,23 @@ class Network: # save the current state again, so we can continue running backprop based on the "previous" turn. + + if tf_session == None: + with tf.Session(): + session.run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [ (method, do_eval(session, + method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes + else: + outcomes = [ (method, do_eval(tf_session, + method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes diff --git a/plot.py b/plot.py index 5a94f51..5957854 100644 --- a/plot.py +++ b/plot.py @@ -9,9 +9,26 @@ import matplotlib.dates as mdates train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean'] eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean'] +bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean'] model_path = 'models' +def plot_bench(data_path): + df = pd.read_csv(data_path, sep=";", + names=bench_headers, index_col=[0,1,2]) + for method_label in df.index.levels[0]: + df_prime = df[['mean']].loc[method_label].unstack().T + plot = df_prime.plot.box() + plot.set_title("Evaluation variance, {}".format(method_label)) + plot.set_xlabel("Sample count") + plot.set_ylabel("Mean score") + plt.show(plot.figure) + + # for later use: + variances = df_prime.var() + print(variances) + + del df_prime, plot, variances def dataframes(model_name): def df_timestamp_to_datetime(df):