From 1f1e806306eb0aded61c2582f416b55655145d94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
Date: Mon, 26 Mar 2018 15:55:48 +0200
Subject: [PATCH 1/4] fix errant whitespace

---
 network.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/network.py b/network.py
index f058d48..d32c6b9 100644
--- a/network.py
+++ b/network.py
@@ -357,7 +357,7 @@ class Network:
             return [0]
 
         with tf.Session() as session:
-            session .run(tf.global_variables_initializer())
+            session.run(tf.global_variables_initializer())
             self.restore_model(session)
             outcomes = [ (method, do_eval(session,
                                           method,

From 4c43bf19a3f38bf424922631157ea09f3902ea34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
Date: Mon, 26 Mar 2018 16:45:26 +0200
Subject: [PATCH 2/4] Add evaluation variance benchmark

To benchmark `pubeval`, run
`python3 main.py --bench-eval-scores --eval-methods pubeval`.

Logs will be placed in the `bench` directory.

Use `plot_bench(data_path)` in `plot.py` for plotting.
---
 main.py    | 159 +++++++++++++++++++++++++++++++++++++----------------
 network.py |  28 +++++++---
 plot.py    |  12 ++++
 3 files changed, 143 insertions(+), 56 deletions(-)

diff --git a/main.py b/main.py
index bc8de09..b5a8ad0 100644
--- a/main.py
+++ b/main.py
@@ -3,38 +3,6 @@ import sys
 import os
 import time
 
-model_storage_path = 'models'
-
-# Create models folder
-if not os.path.exists(model_storage_path):
-    os.makedirs(model_storage_path)
-
-# Define helper functions
-def log_train_outcome(outcome, trained_eps = 0):
-    format_vars = { 'trained_eps': trained_eps,
-                    'count': len(train_outcome),
-                    'sum': sum(train_outcome),
-                    'mean': sum(train_outcome) / len(train_outcome),
-                    'time': int(time.time())
-                    }
-    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
-
-def log_eval_outcomes(outcomes, trained_eps = 0):
-    for outcome in outcomes:
-        scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
-                        'method': outcome[0],
-                        'count': len(scores),
-                        'sum': sum(scores),
-                        'mean': sum(scores) / len(scores),
-                        'time': int(time.time())
-                        }
-        with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
-
-
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
 parser.add_argument('--episodes', action='store', dest='episode_count',
@@ -47,13 +15,15 @@ parser.add_argument('--eval-methods', action='store',
                     default=['random'], nargs='*',
                     help='specifies evaluation methods')
 parser.add_argument('--eval', action='store_true',
-                    help='whether to evaluate the neural network with a random choice bot')
+                    help='evaluate the neural network with a random choice bot')
+parser.add_argument('--bench-eval-scores', action='store_true',
+                    help='benchmark scores of evaluation measures; episode count and model specified as options are ignored')
 parser.add_argument('--train', action='store_true',
-                    help='whether to train the neural network')
+                    help='train the neural network')
 parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
-                    help='whether to evaluate after each training session')
+                    help='evaluate after each training session')
 parser.add_argument('--play', action='store_true',
-                    help='whether to play with the neural network')
+                    help='play with the neural network')
 parser.add_argument('--start-episode', action='store', dest='start_episode',
                     type=int, default=0,
                     help='episode count to start at; purely for display purposes')
@@ -66,27 +36,73 @@ args = parser.parse_args()
 
 config = {
     'model': args.model,
-    'model_path': os.path.join(model_storage_path, args.model),
     'episode_count': args.episode_count,
     'eval_methods': args.eval_methods,
     'train': args.train,
     'play': args.play,
     'eval': args.eval,
+    'bench_eval_scores': args.bench_eval_scores,
     'eval_after_train': args.eval_after_train,
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
-    'model_storage_path': model_storage_path
+    'model_storage_path': 'models',
+    'bench_storage_path': 'bench'
 }
 
+# Create models folder
+if not os.path.exists(config['model_storage_path']):
+    os.makedirs(config['model_storage_path'])
+
+model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
+
 # Make sure directories exist
-model_path = os.path.join(config['model_path'])
-log_path = os.path.join(model_path, 'logs')
-if not os.path.isdir(model_path):
-    os.mkdir(model_path)
+log_path = os.path.join(model_path(), 'logs')
+if not os.path.isdir(model_path()):
+    os.mkdir(model_path())
 if not os.path.isdir(log_path):
     os.mkdir(log_path)
 
 
+
+# Define helper functions
+def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+    format_vars = { 'trained_eps': trained_eps,
+                    'count': len(outcome),
+                    'sum': sum(outcome),
+                    'mean': sum(outcome) / len(outcome),
+                    'time': int(time.time())
+                    }
+    with open(log_path, 'a+') as f:
+        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+
+def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'trained_eps': trained_eps,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores),
+                        'time': int(time.time())
+                        }
+        with open(log_path, 'a+') as f:
+            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+
+def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'trained_eps': trained_eps,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores),
+                        'time': time,
+                        'index': index,
+                        }
+        with open(log_path, 'a+') as f:
+            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
+
 # Do actions specified by command-line
 if args.list_models:
     def get_eps_trained(folder):
@@ -94,7 +110,7 @@ if args.list_models:
         return int(f.read())
     model_folders = [ f.path
                       for f
-                      in os.scandir(model_storage_path)
+                      in os.scandir(config['model_storage_path'])
                       if f.is_dir() ]
     models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
     sys.stderr.write("Found {} model(s)\n".format(len(models)))
@@ -106,13 +122,13 @@ if args.list_models:
 if __name__ == "__main__":
     # Set up network
     from network import Network
-    network = Network(config, config['model'])
-    start_episode = network.episodes_trained
 
     # Set up variables
     episode_count = config['episode_count']
 
     if args.train:
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
         while True:
             train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
             start_episode += episode_count
@@ -122,9 +138,58 @@ if __name__ == "__main__":
             log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
             if not config['train_perpetually']:
                 break
+
+
     elif args.eval:
-        outcomes = network.eval()
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
+        # Evaluation measures are described in `config`
+        outcomes = network.eval(config['episode_count'])
         log_eval_outcomes(outcomes, trained_eps = start_episode)
     # elif args.play:
     #     g.play(episodes = episode_count)
-    
+
+
+    elif args.bench_eval_scores:
+        # Make sure benchmark directory exists
+        if not os.path.isdir(config['bench_storage_path']):
+            os.mkdir(config['bench_storage_path'])
+
+        config = config.copy()
+        config['model'] = 'bench'
+
+        network = Network(config, config['model'])
+        start_episode = network.episodes_trained
+
+        if start_episode == 0:
+            print("Model not trained! Beware of using non-existent models!")
+            exit()
+
+        sample_count = 20
+        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
+                          10000, 20000]
+
+        def do_eval(sess):
+            for eval_method in config['eval_methods']:
+                result_path = os.path.join(config['bench_storage_path'],
+                                           eval_method) + "-{}.log".format(int(time.time()))
+                for n in episode_counts:
+                    for i in range(sample_count):
+                        start_time = time.time()
+                        # Evaluation measures to be benchmarked are described in `config`
+                        outcomes = network.eval(episode_count = n,
+                                                tf_session = sess)
+                        time_diff = time.time() - start_time
+                        log_bench_eval_outcomes(outcomes,
+                                                time = time_diff,
+                                                index = i,
+                                                trained_eps = start_episode,
+                                                log_path = result_path)
+
+        # CMM: oh no
+        import tensorflow as tf
+        with tf.Session() as session:
+            network.restore_model(session)
+            do_eval(session)
+
+
diff --git a/network.py b/network.py
index d32c6b9..d9a9f52 100644
--- a/network.py
+++ b/network.py
@@ -13,7 +13,7 @@ class Network:
     input_size = 26
     output_size = 1
     # Can't remember the best learning_rate, look this up
-    learning_rate = 0.05
+    learning_rate = 0.01
 
     # TODO: Actually compile tensorflow properly
     #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
@@ -23,7 +23,7 @@ class Network:
 
     def __init__(self, config, name):
         self.config = config
-        self.checkpoint_path = config['model_path']
+        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
 
         self.name = name
 
@@ -262,7 +262,7 @@ class Network:
 
 
 
-    def eval(self, trained_eps = 0):
+    def eval(self, episode_count, trained_eps = 0, tf_session = None):
         def do_eval(sess, method, episodes = 1000, trained_eps = 0):
             start_time = time.time()
 
@@ -356,13 +356,23 @@ class Network:
             sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
             return [0]
 
-        with tf.Session() as session:
-            session.run(tf.global_variables_initializer())
-            self.restore_model(session)
-            outcomes = [ (method, do_eval(session,
+
+        if tf_session == None:
+            with tf.Session() as session:
+                session.run(tf.global_variables_initializer())
+                self.restore_model(session)
+                outcomes = [ (method, do_eval(session,
+                                              method,
+                                              episode_count,
+                                              trained_eps = trained_eps))
+                             for method
+                             in self.config['eval_methods'] ]
+            return outcomes
+        else:
+            outcomes = [ (method, do_eval(tf_session,
                                           method,
-                                          self.config['episode_count'],
+                                          episode_count,
                                           trained_eps = trained_eps))
                          for method
                          in self.config['eval_methods'] ]
-        return outcomes
+            return outcomes
diff --git a/plot.py b/plot.py
index 5a94f51..c820c55 100644
--- a/plot.py
+++ b/plot.py
@@ -9,9 +9,21 @@ import matplotlib.dates as mdates
 
 train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean']
 eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean']
+bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean']
 
 model_path = 'models'
 
+def plot_bench(data_path):
+    df = pd.read_csv(data_path, sep=";",
+                     names=bench_headers, index_col=[0,1,2])
+    for method_label in df.index.levels[0]:
+        cur_df = df.loc[method_label]
+        plot = df[['mean']].loc['pubeval'].unstack().T.plot.box()
+        plot.set_title("Evaluation variance, {}".format(method_label))
+        plot.set_xlabel("Sample count")
+        plot.set_ylabel("Mean score")
+        plt.show(plot.figure)
+        del cur_df, plot
 
 def dataframes(model_name):
     def df_timestamp_to_datetime(df):

From 9b2bbfb4d1a855f4fbcc4f3bed5f93929a0d7aaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
Date: Mon, 26 Mar 2018 17:06:12 +0200
Subject: [PATCH 3/4] print variances when plotting evaluation variance benchmark

---
 plot.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/plot.py b/plot.py
index c820c55..5957854 100644
--- a/plot.py
+++ b/plot.py
@@ -17,13 +17,18 @@ def plot_bench(data_path):
     df = pd.read_csv(data_path, sep=";",
                      names=bench_headers, index_col=[0,1,2])
     for method_label in df.index.levels[0]:
-        cur_df = df.loc[method_label]
-        plot = df[['mean']].loc['pubeval'].unstack().T.plot.box()
+        df_prime = df[['mean']].loc[method_label].unstack().T
+        plot = df_prime.plot.box()
         plot.set_title("Evaluation variance, {}".format(method_label))
         plot.set_xlabel("Sample count")
         plot.set_ylabel("Mean score")
         plt.show(plot.figure)
-        del cur_df, plot
+
+        # for later use:
+        variances = df_prime.var()
+        print(variances)
+
+        del df_prime, plot, variances
 
 def dataframes(model_name):
     def df_timestamp_to_datetime(df):

From 0eac5434d65db4a91b2a6a401421b7257c36aee4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
Date: Tue, 27 Mar 2018 11:55:32 +0200
Subject: [PATCH 4/4] update .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 08bc86a..03ee050 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,6 @@ venv.bak/
 README.*
 !README.org
 models/
+.DS_Store
+bench/
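
A minimal usage sketch, assuming the patches above are applied: the benchmark described in PATCH 2/4 is run from the command line, and the resulting log is plotted with `plot_bench` from `plot.py`. The log file name shown below is hypothetical; `main.py` names each log `<eval-method>-<unix timestamp>.log` inside the `bench` directory, so the actual file name must be taken from the run.

    # Run the benchmark first; it writes e.g. bench/pubeval-<timestamp>.log:
    #   python3 main.py --bench-eval-scores --eval-methods pubeval
    from plot import plot_bench

    # Hypothetical file name; replace with the log actually produced above.
    plot_bench('bench/pubeval-1522072800.log')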