From cba0f67ae292b2078c2d2435ca39bd7383adb846 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 19 Apr 2018 15:22:00 +0200 Subject: [PATCH] fixed *the* bug --- network.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/network.py b/network.py index 467c7fc..4486790 100644 --- a/network.py +++ b/network.py @@ -7,7 +7,7 @@ import time import sys import random from eval import Eval - +import glob class Network: # board_features_quack has size 28 @@ -38,10 +38,10 @@ class Network: # Can't remember the best learning_rate, look this up self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - # self.learning_rate = 0.01 + self.learning_rate = 0.01 self.global_step = tf.Variable(0, trainable=False, name="global_step") - self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate") + # self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate") @@ -88,7 +88,8 @@ class Network: apply_gradients = [] global_step_op = self.global_step.assign_add(1) - + + with tf.variable_scope('apply_gradients'): for gradient, trainable_var in zip(gradients, trainable_vars): # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t. @@ -96,6 +97,9 @@ class Network: grad_apply = trainable_var.assign_add(backprop_calc) apply_gradients.append(grad_apply) + + with tf.control_dependencies([global_step_op]): + self.training_op = tf.group(*apply_gradients, name='training_op') self.saver = tf.train.Saver(max_to_keep=1) @@ -145,7 +149,8 @@ class Network: f.write(str(episode_count) + "\n") def restore_model(self, sess): - if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')): + if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')): + latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) @@ -162,6 +167,8 @@ class Network: if os.path.isfile(episode_count_path): with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) + else: + assert False def make_move(self, sess, board, roll, player): # print(Board.pretty(board)) @@ -234,9 +241,6 @@ class Network: board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] # print("post pubeval:", board, sep="\n") - # print("*"*30) - # print(board) - # print("+"*30) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) outcomes.append(Board.outcome(board)[1]) sys.stderr.write("\n") @@ -271,9 +275,6 @@ class Network: board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26] # print("post pubeval:", board, sep="\n") - # print("*"*30) - # print(board) - # print("+"*30) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) outcomes.append(Board.outcome(board)[1]) sys.stderr.write("\n") @@ -405,7 +406,7 @@ class Network: with tf.name_scope("final"): merged = tf.summary.merge_all() - summary, _, global_step = sess.run([merged, self.training_op, self.global_step], + global_step, summary, _ = sess.run([self.global_step, merged, self.training_op], feed_dict={self.x: self.board_trans_func(prev_board, player), self.value_next: scaled_final_score.reshape((1, 1))}) writer.add_summary(summary, episode + trained_eps) @@ -420,7 +421,7 @@ class Network: print_time_estimate(episode) sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(sess, episode+trained_eps, global_step=global_step) + self.save_model(sess, episode+trained_eps, global_step) writer.close()