fixed *the* bug

This commit is contained in:
Alexander Munch-Hansen 2018-04-19 15:22:00 +02:00
parent b6c52ba476
commit cba0f67ae2

View File

@@ -7,7 +7,7 @@ import time
import sys import sys
import random import random
from eval import Eval from eval import Eval
import glob
class Network: class Network:
# board_features_quack has size 28 # board_features_quack has size 28
@@ -38,10 +38,10 @@ class Network:
# Can't remember the best learning_rate, look this up # Can't remember the best learning_rate, look this up
self.max_learning_rate = 0.1 self.max_learning_rate = 0.1
self.min_learning_rate = 0.001 self.min_learning_rate = 0.001
# self.learning_rate = 0.01 self.learning_rate = 0.01
self.global_step = tf.Variable(0, trainable=False, name="global_step") self.global_step = tf.Variable(0, trainable=False, name="global_step")
self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate") # self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate")
@@ -89,6 +89,7 @@ class Network:
global_step_op = self.global_step.assign_add(1) global_step_op = self.global_step.assign_add(1)
with tf.variable_scope('apply_gradients'): with tf.variable_scope('apply_gradients'):
for gradient, trainable_var in zip(gradients, trainable_vars): for gradient, trainable_var in zip(gradients, trainable_vars):
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t. # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
@@ -96,6 +97,9 @@ class Network:
grad_apply = trainable_var.assign_add(backprop_calc) grad_apply = trainable_var.assign_add(backprop_calc)
apply_gradients.append(grad_apply) apply_gradients.append(grad_apply)
with tf.control_dependencies([global_step_op]):
self.training_op = tf.group(*apply_gradients, name='training_op') self.training_op = tf.group(*apply_gradients, name='training_op')
self.saver = tf.train.Saver(max_to_keep=1) self.saver = tf.train.Saver(max_to_keep=1)
@@ -145,7 +149,8 @@ class Network:
f.write(str(episode_count) + "\n") f.write(str(episode_count) + "\n")
def restore_model(self, sess): def restore_model(self, sess):
if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')): if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
print("[NETWK] ({name}) Restoring model from:".format(name=self.name), print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
str(latest_checkpoint)) str(latest_checkpoint))
@@ -162,6 +167,8 @@ class Network:
if os.path.isfile(episode_count_path): if os.path.isfile(episode_count_path):
with open(episode_count_path, 'r') as f: with open(episode_count_path, 'r') as f:
self.config['start_episode'] = int(f.read()) self.config['start_episode'] = int(f.read())
else:
assert False
def make_move(self, sess, board, roll, player): def make_move(self, sess, board, roll, player):
# print(Board.pretty(board)) # print(Board.pretty(board))
@@ -234,9 +241,6 @@ class Network:
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
# print("post pubeval:", board, sep="\n") # print("post pubeval:", board, sep="\n")
# print("*"*30)
# print(board)
# print("+"*30)
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1]) outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
@@ -271,9 +275,6 @@ class Network:
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26] board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
# print("post pubeval:", board, sep="\n") # print("post pubeval:", board, sep="\n")
# print("*"*30)
# print(board)
# print("+"*30)
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1]) outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
@@ -405,7 +406,7 @@ class Network:
with tf.name_scope("final"): with tf.name_scope("final"):
merged = tf.summary.merge_all() merged = tf.summary.merge_all()
summary, _, global_step = sess.run([merged, self.training_op, self.global_step], global_step, summary, _ = sess.run([self.global_step, merged, self.training_op],
feed_dict={self.x: self.board_trans_func(prev_board, player), feed_dict={self.x: self.board_trans_func(prev_board, player),
self.value_next: scaled_final_score.reshape((1, 1))}) self.value_next: scaled_final_score.reshape((1, 1))})
writer.add_summary(summary, episode + trained_eps) writer.add_summary(summary, episode + trained_eps)
@@ -420,7 +421,7 @@ class Network:
print_time_estimate(episode) print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n") sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(sess, episode+trained_eps, global_step=global_step) self.save_model(sess, episode+trained_eps, global_step)
writer.close() writer.close()