fixed *the* bug
This commit is contained in:
parent
b6c52ba476
commit
cba0f67ae2
27
network.py
27
network.py
|
@ -7,7 +7,7 @@ import time
|
|||
import sys
|
||||
import random
|
||||
from eval import Eval
|
||||
|
||||
import glob
|
||||
|
||||
class Network:
|
||||
# board_features_quack has size 28
|
||||
|
@ -38,10 +38,10 @@ class Network:
|
|||
# TODO: the best learning_rate value is not remembered; look it up and confirm the bounds below.
|
||||
self.max_learning_rate = 0.1
|
||||
self.min_learning_rate = 0.001
|
||||
# self.learning_rate = 0.01
|
||||
self.learning_rate = 0.01
|
||||
|
||||
self.global_step = tf.Variable(0, trainable=False, name="global_step")
|
||||
self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate")
|
||||
# self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate")
|
||||
|
||||
|
||||
|
||||
|
@ -88,7 +88,8 @@ class Network:
|
|||
apply_gradients = []
|
||||
|
||||
global_step_op = self.global_step.assign_add(1)
|
||||
|
||||
|
||||
|
||||
with tf.variable_scope('apply_gradients'):
|
||||
for gradient, trainable_var in zip(gradients, trainable_vars):
|
||||
# Hopefully this is Δw_t = α (V_{t+1} - V_t) ∇_w V_t.
|
||||
|
@ -96,6 +97,9 @@ class Network:
|
|||
grad_apply = trainable_var.assign_add(backprop_calc)
|
||||
apply_gradients.append(grad_apply)
|
||||
|
||||
|
||||
with tf.control_dependencies([global_step_op]):
|
||||
|
||||
self.training_op = tf.group(*apply_gradients, name='training_op')
|
||||
|
||||
self.saver = tf.train.Saver(max_to_keep=1)
|
||||
|
@ -145,7 +149,8 @@ class Network:
|
|||
f.write(str(episode_count) + "\n")
|
||||
|
||||
def restore_model(self, sess):
|
||||
if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
|
||||
if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
|
||||
|
||||
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
|
||||
print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
|
||||
str(latest_checkpoint))
|
||||
|
@ -162,6 +167,8 @@ class Network:
|
|||
if os.path.isfile(episode_count_path):
|
||||
with open(episode_count_path, 'r') as f:
|
||||
self.config['start_episode'] = int(f.read())
|
||||
else:
|
||||
assert False
|
||||
|
||||
def make_move(self, sess, board, roll, player):
|
||||
# print(Board.pretty(board))
|
||||
|
@ -234,9 +241,6 @@ class Network:
|
|||
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
||||
# print("post pubeval:", board, sep="\n")
|
||||
|
||||
# print("*"*30)
|
||||
# print(board)
|
||||
# print("+"*30)
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
@ -271,9 +275,6 @@ class Network:
|
|||
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
|
||||
# print("post pubeval:", board, sep="\n")
|
||||
|
||||
# print("*"*30)
|
||||
# print(board)
|
||||
# print("+"*30)
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
@ -405,7 +406,7 @@ class Network:
|
|||
|
||||
with tf.name_scope("final"):
|
||||
merged = tf.summary.merge_all()
|
||||
summary, _, global_step = sess.run([merged, self.training_op, self.global_step],
|
||||
global_step, summary, _ = sess.run([self.global_step, merged, self.training_op],
|
||||
feed_dict={self.x: self.board_trans_func(prev_board, player),
|
||||
self.value_next: scaled_final_score.reshape((1, 1))})
|
||||
writer.add_summary(summary, episode + trained_eps)
|
||||
|
@ -420,7 +421,7 @@ class Network:
|
|||
print_time_estimate(episode)
|
||||
|
||||
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
||||
self.save_model(sess, episode+trained_eps, global_step=global_step)
|
||||
self.save_model(sess, episode+trained_eps, global_step)
|
||||
|
||||
writer.close()
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user