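fixed *the* bug (network.py: use a constant learning_rate of 0.01, add a control dependency on global_step_op before building training_op, and detect checkpoints in restore_model via the glob 'model.ckpt*.index')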
This commit is contained in:
parent
b6c52ba476
commit
cba0f67ae2
25
network.py
25
network.py
|
@ -7,7 +7,7 @@ import time
|
||||||
import sys
|
import sys
|
||||||
import random
|
import random
|
||||||
from eval import Eval
|
from eval import Eval
|
||||||
|
import glob
|
||||||
|
|
||||||
class Network:
|
class Network:
|
||||||
# board_features_quack has size 28
|
# board_features_quack has size 28
|
||||||
|
@ -38,10 +38,10 @@ class Network:
|
||||||
# Can't remember the best learning_rate, look this up
|
# Can't remember the best learning_rate, look this up
|
||||||
self.max_learning_rate = 0.1
|
self.max_learning_rate = 0.1
|
||||||
self.min_learning_rate = 0.001
|
self.min_learning_rate = 0.001
|
||||||
# self.learning_rate = 0.01
|
self.learning_rate = 0.01
|
||||||
|
|
||||||
self.global_step = tf.Variable(0, trainable=False, name="global_step")
|
self.global_step = tf.Variable(0, trainable=False, name="global_step")
|
||||||
self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate")
|
# self.learning_rate = tf.maximum(self.min_learning_rate, tf.train.exponential_decay(self.max_learning_rate, self.global_step, 50000, 0.96, staircase=True), name="learning_rate")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -89,6 +89,7 @@ class Network:
|
||||||
|
|
||||||
global_step_op = self.global_step.assign_add(1)
|
global_step_op = self.global_step.assign_add(1)
|
||||||
|
|
||||||
|
|
||||||
with tf.variable_scope('apply_gradients'):
|
with tf.variable_scope('apply_gradients'):
|
||||||
for gradient, trainable_var in zip(gradients, trainable_vars):
|
for gradient, trainable_var in zip(gradients, trainable_vars):
|
||||||
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
|
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
|
||||||
|
@ -96,6 +97,9 @@ class Network:
|
||||||
grad_apply = trainable_var.assign_add(backprop_calc)
|
grad_apply = trainable_var.assign_add(backprop_calc)
|
||||||
apply_gradients.append(grad_apply)
|
apply_gradients.append(grad_apply)
|
||||||
|
|
||||||
|
|
||||||
|
with tf.control_dependencies([global_step_op]):
|
||||||
|
|
||||||
self.training_op = tf.group(*apply_gradients, name='training_op')
|
self.training_op = tf.group(*apply_gradients, name='training_op')
|
||||||
|
|
||||||
self.saver = tf.train.Saver(max_to_keep=1)
|
self.saver = tf.train.Saver(max_to_keep=1)
|
||||||
|
@ -145,7 +149,8 @@ class Network:
|
||||||
f.write(str(episode_count) + "\n")
|
f.write(str(episode_count) + "\n")
|
||||||
|
|
||||||
def restore_model(self, sess):
|
def restore_model(self, sess):
|
||||||
if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
|
if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
|
||||||
|
|
||||||
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
|
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
|
||||||
print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
|
print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
|
||||||
str(latest_checkpoint))
|
str(latest_checkpoint))
|
||||||
|
@ -162,6 +167,8 @@ class Network:
|
||||||
if os.path.isfile(episode_count_path):
|
if os.path.isfile(episode_count_path):
|
||||||
with open(episode_count_path, 'r') as f:
|
with open(episode_count_path, 'r') as f:
|
||||||
self.config['start_episode'] = int(f.read())
|
self.config['start_episode'] = int(f.read())
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
def make_move(self, sess, board, roll, player):
|
def make_move(self, sess, board, roll, player):
|
||||||
# print(Board.pretty(board))
|
# print(Board.pretty(board))
|
||||||
|
@ -234,9 +241,6 @@ class Network:
|
||||||
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
||||||
# print("post pubeval:", board, sep="\n")
|
# print("post pubeval:", board, sep="\n")
|
||||||
|
|
||||||
# print("*"*30)
|
|
||||||
# print(board)
|
|
||||||
# print("+"*30)
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||||
outcomes.append(Board.outcome(board)[1])
|
outcomes.append(Board.outcome(board)[1])
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
|
@ -271,9 +275,6 @@ class Network:
|
||||||
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
|
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
|
||||||
# print("post pubeval:", board, sep="\n")
|
# print("post pubeval:", board, sep="\n")
|
||||||
|
|
||||||
# print("*"*30)
|
|
||||||
# print(board)
|
|
||||||
# print("+"*30)
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||||
outcomes.append(Board.outcome(board)[1])
|
outcomes.append(Board.outcome(board)[1])
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
|
@ -405,7 +406,7 @@ class Network:
|
||||||
|
|
||||||
with tf.name_scope("final"):
|
with tf.name_scope("final"):
|
||||||
merged = tf.summary.merge_all()
|
merged = tf.summary.merge_all()
|
||||||
summary, _, global_step = sess.run([merged, self.training_op, self.global_step],
|
global_step, summary, _ = sess.run([self.global_step, merged, self.training_op],
|
||||||
feed_dict={self.x: self.board_trans_func(prev_board, player),
|
feed_dict={self.x: self.board_trans_func(prev_board, player),
|
||||||
self.value_next: scaled_final_score.reshape((1, 1))})
|
self.value_next: scaled_final_score.reshape((1, 1))})
|
||||||
writer.add_summary(summary, episode + trained_eps)
|
writer.add_summary(summary, episode + trained_eps)
|
||||||
|
@ -420,7 +421,7 @@ class Network:
|
||||||
print_time_estimate(episode)
|
print_time_estimate(episode)
|
||||||
|
|
||||||
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
||||||
self.save_model(sess, episode+trained_eps, global_step=global_step)
|
self.save_model(sess, episode+trained_eps, global_step)
|
||||||
|
|
||||||
writer.close()
|
writer.close()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user