Ongoing rewrite of the network to use an eager model. network.py can now
evaluate a list of states, and models can be saved and restored.
parent 7b308be4e2
commit 9a2d87516e

network.py (128 changed lines)
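
For orientation, here is a minimal sketch of the eager workflow this commit is building toward, assuming TF 1.x with eager execution. The flat (30,) input shape and the random stand-in states are simplifications for the sketch; the diffs below use input_shape=(1,30) and real board encodings.

    import numpy as np
    import tensorflow as tf
    import tensorflow.contrib.eager as tfe

    tf.enable_eager_execution()

    # Same architecture as the Sequential model introduced below: 30 -> 40 -> 1.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    # Evaluate a whole list of encoded board states in one call.
    states = np.random.rand(20, 30).astype(np.float32)  # stand-ins for 20 encoded boards
    values = model.predict_on_batch(states)             # shape (20, 1)

    # Save and restore the weights with the contrib.eager Saver.
    tfe.Saver(model.variables).save("./tmp_ckpt")
    tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))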
@@ -8,6 +8,7 @@ import random
 from eval import Eval
 import glob
 from operator import itemgetter
+import tensorflow.contrib.eager as tfe
 
 class Network:
     # board_features_quack has size 28
@@ -25,6 +26,10 @@ class Network:
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
 
     def __init__(self, config, name):
+        tf.enable_eager_execution()
+
+        xavier_init = tf.contrib.layers.xavier_initializer()
+
         self.config = config
         self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
 
@@ -38,17 +43,7 @@ class Network:
         self.hidden_size = 40
         self.max_learning_rate = 0.1
         self.min_learning_rate = 0.001
-        self.global_step = tf.Variable(0, trainable=False, name="global_step")
-        self.learning_rate = tf.maximum(self.min_learning_rate,
-                                        tf.train.exponential_decay(self.max_learning_rate,
-                                                                   self.global_step, 50000,
-                                                                   0.96,
-                                                                   staircase=True),
-                                        name="learning_rate")
-
-
-
-
+        self.global_step = "lol"
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
@@ -57,62 +52,61 @@ class Network:
         else:
             self.episodes_trained = 0
 
-        self.x = tf.placeholder('float', [1, self.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
-
-        xavier_init = tf.contrib.layers.xavier_initializer()
-
-        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
-                              initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
-                              initializer=xavier_init)
-
-        b_1 = tf.get_variable("b_1", (self.hidden_size,),
-                              initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (self.output_size,),
-                              initializer=tf.zeros_initializer)
-
-        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
-
-        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
-        difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
+        self.model = tf.keras.Sequential([
+            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
+                                  input_shape=(1,30)),
+            tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
+        ])
+
+    def do_backprop(self, prev_state, value_next):
+        self.learning_rate = tf.maximum(self.min_learning_rate,
+                                        tf.train.exponential_decay(self.max_learning_rate,
+                                                                   self.global_step, 50000,
+                                                                   0.96,
+                                                                   staircase=True),
+                                        name="learning_rate")
+
+        with tf.GradientTape() as tape:
+            value = self.model(np.array(input).reshape(1, -1))
+        grads = tape.gradient(value, self.model.variables)
+
+        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
         tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
 
-        trainable_vars = tf.trainable_variables()
-        gradients = tf.gradients(self.value, trainable_vars)
-
-        apply_gradients = []
-
         global_step_op = self.global_step.assign_add(1)
 
         with tf.variable_scope('apply_gradients'):
-            for gradient, trainable_var in zip(gradients, trainable_vars):
-                backprop_calc = self.learning_rate * difference_in_values * gradient
-                grad_apply = trainable_var.assign_add(backprop_calc)
-                apply_gradients.append(grad_apply)
-
-        with tf.control_dependencies([global_step_op]):
-            self.training_op = tf.group(*apply_gradients, name='training_op')
-
-        self.saver = tf.train.Saver(max_to_keep=1)
+            for grad, train_var in zip(grads, self.model.variables):
+                backprop_calc = self.learning_rate * difference_in_values * grad
+                train_var.assign_add(backprop_calc)
 
     def eval_state(self, sess, state):
         return sess.run(self.value, feed_dict={self.x: state})
 
-    def save_model(self, sess, episode_count, global_step):
-        self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
-        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
-            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
-                  os.path.join(self.checkpoint_path, 'model.ckpt'))
-            f.write(str(episode_count) + "\n")
-
-    def restore_model(self, sess):
+    def save_model(self, episode_count, global_step):
+        tfe.Saver(self.model.variables).save("./tmp_ckpt", global_step=global_step)
+        #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
+        #with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
+        #    print("[NETWK] ({name}) Saving model to:".format(name=self.name),
+        #          os.path.join(self.checkpoint_path, 'model.ckpt'))
+        #    f.write(str(episode_count) + "\n")
+
+    def calc_vals(self, states):
+        values = self.model.predict_on_batch(states)
+        self.save_model(0, 432)
+        return values
+
+    def restore_model(self):
         """
         Restore a model for a session, such that a trained model and either be further trained or
         used for evaluation
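
The do_backprop method added above differentiates the value output itself and nudges every weight by learning_rate * (value_next - value) * gradient, a TD(0)-style update rather than the usual loss minimization. A self-contained sketch of that idea in eager mode (the scalar reshape mirrors the difference_in_values reshape in the diff; the random state and target are placeholders for real data):

    import numpy as np
    import tensorflow as tf

    tf.enable_eager_execution()

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    def td_update(state, value_next, learning_rate=0.01):
        # Record the forward pass so the value itself can be differentiated.
        with tf.GradientTape() as tape:
            value = model(state.reshape(1, -1))
        grads = tape.gradient(value, model.variables)   # dV/dw for every weight
        td_error = tf.reshape(value_next - value, [])   # scalar TD error
        for grad, var in zip(grads, model.variables):
            var.assign_add(learning_rate * td_error * grad)  # w += a * (V' - V) * dV/dw

    td_update(np.random.rand(30).astype(np.float32), value_next=1.0)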
@@ -126,35 +120,29 @@ class Network:
             latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
             print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                   str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
+            tfe.Saver(model.variables).restore(latest_checkpoint)
+
+            variables_names = [v.name for v in self.model.variables]
 
             # Restore trained episode count for model
             episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
             if os.path.isfile(episode_count_path):
                 with open(episode_count_path, 'r') as f:
                     self.config['start_episode'] = int(f.read())
-        elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')):
-            checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model")
-            latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
+        else:
+            latest_checkpoint = tf.train.latest_checkpoint("./")
             print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                   str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)
-
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
-        elif not self.config['force_creation']:
-            print("You need to have baseline_model inside models")
-            exit()
+            tfe.Saver(self.model.variables).restore(latest_checkpoint)
+
+            #variables_names = [v.name for v in self.model.variables]
+
+            # Restore trained episode count for model
+            #episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
+            #if os.path.isfile(episode_count_path):
+            #    with open(episode_count_path, 'r') as f:
+            #        self.config['start_episode'] = int(f.read())
 
     def make_move(self, sess, board, roll, player):
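
A round-trip sketch for the tfe.Saver calls in save_model and restore_model above; "./tmp_ckpt" and step 432 are the values the commit itself uses, and the two-layer model is assumed from __init__. (Note: the first restore branch above references model.variables without self., which would raise a NameError when that branch is hit.)

    import tensorflow as tf
    import tensorflow.contrib.eager as tfe

    tf.enable_eager_execution()

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])

    saver = tfe.Saver(model.variables)
    saver.save("./tmp_ckpt", global_step=432)   # writes tmp_ckpt-432.* and a 'checkpoint' index
    latest = tf.train.latest_checkpoint("./")   # -> "./tmp_ckpt-432"
    saver.restore(latest)                       # loads the weights back into model.variables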
@@ -11,12 +11,10 @@ import main
 config = main.config.copy()
 config['model'] = "tesauro_blah"
 config['force_creation'] = True
+config['board_representation'] = 'quack-fat'
 network = Network(config, config['model'])
 
-session = tf.Session()
-
-session.run(tf.global_variables_initializer())
-network.restore_model(session)
+network.restore_model()
 
 initial_state = Board.initial_state
 
 initial_state_1 = ( 0,
@@ -51,14 +49,7 @@ def gen_21_rolls():
 
     return a
 
-def calc_all_scores(board, player):
-    scores = []
-    trans_board = network.board_trans_func(board, player)
-    rolls = gen_21_rolls()
-    for roll in rolls:
-        score = network.eval_state(session, trans_board)
-        scores.append(score)
-    return scores
 
 
 def calculate_possible_states(board):
@@ -83,9 +74,16 @@ def calculate_possible_states(board):
 #print("-"*30)
 #print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1))
 
+board = network.board_trans_func(Board.initial_state, 1)
+
+input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0]
+all_input = np.array([input for _ in range(20)])
+print(network.calc_vals(all_input))
+
+
 #print(" "*10 + "network_test")
-print(" "*20 + "Depth 1")
-print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4]))
+#print(" "*20 + "Depth 1")
+#print(network.calc_n_ply(1, session, Board.initial_state, 1, [2, 4]))
 
 #print(scores)
@@ -1,25 +1,32 @@
 import time
 import numpy as np
 import tensorflow as tf
+import tensorflow.contrib.eager as tfe
 
 tf.enable_eager_execution()
+xavier_init = tf.contrib.layers.xavier_initializer()
+
+opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
 
 output_size = 1
 hidden_size = 40
 input_size = 30
 
 model = tf.keras.Sequential([
-    tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(1,30)),
-    tf.keras.layers.Dense(1, activation="sigmoid")
+    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)),
+    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
 ])
 
+#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
+
 input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0]
 
-all_input = np.array([input for _ in range(8500)])
+all_input = np.array([input for _ in range(20)])
 
 single_in = np.array(input).reshape(1,-1)
 
@@ -34,8 +41,33 @@ print(time.time() - start)
 
 start = time.time()
-all_predictions = [model(single_in) for _ in range(8500)]
+all_predictions = [model(single_in) for _ in range(20)]
 
-print(all_predictions[:10])
+#print(all_predictions[:10])
 print(time.time() - start)
 
+print("-"*30)
+with tf.GradientTape() as tape:
+    val = model(np.array(input).reshape(1,-1))
+grads = tape.gradient(val, model.variables)
+
+grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
+
+# print(model.variables[0][0])
+weights_before = model.weights[0]
+
+start = time.time()
+#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+
+start = time.time()
+#for gradient, trainable_var in zip(grads, model.variables):
+#    backprop_calc = 0.1 * (val - np.random.uniform(-1, 1)) * gradient
+#    trainable_var.assign_add(backprop_calc)
+
+opt.apply_gradients(zip(grads, model.variables))
+
+print(time.time() - start)
+
+print(model(np.array(input).reshape(1,-1)))
+
+tfe.Saver(model.variables).save("./tmp_ckpt")
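
The benchmark above tries two ways of applying the update in eager mode: a manual assign_add loop (left commented out) and handing the gradients to an optimizer. A sketch of the optimizer path under the same assumptions; momentum=1 is copied from the file, though values below 1 are more conventional since at 1 the accumulated past gradients never decay:

    import numpy as np
    import tensorflow as tf

    tf.enable_eager_execution()

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])
    opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)

    single_in = np.random.rand(1, 30).astype(np.float32)

    with tf.GradientTape() as tape:
        val = model(single_in)
    grads = tape.gradient(val, model.variables)
    opt.apply_gradients(zip(grads, model.variables))  # applied immediately in eager mode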
@@ -29,12 +29,30 @@ class Everything:
 
         self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
 
+        apply_gradients = []
+
+        trainable_vars = tf.trainable_variables()
+        gradients = tf.gradients(self.value, trainable_vars)
+
+        with tf.variable_scope('apply_gradients'):
+            for gradient, trainable_var in zip(gradients, trainable_vars):
+                backprop_calc = self.learning_rate * difference_in_values * gradient
+                grad_apply = trainable_var.assign_add(backprop_calc)
+                apply_gradients.append(grad_apply)
+
+        with tf.control_dependencies([global_step_op]):
+            self.training_op = tf.group(*apply_gradients, name='training_op')
+
     def eval(self):
         input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
         start = time.time()
         sess = tf.Session()
         sess.run(tf.global_variables_initializer())
-        for i in range(8500):
+        for i in range(20):
             val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)})
         print(time.time() - start)
         print(val)