From 9a2d87516e68f687bea4dd53251cc12e70d52713 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Wed, 9 May 2018 00:33:05 +0200 Subject: [PATCH] Ongoing rewrite of network to use an eager model. We're now capable of evaluating a list of states with network.py. We can also save and restore models. --- network.py | 128 ++++++++++++--------------- network_test.py | 26 +++--- tensorflow_impl_tests/eager_main.py | 44 +++++++-- tensorflow_impl_tests/normal_main.py | 20 ++++- 4 files changed, 127 insertions(+), 91 deletions(-) diff --git a/network.py b/network.py index 84802e3..4f63b75 100644 --- a/network.py +++ b/network.py @@ -8,6 +8,7 @@ import random from eval import Eval import glob from operator import itemgetter +import tensorflow.contrib.eager as tfe class Network: # board_features_quack has size 28 @@ -25,6 +26,10 @@ class Network: return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) def __init__(self, config, name): + tf.enable_eager_execution() + + xavier_init = tf.contrib.layers.xavier_initializer() + self.config = config self.checkpoint_path = os.path.join(config['model_storage_path'], config['model']) @@ -38,17 +43,7 @@ class Network: self.hidden_size = 40 self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - - self.global_step = tf.Variable(0, trainable=False, name="global_step") - self.learning_rate = tf.maximum(self.min_learning_rate, - tf.train.exponential_decay(self.max_learning_rate, - self.global_step, 50000, - 0.96, - staircase=True), - name="learning_rate") - - - + self.global_step = "lol" # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): @@ -57,62 +52,61 @@ class Network: else: self.episodes_trained = 0 - self.x = tf.placeholder('float', [1, self.input_size], name='input') - self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next") - xavier_init = tf.contrib.layers.xavier_initializer() - - W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size), - initializer=xavier_init) - W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), - initializer=xavier_init) - - b_1 = tf.get_variable("b_1", (self.hidden_size,), - initializer=tf.zeros_initializer) - b_2 = tf.get_variable("b_2", (self.output_size,), - initializer=tf.zeros_initializer) + self.model = tf.keras.Sequential([ + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, + input_shape=(1,30)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) + ]) - value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') - self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') - # TODO: Alexander thinks that self.value will be computed twice (instead of once) - difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), []) + + def do_backprop(self, prev_state, value_next): + self.learning_rate = tf.maximum(self.min_learning_rate, + tf.train.exponential_decay(self.max_learning_rate, + self.global_step, 50000, + 0.96, + staircase=True), + name="learning_rate") + + + with tf.GradientTape() as tape: + value = self.model(np.array(input).reshape(1, -1)) + grads = tape.gradient(value, self.model.variables) + + difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - trainable_vars = tf.trainable_variables() - gradients = tf.gradients(self.value, trainable_vars) - - apply_gradients = [] - global_step_op = self.global_step.assign_add(1) - with tf.variable_scope('apply_gradients'): - for gradient, trainable_var in zip(gradients, trainable_vars): - backprop_calc = self.learning_rate * difference_in_values * gradient - grad_apply = trainable_var.assign_add(backprop_calc) - apply_gradients.append(grad_apply) + for grad, train_var in zip(grads, self.model.variables): + backprop_calc = self.learning_rate * difference_in_values * grad + train_var.assign_add(backprop_calc) - - with tf.control_dependencies([global_step_op]): - self.training_op = tf.group(*apply_gradients, name='training_op') - - self.saver = tf.train.Saver(max_to_keep=1) def eval_state(self, sess, state): return sess.run(self.value, feed_dict={self.x: state}) - def save_model(self, sess, episode_count, global_step): - self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) - with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: - print("[NETWK] ({name}) Saving model to:".format(name=self.name), - os.path.join(self.checkpoint_path, 'model.ckpt')) - f.write(str(episode_count) + "\n") + def save_model(self, episode_count, global_step): + tfe.Saver(self.model.variables).save("./tmp_ckpt", global_step=global_step) + #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) + #with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: + # print("[NETWK] ({name}) Saving model to:".format(name=self.name), + # os.path.join(self.checkpoint_path, 'model.ckpt')) + # f.write(str(episode_count) + "\n") - def restore_model(self, sess): + + def calc_vals(self, states): + values = self.model.predict_on_batch(states) + self.save_model(0, 432) + return values + + + def restore_model(self): """ Restore a model for a session, such that a trained model and either be further trained or used for evaluation @@ -126,35 +120,29 @@ class Network: latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) - self.saver.restore(sess, latest_checkpoint) - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) + tfe.Saver(model.variables).restore(latest_checkpoint) + + variables_names = [v.name for v in self.model.variables] + # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')): - checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model") - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path) + else: + latest_checkpoint = tf.train.latest_checkpoint("./") print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) - self.saver.restore(sess, latest_checkpoint) + tfe.Saver(self.model.variables).restore(latest_checkpoint) - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) - elif not self.config['force_creation']: - print("You need to have baseline_model inside models") - exit() + #variables_names = [v.name for v in self.model.variables] + + # Restore trained episode count for model + #episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") + #if os.path.isfile(episode_count_path): + # with open(episode_count_path, 'r') as f: + # self.config['start_episode'] = int(f.read()) def make_move(self, sess, board, roll, player): diff --git a/network_test.py b/network_test.py index a514dfc..58fec8a 100644 --- a/network_test.py +++ b/network_test.py @@ -11,12 +11,10 @@ import main config = main.config.copy() config['model'] = "tesauro_blah" config['force_creation'] = True +config['board_representation'] = 'quack-fat' network = Network(config, config['model']) -session = tf.Session() - -session.run(tf.global_variables_initializer()) -network.restore_model(session) +network.restore_model() initial_state = Board.initial_state initial_state_1 = ( 0, @@ -51,14 +49,7 @@ def gen_21_rolls(): return a -def calc_all_scores(board, player): - scores = [] - trans_board = network.board_trans_func(board, player) - rolls = gen_21_rolls() - for roll in rolls: - score = network.eval_state(session, trans_board) - scores.append(score) - return scores + def calculate_possible_states(board): @@ -83,9 +74,16 @@ def calculate_possible_states(board): #print("-"*30) #print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1)) +board = network.board_trans_func(Board.initial_state, 1) + +input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] +all_input = np.array([input for _ in range(20)]) +print(network.calc_vals(all_input)) + + #print(" "*10 + "network_test") -print(" "*20 + "Depth 1") -print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4])) +#print(" "*20 + "Depth 1") +#print(network.calc_n_ply(1, session, Board.initial_state, 1, [2, 4])) #print(scores) diff --git a/tensorflow_impl_tests/eager_main.py b/tensorflow_impl_tests/eager_main.py index 1b58abc..b2da143 100644 --- a/tensorflow_impl_tests/eager_main.py +++ b/tensorflow_impl_tests/eager_main.py @@ -1,25 +1,32 @@ import time import numpy as np import tensorflow as tf +import tensorflow.contrib.eager as tfe + tf.enable_eager_execution() +xavier_init = tf.contrib.layers.xavier_initializer() +opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1) + output_size = 1 hidden_size = 40 input_size = 30 - model = tf.keras.Sequential([ - tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(1,30)), - tf.keras.layers.Dense(1, activation="sigmoid") + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) ]) +#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./")) + input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] -all_input = np.array([input for _ in range(8500)]) +all_input = np.array([input for _ in range(20)]) + single_in = np.array(input).reshape(1,-1) @@ -34,8 +41,33 @@ print(time.time() - start) start = time.time() -all_predictions = [model(single_in) for _ in range(8500)] +all_predictions = [model(single_in) for _ in range(20)] -print(all_predictions[:10]) +#print(all_predictions[:10]) print(time.time() - start) +print("-"*30) +with tf.GradientTape() as tape: + val = model(np.array(input).reshape(1,-1)) +grads = tape.gradient(val, model.variables) + +grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] + +# print(model.variables[0][0]) +weights_before = model.weights[0] + +start = time.time() +#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] + +start = time.time() +#for gradient, trainable_var in zip(grads, model.variables): +# backprop_calc = 0.1 * (val - np.random.uniform(-1, 1)) * gradient +# trainable_var.assign_add(backprop_calc) + +opt.apply_gradients(zip(grads, model.variables)) + +print(time.time() - start) + +print(model(np.array(input).reshape(1,-1))) + +tfe.Saver(model.variables).save("./tmp_ckpt") diff --git a/tensorflow_impl_tests/normal_main.py b/tensorflow_impl_tests/normal_main.py index acfc044..865f017 100644 --- a/tensorflow_impl_tests/normal_main.py +++ b/tensorflow_impl_tests/normal_main.py @@ -29,12 +29,30 @@ class Everything: self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') + apply_gradients = [] + + + trainable_vars = tf.trainable_variables() + gradients = tf.gradients(self.value, trainable_vars) + + + with tf.variable_scope('apply_gradients'): + for gradient, trainable_var in zip(gradients, trainable_vars): + backprop_calc = self.learning_rate * difference_in_values * gradient + grad_apply = trainable_var.assign_add(backprop_calc) + apply_gradients.append(grad_apply) + + with tf.control_dependencies([global_step_op]): + self.training_op = tf.group(*apply_gradients, name='training_op') + + + def eval(self): input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0]) start = time.time() sess = tf.Session() sess.run(tf.global_variables_initializer()) - for i in range(8500): + for i in range(20): val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)}) print(time.time() - start) print(val)