diff --git a/network.py b/network.py
index 6a91198..f6e4914 100644
--- a/network.py
+++ b/network.py
@@ -160,7 +160,8 @@ class Network:
 
     def calc_vals(self, states):
         """
-        :param states:
+        Calculate a score for each state in states.
+        :param states: A number of states. The states must be transformed before being given to this function.
         :return:
         """
         values = self.model.predict_on_batch(states)
@@ -205,8 +206,8 @@ class Network:
     def make_move_0_ply(self, board, roll, player):
         """
         Find the best move given a board, roll and a player, by finding all possible states one can go to
-        and then picking the best, by using the network to evaluate each state. The highest score is picked
-        for the 1-player and the max(1-score) is picked for the -1-player.
+        and then picking the best, by using the network to evaluate each state. This is 0-ply, i.e. no look-ahead.
+        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
 
         :param sess:
         :param board: Current board
@@ -231,6 +232,7 @@ class Network:
 
     def make_move_1_ply(self, board, roll, player):
         """
+        Return the best board and best score based on a 1-ply look-ahead.
        :param board:
         :param roll:
         :param player:
@@ -244,9 +246,9 @@ class Network:
 
     def calculate_1_ply(self, board, roll, player):
         """
-        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
-        exhaustive search is performed on the best 15 moves from the single ply.
-
+        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply evaluation,
+        and then all moves and scores are found for them. The expected score is then calculated for each of the
+        boards from the 0-ply.
         :param sess:
         :param board:
         :param roll: The original roll
@@ -343,7 +345,6 @@ class Network:
 
     def calc_n_ply(self, n_init, sess, board, player, roll):
         """
-
         :param n_init:
         :param sess:
         :param board:
@@ -371,7 +372,6 @@ class Network:
 
     def n_ply(self, n_init, sess, boards_init, player_init):
         """
-
         :param n_init:
         :param sess:
         :param boards_init:
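(For reference, a minimal sketch of the 0-ply selection that the make_move_0_ply docstring above describes: score every reachable state in one batch, then take the highest score for the 1-player and the highest 1-score for the -1-player. This is an illustration, not repository code; pick_best_0_ply and possible_states are made-up names, and the real project generates the legal states from (board, roll, player) elsewhere.)

import numpy as np

def pick_best_0_ply(model, possible_states, player):
    # The network outputs P(player 1 wins) for each state; score the whole
    # batch in one call instead of evaluating states one at a time.
    scores = np.asarray(model.predict_on_batch(np.asarray(possible_states))).reshape(-1)
    # Player 1 maximizes the score, player -1 maximizes (1 - score).
    if player == -1:
        scores = 1 - scores
    best = int(np.argmax(scores))
    return possible_states[best], scores[best]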
diff --git a/tensorflow_impl_tests/eager_main.py b/tensorflow_impl_tests/eager_main.py
index f68f65f..0cce81f 100644
--- a/tensorflow_impl_tests/eager_main.py
+++ b/tensorflow_impl_tests/eager_main.py
@@ -18,11 +18,12 @@ input_size = 30
 
 model = tf.keras.Sequential([
-    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)),
-    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
+    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
+    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
 ])
 
-#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
+
+# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
 
 input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
 
@@ -38,45 +39,56 @@ start = time.time()
 
 all_predictions = model.predict_on_batch(all_input)
-print(all_predictions)
-print(time.time() - start)
+learning_rate = 0.1
 
-
-start = time.time()
-all_predictions = [model(single_in) for _ in range(20)]
-
-#print(all_predictions[:10])
-print(time.time() - start)
-
-print("-"*30)
 with tf.GradientTape() as tape:
-    val = model(single_in)
-grads = tape.gradient(val, model.variables)
+    value = model(single_in)
 
-# grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
-# print(model.variables[0][0])
-weights_before = model.weights[0]
+print("Before:", value)
 
-start = time.time()
-#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+grads = tape.gradient(value, model.variables)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("/"*40,"grads","/"*40)
+print(grads)
 
-start = time.time()
-for gradient, trainable_var in zip(grads, model.variables):
-    backprop_calc = 0.1 * (0.9 - val) * gradient
-    trainable_var.assign_add(backprop_calc)
+difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
 
-# opt.apply_gradients(zip(grads, model.variables))
+for grad, train_var in zip(grads, model.variables):
+    backprop_calc = 0.1 * difference_in_values * grad
+    train_var.assign_add(backprop_calc)
 
-print(time.time() - start)
+value = model(single_in)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("After:", value)
 
-print(model(single_in))
-vals = model.predict_on_batch(all_input)
-vals = list(vals)
-vals[3] = 4
-print(vals)
-print(np.argmax(np.array(vals)))
+# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
+#
+# # print(model.variables[0][0])
+# weights_before = model.weights[0]
+#
+# start = time.time()
+# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+#
+# start = time.time()
+# for gradient, trainable_var in zip(grads, model.variables):
+#     backprop_calc = 0.1 * (0.9 - val) * gradient
+#     trainable_var.assign_add(backprop_calc)
+#
+# # opt.apply_gradients(zip(grads, model.variables))
+#
+# print(time.time() - start)
+#
+# print(model(single_in))
+#
+# vals = model.predict_on_batch(all_input)
+# vals = list(vals)
+# vals[3] = 4
+# print(vals)
+# print(np.argmax(np.array(vals)))
 
 # tfe.Saver(model.variables).save("./tmp_ckpt")
diff --git a/tensorflow_impl_tests/normal_main.py b/tensorflow_impl_tests/normal_main.py
index 8e3887d..a8b106c 100644
--- a/tensorflow_impl_tests/normal_main.py
+++ b/tensorflow_impl_tests/normal_main.py
@@ -16,9 +16,9 @@ class Everything:
 
         W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
-                              initializer=xavier_init)
+                              initializer=tf.constant_initializer(-2))
         W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
-                              initializer=xavier_init)
+                              initializer=tf.constant_initializer(0.2))
 
         b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
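(The eager_main.py changes above perform a manual value-gradient update: the tape records V(s), gradients are taken of the value itself rather than of a loss, and each variable is nudged by 0.1 * (0.9 - V(s)) * dV/dw, i.e. a TD-style rule w <- w + alpha * (target - V(s)) * grad_w V(s) with a hard-coded target of 0.9. The sketch below is a self-contained re-run of that loop assuming TF 2.x eager execution; x, target, alpha and delta are illustrative names, and the input is a placeholder rather than a transformed board.)

import tensorflow as tf

input_size = 30

# Same two-layer sigmoid value network shape as in eager_main.py
# (default initializers here instead of the constant ones used for testing).
model = tf.keras.Sequential([
    tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(1, input_size)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])

x = tf.ones([1, 1, input_size])  # placeholder for a transformed board state
target = 0.9                     # hard-coded stand-in target, as in the diff
alpha = 0.1                      # learning rate, matching the 0.1 in the diff

with tf.GradientTape() as tape:
    value = model(x)             # V(s), the network's score for this state

# Gradients of the value itself -- no loss function, exactly as the diff does.
grads = tape.gradient(value, model.trainable_variables)

# w <- w + alpha * (target - V(s)) * dV/dw, applied to every weight and bias.
delta = tf.reshape(target - value, [])
for grad, var in zip(grads, model.trainable_variables):
    var.assign_add(alpha * delta * grad)

print("Value after one update:", float(model(x)))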