More comments; backprop has been somewhat tested in eager_main.py

and normal_main.py.
This commit is contained in:
Alexander Munch-Hansen 2018-05-11 13:35:01 +02:00
parent 504308a9af
commit 93224864a4
3 changed files with 54 additions and 42 deletions

View File

@ -160,7 +160,8 @@ class Network:
def calc_vals(self, states): def calc_vals(self, states):
""" """
:param states: Calculate a score of each state in states
:param states: A number of states. The states have to be transformed before being given to this function.
:return: :return:
""" """
values = self.model.predict_on_batch(states) values = self.model.predict_on_batch(states)
@ -205,8 +206,8 @@ class Network:
def make_move_0_ply(self, board, roll, player): def make_move_0_ply(self, board, roll, player):
""" """
Find the best move given a board, roll and a player, by finding all possible states one can go to Find the best move given a board, roll and a player, by finding all possible states one can go to
and then picking the best, by using the network to evaluate each state. The highest score is picked and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
for the 1-player and the max(1-score) is picked for the -1-player. The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
:param sess: :param sess:
:param board: Current board :param board: Current board
@ -231,6 +232,7 @@ class Network:
def make_move_1_ply(self, board, roll, player): def make_move_1_ply(self, board, roll, player):
""" """
Return the best board and best score based on a 1-ply look-ahead.
:param board: :param board:
:param roll: :param roll:
:param player: :param player:
@ -244,9 +246,9 @@ class Network:
def calculate_1_ply(self, board, roll, player): def calculate_1_ply(self, board, roll, player):
""" """
Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
exhaustive search is performed on the best 15 moves from the single ply. all moves and scores are found for them. The expected score is then calculated for each of the boards from the
0-ply.
:param sess: :param sess:
:param board: :param board:
:param roll: The original roll :param roll: The original roll
@ -343,7 +345,6 @@ class Network:
def calc_n_ply(self, n_init, sess, board, player, roll): def calc_n_ply(self, n_init, sess, board, player, roll):
""" """
:param n_init: :param n_init:
:param sess: :param sess:
:param board: :param board:
@ -371,7 +372,6 @@ class Network:
def n_ply(self, n_init, sess, boards_init, player_init): def n_ply(self, n_init, sess, boards_init, player_init):
""" """
:param n_init: :param n_init:
:param sess: :param sess:
:param boards_init: :param boards_init:

View File

@ -18,11 +18,12 @@ input_size = 30
model = tf.keras.Sequential([ model = tf.keras.Sequential([
tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)), tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
]) ])
#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0] input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
@ -38,45 +39,56 @@ start = time.time()
all_predictions = model.predict_on_batch(all_input) all_predictions = model.predict_on_batch(all_input)
print(all_predictions)
print(time.time() - start)
learning_rate = 0.1
start = time.time()
all_predictions = [model(single_in) for _ in range(20)]
#print(all_predictions[:10])
print(time.time() - start)
print("-"*30)
with tf.GradientTape() as tape: with tf.GradientTape() as tape:
val = model(single_in) value = model(single_in)
grads = tape.gradient(val, model.variables)
# grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
# print(model.variables[0][0]) print("Before:", value)
weights_before = model.weights[0]
start = time.time() grads = tape.gradient(value, model.variables)
#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] print("/"*40,"model_variables","/"*40)
print(model.variables)
print("/"*40,"grads","/"*40)
print(grads)
start = time.time() difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
for gradient, trainable_var in zip(grads, model.variables):
backprop_calc = 0.1 * (0.9 - val) * gradient
trainable_var.assign_add(backprop_calc)
# opt.apply_gradients(zip(grads, model.variables)) for grad, train_var in zip(grads, model.variables):
backprop_calc = 0.1 * difference_in_values * grad
train_var.assign_add(backprop_calc)
print(time.time() - start) value = model(single_in)
print("/"*40,"model_variables","/"*40)
print(model.variables)
print("After:", value)
print(model(single_in))
vals = model.predict_on_batch(all_input) # # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
vals = list(vals) #
vals[3] = 4 # # print(model.variables[0][0])
print(vals) # weights_before = model.weights[0]
print(np.argmax(np.array(vals))) #
# start = time.time()
# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
#
# start = time.time()
# for gradient, trainable_var in zip(grads, model.variables):
# backprop_calc = 0.1 * (0.9 - val) * gradient
# trainable_var.assign_add(backprop_calc)
#
# # opt.apply_gradients(zip(grads, model.variables))
#
# print(time.time() - start)
#
# print(model(single_in))
#
# vals = model.predict_on_batch(all_input)
# vals = list(vals)
# vals[3] = 4
# print(vals)
# print(np.argmax(np.array(vals)))
# tfe.Saver(model.variables).save("./tmp_ckpt") # tfe.Saver(model.variables).save("./tmp_ckpt")

View File

@ -16,9 +16,9 @@ class Everything:
W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size), W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
initializer=xavier_init) initializer=tf.constant_initializer(-2))
W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
initializer=xavier_init) initializer=tf.constant_initializer(0.2))
b_1 = tf.get_variable("b_1", (self.hidden_size,), b_1 = tf.get_variable("b_1", (self.hidden_size,),
initializer=tf.zeros_initializer) initializer=tf.zeros_initializer)