More comments; backprop has been somewhat tested in eager_main.py and normal_main.py.
parent 504308a9af
commit 93224864a4

network.py | 16
network.py

@@ -160,7 +160,8 @@ class Network:
     def calc_vals(self, states):
         """
-        :param states:
+        Calculate a score for each state in states.
+        :param states: A number of states. The states have to be transformed before being given to this function.
         :return:
         """
         values = self.model.predict_on_batch(states)
@@ -205,8 +206,8 @@ class Network:
     def make_move_0_ply(self, board, roll, player):
         """
         Find the best move given a board, roll and a player, by finding all possible states one can go to
-        and then picking the best, by using the network to evaluate each state. The highest score is picked
-        for the 1-player and the max(1-score) is picked for the -1-player.
+        and then picking the best, by using the network to evaluate each state. This is 0-ply, i.e. no look-ahead.
+        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
 
         :param sess:
         :param board: Current board
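The selection rule in the docstring above can be sketched in a few lines. This is a minimal illustration, not the repo's implementation; `calc_vals` is the method from this diff, and `states` is assumed to be the already-transformed candidate states:

import numpy as np

def pick_best_0_ply(calc_vals, states, player):
    # Score every reachable state, then apply the rule from the docstring:
    # the 1-player takes max(score), the -1-player takes max(1 - score).
    scores = np.array(calc_vals(states)).flatten()
    idx = np.argmax(scores) if player == 1 else np.argmax(1 - scores)
    return states[idx], scores[idx]
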
@@ -231,6 +232,7 @@ class Network:
 
     def make_move_1_ply(self, board, roll, player):
         """
+        Return the best board and best score based on a 1-ply look-ahead.
         :param board:
         :param roll:
         :param player:
@@ -244,9 +246,9 @@ class Network:
 
     def calculate_1_ply(self, board, roll, player):
         """
-        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
-        exhaustive search is performed on the best 15 moves from the single ply.
+        Find the best move based on a 1-ply look-ahead. First the x best moves are picked at 0-ply, and then
+        all moves and scores are found for them. The expected score is then calculated for each of the boards
+        from the 0-ply.
         :param sess:
         :param board:
         :param roll: The original roll
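A rough sketch of the expected-score step the new docstring describes. It assumes a hypothetical move generator `legal_states(board, roll, player)` (not in this diff) and averages over the 21 distinct backgammon rolls, weighting non-doubles twice since they occur twice out of 36:

import itertools
import numpy as np

def expected_score_1_ply(calc_vals, legal_states, board, player):
    # For each of the opponent's 21 distinct rolls, find their best 0-ply
    # score, then average using the roll probabilities.
    total = 0.0
    for d1, d2 in itertools.combinations_with_replacement(range(1, 7), 2):
        weight = 1 if d1 == d2 else 2
        states = legal_states(board, (d1, d2), -player)  # hypothetical helper
        scores = np.array(calc_vals(states)).flatten()
        best = scores.max() if -player == 1 else (1 - scores).max()
        total += weight * best
    return total / 36.0
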
@@ -343,7 +345,6 @@ class Network:
 
     def calc_n_ply(self, n_init, sess, board, player, roll):
         """
-
         :param n_init:
         :param sess:
         :param board:
@@ -371,7 +372,6 @@ class Network:
 
     def n_ply(self, n_init, sess, boards_init, player_init):
         """
-
         :param n_init:
         :param sess:
         :param boards_init:
eager_main.py

@@ -18,11 +18,12 @@ input_size = 30
 
 
 model = tf.keras.Sequential([
-    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)),
-    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
+    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
+    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
 ])
 
-#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
+
+# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
 
 input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
 
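Swapping xavier_init for constant initializers presumably makes the weights, and therefore the backprop test, deterministic across runs. A tiny check of what tf.constant_initializer does (TF 1.x eager mode, matching the tfe.* usage in this file):

import tensorflow as tf
tf.enable_eager_execution()

layer = tf.keras.layers.Dense(3, kernel_initializer=tf.constant_initializer(-2))
layer.build((None, 5))
print(layer.kernel)  # every entry is -2.0, identical on every run
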
@@ -38,45 +39,56 @@ start = time.time()
 
 all_predictions = model.predict_on_batch(all_input)
 
-print(all_predictions)
-print(time.time() - start)
+learning_rate = 0.1
 
 
-start = time.time()
-all_predictions = [model(single_in) for _ in range(20)]
-
-#print(all_predictions[:10])
-print(time.time() - start)
-
-print("-"*30)
 with tf.GradientTape() as tape:
-    val = model(single_in)
-grads = tape.gradient(val, model.variables)
+    value = model(single_in)
 
-# grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
-
-# print(model.variables[0][0])
-weights_before = model.weights[0]
+print("Before:", value)
 
-start = time.time()
-#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+grads = tape.gradient(value, model.variables)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("/"*40,"grads","/"*40)
+print(grads)
 
-start = time.time()
-for gradient, trainable_var in zip(grads, model.variables):
-    backprop_calc = 0.1 * (0.9 - val) * gradient
-    trainable_var.assign_add(backprop_calc)
+difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
 
-# opt.apply_gradients(zip(grads, model.variables))
+for grad, train_var in zip(grads, model.variables):
+    backprop_calc = 0.1 * difference_in_values * grad
+    train_var.assign_add(backprop_calc)
 
-print(time.time() - start)
+value = model(single_in)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("After:", value)
 
-print(model(single_in))
-
-vals = model.predict_on_batch(all_input)
-vals = list(vals)
-vals[3] = 4
-print(vals)
-print(np.argmax(np.array(vals)))
+# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
+#
+# # print(model.variables[0][0])
+# weights_before = model.weights[0]
+#
+# start = time.time()
+# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+#
+# start = time.time()
+# for gradient, trainable_var in zip(grads, model.variables):
+#     backprop_calc = 0.1 * (0.9 - val) * gradient
+#     trainable_var.assign_add(backprop_calc)
+#
+# # opt.apply_gradients(zip(grads, model.variables))
+#
+# print(time.time() - start)
+#
+# print(model(single_in))
+#
+# vals = model.predict_on_batch(all_input)
+# vals = list(vals)
+# vals[3] = 4
+# print(vals)
+# print(np.argmax(np.array(vals)))
 
 # tfe.Saver(model.variables).save("./tmp_ckpt")
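The update being tested here is a TD-style rule, var += learning_rate * (target - value) * grad, with learning_rate = 0.1 and a fixed target of 0.9. A self-contained sketch of the same pattern under TF 1.x eager execution (the model and input below are stand-ins, not the repo's):

import tensorflow as tf
tf.enable_eager_execution()

model = tf.keras.Sequential([
    tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
single_in = tf.random_normal((1, 30))

with tf.GradientTape() as tape:
    value = model(single_in)

learning_rate, target = 0.1, 0.9
difference_in_values = tf.reshape(target - value, [])
grads = tape.gradient(value, model.variables)
for grad, train_var in zip(grads, model.variables):
    # Nudge each weight in the direction that moves value toward target.
    train_var.assign_add(learning_rate * difference_in_values * grad)

print(model(single_in))  # slightly closer to 0.9 than before the update
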
normal_main.py

@@ -16,9 +16,9 @@ class Everything:
 
 
         W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
-                              initializer=xavier_init)
+                              initializer=tf.constant_initializer(-2))
         W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
-                              initializer=xavier_init)
+                              initializer=tf.constant_initializer(0.2))
 
         b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
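For comparison with the eager model above, a graph-mode sketch of the same constant-initialized variables (the 30/40/1 sizes are assumed from eager_main.py; class Everything's actual sizes are not shown in this diff):

import tensorflow as tf

input_size, hidden_size, output_size = 30, 40, 1

W_1 = tf.get_variable("w_1", (input_size, hidden_size),
                      initializer=tf.constant_initializer(-2))
W_2 = tf.get_variable("w_2", (hidden_size, output_size),
                      initializer=tf.constant_initializer(0.2))
b_1 = tf.get_variable("b_1", (hidden_size,),
                      initializer=tf.zeros_initializer)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Both nets now start from identical weights, so their outputs
    # on the same input can be compared directly.
    print(sess.run(W_1)[0, :5])  # all -2.0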