backgammon/tensorflow_impl_tests/eager_main.py

95 lines
2.3 KiB
Python

import time
import numpy as np
import tensorflow as tf
from board import Board
import tensorflow.contrib.eager as tfe
tf.enable_eager_execution()
xavier_init = tf.contrib.layers.xavier_initializer()
opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
output_size = 1
hidden_size = 40
input_size = 30
model = tf.keras.Sequential([
tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
])
# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)])
single_in = Board.board_features_quack_fat(input, 1)
start = time.time()
all_predictions = model.predict_on_batch(all_input)
learning_rate = 0.1
with tf.GradientTape() as tape:
value = model(single_in)
print("Before:", value)
grads = tape.gradient(value, model.variables)
print("/"*40,"model_variables","/"*40)
print(model.variables)
print("/"*40,"grads","/"*40)
print(grads)
difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
for grad, train_var in zip(grads, model.variables):
backprop_calc = 0.1 * difference_in_values * grad
train_var.assign_add(backprop_calc)
value = model(single_in)
print("/"*40,"model_variables","/"*40)
print(model.variables)
print("After:", value)
# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
#
# # print(model.variables[0][0])
# weights_before = model.weights[0]
#
# start = time.time()
# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
#
# start = time.time()
# for gradient, trainable_var in zip(grads, model.variables):
# backprop_calc = 0.1 * (0.9 - val) * gradient
# trainable_var.assign_add(backprop_calc)
#
# # opt.apply_gradients(zip(grads, model.variables))
#
# print(time.time() - start)
#
# print(model(single_in))
#
# vals = model.predict_on_batch(all_input)
# vals = list(vals)
# vals[3] = 4
# print(vals)
# print(np.argmax(np.array(vals)))
# tfe.Saver(model.variables).save("./tmp_ckpt")