backgammon/tensorflow_impl_tests/normal_main.py

68 lines
2.3 KiB
Python
Raw Permalink Normal View History

import tensorflow as tf
import numpy as np
import time
class Everything:
def __init__(self):
self.output_size = 1
self.hidden_size = 40
self.input_size = 30
self.input = tf.placeholder('float', [1, self.input_size])
xavier_init = tf.contrib.layers.xavier_initializer()
W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
initializer=tf.constant_initializer(-2))
W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
initializer=tf.constant_initializer(0.2))
b_1 = tf.get_variable("b_1", (self.hidden_size,),
initializer=tf.zeros_initializer)
b_2 = tf.get_variable("b_2", (self.output_size,),
initializer=tf.zeros_initializer)
value_after_input = tf.sigmoid(tf.matmul(self.input, W_1) + b_1, name='hidden_layer')
self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
apply_gradients = []
trainable_vars = tf.trainable_variables()
gradients = tf.gradients(self.value, trainable_vars)
difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), [])
with tf.variable_scope('apply_gradients'):
for gradient, trainable_var in zip(gradients, trainable_vars):
backprop_calc = 0.1 * difference_in_values * gradient
grad_apply = trainable_var.assign_add(backprop_calc)
apply_gradients.append(grad_apply)
self.training_op = tf.group(*apply_gradients, name='training_op')
def eval(self):
input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
start = time.time()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(20):
val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)})
print(time.time() - start)
print(val)
sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)})
val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)})
print(val)
everything = Everything()
everything.eval()