added some comments and removed some old code

Alexander Munch-Hansen 2018-04-22 19:13:46 +02:00
parent 77d82f6883
commit 160f5bd737


@@ -1,5 +1,4 @@
 import tensorflow as tf
-from cup import Cup
 import numpy as np
 from board import Board
 import os
@@ -77,10 +76,6 @@ class Network:
 self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-# tf.reduce_sum basically finds the sum of its input, so this gives the
-# difference between the two values, in case they should be lists, which
-# they might be if our input changes
 # TODO: Alexander thinks that self.value will be computed twice (instead of once)
 difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
 tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
@@ -95,7 +90,6 @@ class Network:
 with tf.variable_scope('apply_gradients'):
 for gradient, trainable_var in zip(gradients, trainable_vars):
-# Hopefully this is Δw_t = α(V_{t+1} - V_t)∇_w V_t.
 backprop_calc = self.learning_rate * difference_in_values * gradient
 grad_apply = trainable_var.assign_add(backprop_calc)
 apply_gradients.append(grad_apply)
@@ -148,6 +142,10 @@ class Network:
 def gen_21_rolls(self):
+"""
+Calculate all possible rolls, [[1,1], [1,2], ...]
+:return: All possible rolls
+"""
 a = []
 for x in range(1,7):
 for y in range(1,7):
@@ -187,7 +185,9 @@ class Network:
 spec_roll_scores = []
 all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
-spec_roll_scores.append([self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards])
+spec_roll_scores.append(
+    [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards]
+)
 best_score = max(spec_roll_scores)
@@ -201,8 +201,27 @@ class Network:
 return [best_board, max(all_rolls_scores)]
 def eval(self, episode_count, trained_eps = 0, tf_session = None):
+"""
+Used to evaluate a model. The opponent can be either pubeval, a model playing at an intermediate level, or dumbeval,
+a model which has been given random weights, so its play is deterministic but effectively random.
+:param episode_count: The number of episodes to run
+:param trained_eps: The number of episodes the model being evaluated has been trained for
+:param tf_session:
+:return: outcomes: The outcomes of the evaluation session
+"""
 def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+"""
+Run the actual evaluation.
+:param sess:
+:param method: Either pubeval or dumbeval
+:param episodes: Number of episodes to use in the evaluation
+:param trained_eps:
+:return: outcomes: Described above
+"""
 start_time = time.time()
 def print_time_estimate(eps_completed):
@@ -337,6 +356,9 @@ class Network:
 (random.randrange(1, 7), random.randrange(1, 7)),
 player)
+# print("The evaluation of the previous state:\n", self.eval_state(sess, self.board_trans_func(prev_board, player)))
+# print("The evaluation of the current_state:\n", cur_board_value)
 # adjust weights
 sess.run(self.training_op,
 feed_dict={self.x: self.board_trans_func(prev_board, player),
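
For context on the apply_gradients hunk: the removed comment refers to the TD(0)-style weight update Δw_t = α(V_{t+1} - V_t)∇_w V_t, which the loop realises by calling assign_add with learning_rate * difference_in_values * gradient for each trainable variable. Below is a minimal NumPy sketch of that update for a single sigmoid value unit; the names (td_update, sigmoid, alpha) are illustrative only and not part of this repository.

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def td_update(w, x_t, v_next, alpha=0.01):
        """One TD(0) step: w += alpha * (V_{t+1} - V_t) * grad_w V_t.

        w      -- weight vector of a single sigmoid value unit (hypothetical)
        x_t    -- feature vector of the current board state
        v_next -- value estimate of the next state, V_{t+1}
        """
        v_t = sigmoid(w @ x_t)            # V_t = sigma(w . x_t)
        grad = v_t * (1.0 - v_t) * x_t    # dV_t/dw for a sigmoid unit
        return w + alpha * (v_next - v_t) * grad

    # Tiny usage example with random data
    rng = np.random.default_rng(0)
    w = rng.normal(size=4)
    x = rng.normal(size=4)
    w = td_update(w, x, v_next=0.7)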