diff --git a/network.py b/network.py
index 6c38216..5945eb9 100644
--- a/network.py
+++ b/network.py
@@ -1,5 +1,4 @@
 import tensorflow as tf
-from cup import Cup
 import numpy as np
 from board import Board
 import os
@@ -77,10 +76,6 @@ class Network:
 
         self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
 
-        # tf.reduce_sum basically finds the sum of its input, so this gives the
-        # difference between the two values, in case they should be lists, which
-        # they might be if our input changes
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
         difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
 
         tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
@@ -95,7 +90,6 @@ class Network:
 
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
-                # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
                 backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
@@ -148,6 +142,10 @@ class Network:
 
 
     def gen_21_rolls(self):
+        """
+        Calculate all possible rolls: [[1,1], [1,2], ...]
+        :return: All possible rolls
+        """
         a = []
         for x in range(1,7):
             for y in range(1,7):
@@ -187,7 +185,9 @@ class Network:
                 spec_roll_scores = []
                 all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
 
-                spec_roll_scores.append([self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards])
+                spec_roll_scores.append(
+                    [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards]
+                )
 
                 best_score = max(spec_roll_scores)
 
@@ -201,8 +201,27 @@ class Network:
 
         return [best_board, max(all_rolls_scores)]
 
     def eval(self, episode_count, trained_eps = 0, tf_session = None):
+        """
+        Used to evaluate a model. Can use either pubeval, a model playing at an intermediate level, or dumbeval,
+        a model which has been given random weights, so it acts deterministically random.
+
+        :param episode_count: The number of episodes to run
+        :param trained_eps: The number of episodes the model we want to evaluate has been trained for
+        :param tf_session:
+        :return: outcomes: The outcomes of the evaluation session
+        """
         def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+            """
+            Do the actual evaluation
+
+            :param sess:
+            :param method: Either pubeval or dumbeval
+            :param episodes: Number of episodes to use in the evaluation
+            :param trained_eps:
+            :return: outcomes: Described above
+            """
+
             start_time = time.time()
 
             def print_time_estimate(eps_completed):
@@ -337,6 +356,9 @@ class Network:
                                                         (random.randrange(1, 7), random.randrange(1, 7)),
                                                         player)
 
+            # print("The evaluation of the previous state:\n", self.eval_state(sess, self.board_trans_func(prev_board, player)))
+            # print("The evaluation of the current_state:\n", cur_board_value)
+
             # adjust weights
             sess.run(self.training_op,
                      feed_dict={self.x: self.board_trans_func(prev_board, player),
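
Context for the apply_gradients hunk above: the retained backprop_calc line applies the weight update that the deleted comment described, Δw_t = α(V_{t+1} - V_t)∇_w V_t, via assign_add rather than a TensorFlow optimizer. Below is a minimal NumPy sketch of that rule for illustration only; td0_update and its arguments are hypothetical names and are not part of network.py.

import numpy as np

def td0_update(weights, grad_of_value, value_next, value, learning_rate=0.1):
    # One TD(0)-style step: w <- w + alpha * (V_{t+1} - V_t) * grad_w V_t
    # (hypothetical helper, not part of network.py)
    return weights + learning_rate * (value_next - value) * grad_of_value

# Example: nudge a small weight vector toward the next state's value estimate
w = np.zeros(3)
w = td0_update(w, grad_of_value=np.array([0.2, -0.1, 0.4]), value_next=0.7, value=0.5)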