added some comments and removed some old code
parent 77d82f6883
commit 160f5bd737

network.py (36 lines changed)

@@ -1,5 +1,4 @@
 import tensorflow as tf
-from cup import Cup
 import numpy as np
 from board import Board
 import os

@@ -77,10 +76,6 @@ class Network:

         self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')

-        # tf.reduce_sum basically finds the sum of its input, so this gives the
-        # difference between the two values, in case they should be lists, which
-        # they might be if our input changes
-
         # TODO: Alexander thinks that self.value will be computed twice (instead of once)
         difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
         tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))

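For context on what this hunk keeps: difference_in_values is the TD error V_{t+1} - V_t, collapsed from a 1x1 tensor to a plain scalar, and the summary op only records its magnitude. A minimal stand-alone sketch of that pattern in the same TensorFlow 1.x style as the file; the placeholder shapes and names are assumptions, not the repository's actual graph:

    import tensorflow as tf

    # Assumed 1x1 shapes: each tensor holds a single board evaluation in [0, 1].
    value = tf.placeholder(tf.float32, shape=(1, 1), name='value')            # V_t
    value_next = tf.placeholder(tf.float32, shape=(1, 1), name='value_next')  # V_{t+1}

    # tf.subtract gives V_{t+1} - V_t; tf.reshape(..., []) turns the 1x1 tensor
    # into a plain scalar so it can later scale gradients directly.
    difference_in_values = tf.reshape(tf.subtract(value_next, value), [])

    # Only the magnitude of the TD error is logged for TensorBoard.
    tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
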
@@ -95,7 +90,6 @@ class Network:

         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
-                # Hopefully this is Δw_t = α(V_{t+1} - V_t)∇_w V_t.
                 backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)

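The removed comment names the TD(0) update Δw_t = α(V_{t+1} - V_t)∇_w V_t, which the surviving loop applies by hand with assign_add instead of a built-in optimizer. A rough self-contained sketch of that technique on a toy one-layer value network; the toy network, the learning rate constant and the final tf.group call are assumptions, not the repository's code:

    import tensorflow as tf

    # Tiny stand-in network (5 inputs -> 1 sigmoid output) so the sketch runs on its own.
    x = tf.placeholder(tf.float32, shape=(1, 5), name='input')
    value_next = tf.placeholder(tf.float32, shape=(1, 1), name='value_next')   # V_{t+1}
    W = tf.Variable(tf.random_normal((5, 1)), name='W')
    b = tf.Variable(tf.zeros((1,)), name='b')
    value = tf.sigmoid(tf.matmul(x, W) + b, name='value')                      # V_t

    learning_rate = 0.1                                                        # assumed alpha
    difference_in_values = tf.reshape(value_next - value, [])                  # V_{t+1} - V_t

    trainable_vars = tf.trainable_variables()
    gradients = tf.gradients(value, trainable_vars)    # one dV_t/dw per variable

    apply_gradients = []
    with tf.variable_scope('apply_gradients'):
        for gradient, trainable_var in zip(gradients, trainable_vars):
            # TD(0) update: w <- w + alpha * (V_{t+1} - V_t) * dV_t/dw
            backprop_calc = learning_rate * difference_in_values * gradient
            apply_gradients.append(trainable_var.assign_add(backprop_calc))

    # Group the per-variable updates into one op that a session can run per move.
    training_op = tf.group(*apply_gradients, name='training_op')
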
@@ -148,6 +142,10 @@ class Network:


     def gen_21_rolls(self):
+        """
+        Calculate all possible rolls, [[1,1], [1,2] ..]
+        :return: All possible rolls
+        """
         a = []
         for x in range(1,7):
             for y in range(1,7):

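The new docstring says gen_21_rolls enumerates every roll, while the visible body loops x and y over 1..6, which by itself gives 36 ordered pairs; the 21 in the name are the unordered rolls. The rest of the repository's method is cut off by the hunk, so the following is a hypothetical stand-alone version that yields exactly 21, not the actual body:

    def gen_21_rolls():
        """Return the 21 distinct dice rolls, [[1, 1], [1, 2], ..., [6, 6]]."""
        rolls = []
        for x in range(1, 7):
            # Start the inner loop at x so [2, 1] is not added after [1, 2].
            for y in range(x, 7):
                rolls.append([x, y])
        return rolls  # 6 doubles + 15 mixed rolls = 21 entries

    assert len(gen_21_rolls()) == 21
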
@@ -187,7 +185,9 @@ class Network:
             spec_roll_scores = []
             all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)

-            spec_roll_scores.append([self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards])
+            spec_roll_scores.append(
+                [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards]
+            )

             best_score = max(spec_roll_scores)

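The reformatted append collects, for one opponent roll, the evaluations of every board the opponent (player*-1) can reach, and best_score then reduces over them. A hedged sketch of that per-roll lookahead as a free-standing helper; the helper name and the plain max over the evaluations are my assumptions, while Board.calculate_legal_states, eval_state and board_trans_func are the names used in the diff:

    from board import Board   # the repository's board helper, as imported at the top of network.py

    # Hypothetical helper mirroring the pattern in the hunk above; `net` is a Network instance.
    def best_opponent_score(net, sess, a_board, player, roll):
        # Every board the opponent (player * -1) can reach from a_board with this roll.
        all_rolls_boards = Board.calculate_legal_states(a_board, player * -1, roll)

        # Evaluate each resulting board with the network, from the opponent's point of view.
        scores = [net.eval_state(sess, net.board_trans_func(new_board, player * -1))
                  for new_board in all_rolls_boards]

        # Assume the opponent picks the move the network rates highest.
        return max(scores)
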
@@ -201,8 +201,27 @@ class Network:
             return [best_board, max(all_rolls_scores)]

     def eval(self, episode_count, trained_eps = 0, tf_session = None):
+        """
+        Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval,
+        a model which has been given random weights, so it acts deterministically random.
+
+        :param episode_count: The amount of episodes to run
+        :param trained_eps: The amount of episodes the model we want to evaluate has trained
+        :param tf_session:
+        :return: outcomes: The outcomes of the evaluation session
+        """
+
         def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+            """
+            Do the actual evaluation
+
+            :param sess:
+            :param method: Either pubeval or dumbeval
+            :param episodes: Amount of episodes to use in the evaluation
+            :param trained_eps:
+            :return: outcomes: Described above
+            """

             start_time = time.time()

             def print_time_estimate(eps_completed):

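The added docstrings describe the evaluation protocol: play a number of episodes against a fixed baseline (pubeval, or dumbeval with random weights) and return one outcome per game. A minimal sketch of the loop shape they imply; do_eval_sketch and play_one_game are hypothetical stand-ins, not the repository's do_eval:

    import random
    import time

    def do_eval_sketch(episodes=1000, method='pubeval'):
        """Play `episodes` games against `method` and collect one outcome per game."""
        start_time = time.time()
        outcomes = []
        for _ in range(episodes):
            # Hypothetical stand-in: +1 for a win against the baseline, -1 for a loss.
            outcomes.append(play_one_game(method))
        print("evaluation took", time.time() - start_time, "seconds")
        return outcomes

    def play_one_game(method):
        # Placeholder so the sketch runs; the real games come from the Board logic.
        return random.choice([1, -1])
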
@@ -337,6 +356,9 @@ class Network:
                                               (random.randrange(1, 7), random.randrange(1, 7)),
                                               player)

+                    # print("The evaluation of the previous state:\n", self.eval_state(sess, self.board_trans_func(prev_board, player)))
+                    # print("The evaluation of the current_state:\n", cur_board_value)
+
                     # adjust weights
                     sess.run(self.training_op,
                              feed_dict={self.x: self.board_trans_func(prev_board, player),