Added a lot of comments

Alexander Munch-Hansen 2018-05-10 15:28:33 +02:00
parent f2a67ca92e
commit 4efb229d34


@@ -19,13 +19,18 @@ class Network:
        'quack-fat' : (30, Board.board_features_quack_fat),
        'quack' : (28, Board.board_features_quack),
        'tesauro' : (198, Board.board_features_tesauro),
-       'quack-norm': (30, Board.board_features_quack_norm)
+       'quack-norm' : (30, Board.board_features_quack_norm),
+       'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }

    def custom_tanh(self, x, name=None):
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

    def __init__(self, config, name):
+        """
+        :param config:
+        :param name:
+        """
        tf.enable_eager_execution()

        xavier_init = tf.contrib.layers.xavier_initializer()
@@ -44,7 +49,6 @@ class Network:
        self.max_learning_rate = 0.1
        self.min_learning_rate = 0.001

-        #tf.train.get_or_create_global_step()
        # Restore trained episode count for model
        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
        if os.path.isfile(episode_count_path):
@@ -61,7 +65,6 @@ class Network:
        self.global_step = 0

        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
                                  input_shape=(1,self.input_size)),
@@ -69,19 +72,29 @@ class Network:
        ])

-    def exp_decay(self, max_lr, epi_counter, decay_rate, decay_steps):
-        res = max_lr * decay_rate**(epi_counter // decay_steps)
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
+        """
+        Calculates the exponential decay on a learning rate
+        :param max_lr: The learning rate that the network starts at
+        :param global_step: The global step
+        :param decay_rate: The rate at which the learning rate should decay
+        :param decay_steps: The amount of steps between each decay
+        :return: The result of the exponential decay performed on the learning rate
+        """
+        res = max_lr * decay_rate**(global_step // decay_steps)
        return res

    def do_backprop(self, prev_state, value_next):
+        """
+        Performs the Temporal-difference backpropagation step on the model
+        :param prev_state: The previous state of the game, this has its value recalculated
+        :param value_next: The value of the current move
+        :return: Nothing, the calculation is performed on the model of the network
+        """
        self.learning_rate = tf.maximum(self.min_learning_rate,
                                        self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
                                        name="learning_rate")

        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))
        grads = tape.gradient(value, self.model.variables)
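
The new exp_decay docstring describes a staircase schedule: the rate drops by a factor of decay_rate once every decay_steps steps. A minimal standalone sketch (plain Python, not the repository's code) using the constants that do_backprop passes in above:

def staircase_decay(max_lr, global_step, decay_rate, decay_steps):
    # The learning rate is multiplied by decay_rate once per decay_steps steps.
    return max_lr * decay_rate ** (global_step // decay_steps)

# With max_lr=0.1, decay_rate=0.96, decay_steps=50000 (the values used above):
#   global_step = 0       -> 0.1
#   global_step = 50000   -> 0.096
#   global_step = 100000  -> 0.09216
# do_backprop then clamps the result from below at min_learning_rate (0.001).
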
@@ -89,8 +102,6 @@ class Network:
        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))

-        # global_step_op = self.global_step.assign_add(1)
        with tf.variable_scope('apply_gradients'):
            for grad, train_var in zip(grads, self.model.variables):
                backprop_calc = self.learning_rate * difference_in_values * grad
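
The line that actually applies backprop_calc falls outside this hunk. A minimal eager-mode sketch of the per-variable TD update such a loop performs (the assign_add call is an assumption for illustration, not something shown in the diff):

import tensorflow as tf

def td_update(model, learning_rate, difference_in_values, grads):
    # Nudge each trainable variable along its gradient, scaled by the
    # learning rate and by the TD error (value_next - value).
    for grad, train_var in zip(grads, model.variables):
        backprop_calc = learning_rate * difference_in_values * grad
        train_var.assign_add(backprop_calc)  # assumed apply step
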
@@ -99,16 +110,25 @@ class Network:
    def print_variables(self):
+        """
+        Prints all the variables of the model
+        :return:
+        """
        variables = self.model.variables
        for k in variables:
            print(k)

    def eval_state(self, state):
+        """
+        Evaluates a single state
+        :param state:
+        :return:
+        """
        return self.model(state.reshape(1,-1))

    def save_model(self, episode_count):
        """
+        Saves the model of the network, it references global_step as self.global_step
        :param episode_count:
        :return:
        """
@@ -128,6 +148,10 @@ class Network:
    def calc_vals(self, states):
+        """
+        :param states:
+        :return:
+        """
        values = self.model.predict_on_batch(states)
        return values
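
calc_vals forwards a whole batch of feature vectors to Keras' predict_on_batch. A hedged usage sketch, assuming network is an already-constructed Network instance using the 30-feature 'quack-fat' representation (both the instance name and the chosen representation are assumptions for illustration):

import numpy as np

# Three hypothetical feature vectors of length 30 (the 'quack-fat' input size).
states = np.random.rand(3, 30).astype(np.float32)
values = network.calc_vals(states)  # delegates to self.model.predict_on_batch(states)
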
@@ -195,6 +219,15 @@ class Network:
        return [best_move, best_score]

    def make_move_n_ply(self, sess, board, roll, player, n = 1):
+        """
+        :param sess:
+        :param board:
+        :param roll:
+        :param player:
+        :param n:
+        :return:
+        """
        best_pair = self.calc_n_ply(n, sess, board, player, roll)
        return best_pair
@@ -232,6 +265,15 @@ class Network:
        return [best_board, max(all_rolls_scores)]

    def calc_n_ply(self, n_init, sess, board, player, roll):
+        """
+        :param n_init:
+        :param sess:
+        :param board:
+        :param player:
+        :param roll:
+        :return:
+        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
@@ -251,6 +293,14 @@ class Network:
    def n_ply(self, n_init, sess, boards_init, player_init):
+        """
+        :param n_init:
+        :param sess:
+        :param boards_init:
+        :param player_init:
+        :return:
+        """
        def ply(n, boards, player):
            def calculate_possible_states(board):
                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
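
The possible_rolls list that starts here hard-codes the dice pairs used to expand each ply; if, as the first entries suggest, mirrored duplicates like (2, 1) are skipped, these are the 21 distinct rolls. An equivalent construction (a sketch, not the project's code):

# 21 unordered dice pairs: (1,1), (1,2), ..., (6,6).
possible_rolls = [(i, j) for i in range(1, 7) for j in range(i, 7)]
assert len(possible_rolls) == 21
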
@@ -504,6 +554,13 @@ class Network:
    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
+        """
+        :param episodes:
+        :param save_step_size:
+        :param trained_eps:
+        :return:
+        """
        with tf.Session() as sess:
            difference_in_vals = 0
@@ -563,11 +620,8 @@ class Network:
                final_score = np.array([Board.outcome(final_board)[1]])
                scaled_final_score = ((final_score + 2) / 4)

                self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))

            sys.stderr.write("\n")

            if episode % min(save_step_size, episodes) == 0:
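
The scaling just above maps the final outcome onto the same [0, 1] range as the value the network predicts, presumably because Board.outcome returns ±1 for a plain win or loss and ±2 for a gammon. A small worked check (plain Python; the outcome range is an assumption):

# (score + 2) / 4 maps [-2, 2] onto [0, 1].
for score in (-2, -1, 0, 1, 2):
    print(score, (score + 2) / 4)  # 0.0, 0.25, 0.5, 0.75, 1.0
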