From 4efb229d34746703935830981e931caca128cc78 Mon Sep 17 00:00:00 2001
From: Alexander Munch-Hansen
Date: Thu, 10 May 2018 15:28:33 +0200
Subject: [PATCH] Added a lot of comments

---
 network.py | 90 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 72 insertions(+), 18 deletions(-)

diff --git a/network.py b/network.py
index d14e1ea..c46f291 100644
--- a/network.py
+++ b/network.py
@@ -16,16 +16,21 @@ class Network:
     # board_features_tesauro has size 198
 
     board_reps = {
-        'quack-fat' : (30, Board.board_features_quack_fat),
-        'quack' : (28, Board.board_features_quack),
-        'tesauro' : (198, Board.board_features_tesauro),
-        'quack-norm': (30, Board.board_features_quack_norm)
+        'quack-fat'   : (30, Board.board_features_quack_fat),
+        'quack'       : (28, Board.board_features_quack),
+        'tesauro'     : (198, Board.board_features_tesauro),
+        'quack-norm'  : (30, Board.board_features_quack_norm),
+        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
     }
 
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
 
     def __init__(self, config, name):
+        """
+        :param config: the configuration object for the network
+        :param name: the name of the network
+        """
         tf.enable_eager_execution()
 
         xavier_init = tf.contrib.layers.xavier_initializer()
@@ -44,7 +49,6 @@ class Network:
         self.max_learning_rate = 0.1
         self.min_learning_rate = 0.001
 
-        #tf.train.get_or_create_global_step()
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
@@ -61,7 +65,6 @@ class Network:
 
         self.global_step = 0
 
-
         self.model = tf.keras.Sequential([
             tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
                                   input_shape=(1,self.input_size)),
@@ -69,19 +72,29 @@ class Network:
         ])
 
 
-
-
-    def exp_decay(self, max_lr, epi_counter, decay_rate, decay_steps):
-        res = max_lr * decay_rate**(epi_counter // decay_steps)
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
+        """
+        Calculates the exponentially decayed learning rate
+        :param max_lr: The learning rate that the network starts out with
+        :param global_step: The current global step
+        :param decay_rate: The rate at which the learning rate should decay
+        :param decay_steps: The number of steps between each decay
+        :return: The exponentially decayed learning rate
+        """
+        res = max_lr * decay_rate**(global_step // decay_steps)
         return res
 
     def do_backprop(self, prev_state, value_next):
-
+        """
+        Performs the temporal-difference backpropagation step on the model
+        :param prev_state: The previous state of the game; its value is recalculated and updated
+        :param value_next: The value of the state resulting from the current move
+        :return: Nothing; the update is applied directly to the variables of the model
+        """
         self.learning_rate = tf.maximum(self.min_learning_rate,
                                         self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
                                         name="learning_rate")
-
         with tf.GradientTape() as tape:
             value = self.model(prev_state.reshape(1,-1))
         grads = tape.gradient(value, self.model.variables)
@@ -89,8 +102,6 @@ class Network:
         difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
         tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
 
-        # global_step_op = self.global_step.assign_add(1)
-
         with tf.variable_scope('apply_gradients'):
             for grad, train_var in zip(grads, self.model.variables):
                 backprop_calc = self.learning_rate * difference_in_values * grad
@@ -99,16 +110,25 @@ class Network:
 
 
     def print_variables(self):
+        """
+        Prints all the variables of the model
+        :return: Nothing; the variables are printed to standard output
+        """
         variables = self.model.variables
-
         for k in variables:
             print(k)
 
     def eval_state(self, state):
+        """
+        Evaluates a single state
+        :param state: the board state to evaluate
+        :return: the value of the state as predicted by the model
+        """
         return self.model(state.reshape(1,-1))
 
     def save_model(self, episode_count):
         """
+        Saves the model of the network; global_step is referenced as self.global_step
         :param episode_count:
         :return:
         """
@@ -128,6 +148,10 @@ class Network:
 
 
     def calc_vals(self, states):
+        """
+        :param states: a batch of board states to evaluate
+        :return: the values predicted by the model for each of the given states
+        """
         values = self.model.predict_on_batch(states)
         return values
 
@@ -195,6 +219,15 @@ class Network:
         return [best_move, best_score]
 
     def make_move_n_ply(self, sess, board, roll, player, n = 1):
+        """
+        Finds the best move from the given board and roll by looking n plies ahead
+        :param sess: the TensorFlow session
+        :param board: the current board
+        :param roll: the dice roll
+        :param player: the player who is to move
+        :param n: the number of plies to look ahead
+        :return: the best pair of board and score found by calc_n_ply
+        """
         best_pair = self.calc_n_ply(n, sess, board, player, roll)
         return best_pair
 
@@ -232,6 +265,15 @@ class Network:
         return [best_board, max(all_rolls_scores)]
 
     def calc_n_ply(self, n_init, sess, board, player, roll):
+        """
+        Calculates the best move from the given board and roll by looking n_init plies ahead
+        :param n_init: the number of plies to look ahead
+        :param sess: the TensorFlow session
+        :param board: the current board
+        :param player: the player who is to move
+        :param roll: the dice roll
+        :return: the best pair of board and score found by the look-ahead
+        """
         # find all legal states from the given board and the given roll
         init_legal_states = Board.calculate_legal_states(board, player, roll)
 
@@ -251,6 +293,14 @@ class Network:
 
 
     def n_ply(self, n_init, sess, boards_init, player_init):
+        """
+        Performs an n-ply look-ahead from the given boards
+        :param n_init: the number of plies to look ahead
+        :param sess: the TensorFlow session
+        :param boards_init: the boards to start the look-ahead from
+        :param player_init: the player who is to move
+        :return:
+        """
         def ply(n, boards, player):
             def calculate_possible_states(board):
                 possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
@@ -504,6 +554,13 @@ class Network:
 
 
     def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
+        """
+        Trains the model for the given number of episodes
+        :param episodes: the number of episodes to train for
+        :param save_step_size: the number of episodes between each save of the model
+        :param trained_eps: the number of episodes the model has already been trained for
+        :return:
+        """
         with tf.Session() as sess:
             difference_in_vals = 0
 
@@ -563,11 +620,8 @@ class Network:
                     final_score = np.array([Board.outcome(final_board)[1]])
                     scaled_final_score = ((final_score + 2) / 4)
 
-                    self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
-
-
                     sys.stderr.write("\n")
 
                     if episode % min(save_step_size, episodes) == 0: