Added a lot of comments

2018-05-10 15:28:33 +02:00 · 2018-05-10 15:28:33 +02:00 · 4efb229d34
commit 4efb229d34
parent f2a67ca92e
1 changed files with 72 additions and 18 deletions
--- a/network.py
+++ b/network.py
@ -19,13 +19,18 @@ class Network:
        'quack-fat'   : (30, Board.board_features_quack_fat),
        'quack'       : (28, Board.board_features_quack),
        'tesauro'     : (198, Board.board_features_tesauro),
-        'quack-norm': (30, Board.board_features_quack_norm)
+        'quack-norm'  : (30, Board.board_features_quack_norm),
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }
    def custom_tanh(self, x, name=None):
        """
        Computes tanh of the input multiplied by a constant factor of 2
        :param x: The value handed to tf.tanh
        :param name: Optional name forwarded to tf.tanh
        :return: The result of tf.scalar_mul applied to the constant 2.0 and tf.tanh(x)
        """
        scale = tf.constant(2.00)
        squashed = tf.tanh(x, name)
        return tf.scalar_mul(scale, squashed)
    def __init__(self, config, name):
        """
        :param config:
        :param name:
        """
        tf.enable_eager_execution()
        xavier_init = tf.contrib.layers.xavier_initializer()
@ -44,7 +49,6 @@ class Network:
        self.max_learning_rate = 0.1
        self.min_learning_rate = 0.001
        #tf.train.get_or_create_global_step()
        # Restore trained episode count for model
        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
        if os.path.isfile(episode_count_path):
@ -61,7 +65,6 @@ class Network:
            self.global_step = 0
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
                                  input_shape=(1,self.input_size)),
@ -69,19 +72,29 @@ class Network:
        ])
-
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
-
+        """
-    def exp_decay(self, max_lr, epi_counter, decay_rate, decay_steps):
+        Calculates the exponential decay on a learning rate
-        res = max_lr * decay_rate**(epi_counter // decay_steps)
+        :param max_lr: The learning rate that the network starts at
        :param global_step: The global step
        :param decay_rate: The rate at which the learning rate should decay
        :param decay_steps: The amount of steps between each decay
        :return: The result of the exponential decay performed on the learning rate
        """
        res = max_lr * decay_rate**(global_step // decay_steps)
        return res
    def do_backprop(self, prev_state, value_next):
-
+        """
        Performs the Temporal-difference backpropagation step on the model
        :param prev_state: The previous state of the game, this has its value recalculated
        :param value_next: The value of the current move
        :return: Nothing, the calculation is performed on the model of the network
        """
        self.learning_rate = tf.maximum(self.min_learning_rate,
                                         self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
                                         name="learning_rate")
        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))
        grads = tape.gradient(value, self.model.variables)
@ -89,8 +102,6 @@ class Network:
        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
        # global_step_op = self.global_step.assign_add(1)
        with tf.variable_scope('apply_gradients'):
            for grad, train_var in zip(grads, self.model.variables):
                backprop_calc = self.learning_rate * difference_in_values * grad
@ -99,16 +110,25 @@ class Network:
    def print_variables(self):
        """
        Writes each variable of the model to standard output, one print call per variable
        :return: Nothing
        """
        for variable in self.model.variables:
            print(variable)
    def eval_state(self, state):
        """
        Runs the model on a single state
        :param state: The state to evaluate; it is reshaped to a single row before it is given to the model
        :return: Whatever the model returns for the reshaped state
        """
        single_row = state.reshape(1, -1)
        return self.model(single_row)
    def save_model(self, episode_count):
        """
        Saves the model of the network, it references global_step as self.global_step
        :param episode_count:
        :return:
        """
@ -128,6 +148,10 @@ class Network:
    def calc_vals(self, states):
        """
        Computes the model output for a batch of states
        :param states: The batch handed unchanged to the model's predict_on_batch
        :return: The values returned by predict_on_batch
        """
        return self.model.predict_on_batch(states)
@ -195,6 +219,15 @@ class Network:
        return [best_move, best_score]
    def make_move_n_ply(self, sess, board, roll, player, n=1):
        """
        Delegates to calc_n_ply and hands back its result unchanged
        :param sess: Forwarded to calc_n_ply as its sess argument
        :param board: Forwarded to calc_n_ply as the board
        :param roll: Forwarded to calc_n_ply as the roll
        :param player: Forwarded to calc_n_ply as the player
        :param n: Forwarded to calc_n_ply as n_init, defaults to 1
        :return: The value returned by calc_n_ply
        """
        return self.calc_n_ply(n, sess, board, player, roll)
@ -232,6 +265,15 @@ class Network:
        return [best_board, max(all_rolls_scores)]
    def calc_n_ply(self, n_init, sess, board, player, roll):
        """
        :param n_init:
        :param sess:
        :param board:
        :param player:
        :param roll:
        :return:
        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
@ -251,6 +293,14 @@ class Network:
    def n_ply(self, n_init, sess, boards_init, player_init):
        """
        :param n_init:
        :param sess:
        :param boards_init:
        :param player_init:
        :return:
        """
        def ply(n, boards, player):
            def calculate_possible_states(board):
                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
@ -504,6 +554,13 @@ class Network:
    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
        """
        :param episodes:
        :param save_step_size:
        :param trained_eps:
        :return:
        """
        with tf.Session() as sess:
            difference_in_vals = 0
@ -563,11 +620,8 @@ class Network:
                final_score = np.array([Board.outcome(final_board)[1]])
                scaled_final_score = ((final_score + 2) / 4)
                self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
                sys.stderr.write("\n")
                if episode % min(save_step_size, episodes) == 0: