parametric board representation in network

parent abce56dd40
commit fda2c6e08d

board.py (9 changed lines)
@@ -35,14 +35,16 @@ class Board:
         board.append(-15 - sum(negatives))
         return tuple(board)
 
+    # quack
     @staticmethod
-    def board_features_to_slimmed_down_own(board, player):
+    def board_features_quack(board, player):
         board = list(board)
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1, -1)
 
+    # quack-fat
     @staticmethod
-    def board_features_to_own(board, player):
+    def board_features_quack_fat(board, player):
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
@@ -52,8 +54,9 @@ class Board:
         return np.array(board).reshape(1,-1)
 
 
+    # tesauro
     @staticmethod
-    def board_features_to_tesauro(board, cur_player):
+    def board_features_tesauro(board, cur_player):
         features = []
         for player in [-1,1]:
             sum = 0.0
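
Side note on sizes: 'quack' advertises 28 inputs, which fits a 26-entry board
tuple plus the two player-indicator units appended above. A minimal standalone
sketch of that behavior (the board value here is a made-up placeholder, not a
legal position):

    import numpy as np

    board = tuple([2, -2] + [0] * 24)           # hypothetical 26-entry tuple
    player = 1
    features = list(board) + ([1, 0] if np.sign(player) > 0 else [0, 1])
    features = np.array(features).reshape(1, -1)
    print(features.shape)                       # (1, 28), matching 'quack'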

main.py (3 changed lines)
@@ -46,7 +46,8 @@ config = {
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
     'model_storage_path': 'models',
-    'bench_storage_path': 'bench'
+    'bench_storage_path': 'bench',
+    'board_representation': 'quack'
 }
 
 # Create models folder
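
The representation is hardcoded to 'quack' in the config for now. A possible
follow-up (not part of this commit) would surface it as a command-line flag
alongside the existing args; a hedged sketch:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--board-rep', default='quack',
                        choices=['quack', 'quack-fat', 'tesauro'])
    args = parser.parse_args()

    config = {
        # ... existing keys ...
        'board_representation': args.board_rep,
    }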

network.py (47 changed lines)
@@ -10,15 +10,15 @@ from eval import Eval
 
 
 class Network:
-    hidden_size = 40
-    input_size = 30
-    output_size = 1
-    # Can't remember the best learning_rate, look this up
-    learning_rate = 0.01
-    # board_features_to_own has size 30
-    # board_features_to_tesauro has size 198
-    # board_features_to_slimmed_down_own has size 28
-    board_rep = Board.board_features_to_own
+    # board_features_quack has size 28
+    # board_features_quack_fat has size 30
+    # board_features_tesauro has size 198
+
+    board_reps = {
+        'quack-fat' : (30, Board.board_features_quack_fat),
+        'quack' : (28, Board.board_features_quack),
+        'tesauro' : (198, Board.board_features_tesauro)
+    }
 
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
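
The registry keeps each representation's input size and feature function in
one place, so they cannot drift apart, and adding a representation becomes a
single dictionary entry. A standalone sketch of the same lookup pattern (toy
functions standing in for the real Board methods):

    def quack(board, player): pass
    def tesauro(board, player): pass

    board_reps = {
        'quack'   : (28, quack),
        'tesauro' : (198, tesauro),
    }

    input_size, board_trans_func = board_reps['tesauro']
    assert input_size == 198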
@@ -29,6 +29,15 @@ class Network:
 
         self.name = name
 
+        # Set board representation from config
+        self.input_size, self.board_trans_func = Network.board_reps[
+            self.config['board_representation']
+        ]
+        self.output_size = 1
+        self.hidden_size = 40
+        # Can't remember the best learning_rate, look this up
+        self.learning_rate = 0.01
+
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
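
One behavioral consequence: constructing a Network now raises a KeyError if
the config lacks 'board_representation' or names an unknown representation.
If a softer failure were wanted, a hypothetical variant of the lookup (not in
this commit) could fall back to the old default, since 'quack-fat' is the
renamed board_features_to_own:

    # Sketch; would live inside Network.__init__
    rep_name = self.config.get('board_representation', 'quack-fat')
    if rep_name not in Network.board_reps:
        raise ValueError("unknown board representation: %r" % rep_name)
    self.input_size, self.board_trans_func = Network.board_reps[rep_name]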
@@ -38,19 +47,19 @@ class Network:
             self.episodes_trained = 0
 
         # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        self.x = tf.placeholder('float', [1, self.input_size], name='input')
+        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
 
         xavier_init = tf.contrib.layers.xavier_initializer()
 
-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                               initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                               initializer=xavier_init)
 
-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
+        b_2 = tf.get_variable("b_2", (self.output_size,),
                               initializer=tf.zeros_initializer)
 
 
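
Since the network is a single 40-unit hidden layer feeding one output, the
trainable-parameter count now depends only on the chosen representation. A
quick arithmetic check (assuming W_1, b_1, W_2, b_2 are the only variables):

    def param_count(input_size, hidden_size=40, output_size=1):
        # W_1 + b_1 + W_2 + b_2
        return (input_size * hidden_size + hidden_size
                + hidden_size * output_size + output_size)

    print(param_count(28))    # quack:     28*40 + 40 + 40 + 1 = 1201
    print(param_count(30))    # quack-fat: 30*40 + 40 + 40 + 1 = 1281
    print(param_count(198))   # tesauro:  198*40 + 40 + 40 + 1 = 8001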
@@ -74,7 +83,7 @@ class Network:
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_{t+1} - V_t)∇_w V_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
 
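
That line implements the TD(0) weight update Δw_t = α(V_{t+1} - V_t)∇_w V_t,
with α now read from the instance instead of the class. A toy numpy sketch of
the same update for one weight vector (made-up values; the real code gets the
gradient from TensorFlow):

    import numpy as np

    alpha = 0.01                         # self.learning_rate
    V_t, V_next = 0.45, 0.60             # value before/after the move
    grad = np.array([0.2, -0.1, 0.3])    # ∇_w V_t for a 3-weight toy net
    w = np.zeros(3)

    w += alpha * (V_next - V_t) * grad   # same form as backprop_calc
    print(w)                             # ~[0.0003, -0.00015, 0.00045]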
@@ -148,7 +157,7 @@ class Network:
     def make_move(self, sess, board, roll, player):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
         scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]
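
The 1-x[1] flip assumes eval_state returns the value from player +1's
perspective, so player -1 ranks moves by the complement. A toy illustration
with made-up scores:

    import numpy as np

    plus1_scores = [0.3, 0.8, 0.5]      # hypothetical eval_state outputs
    player = -1
    scores = [s if np.sign(player) > 0 else 1 - s for s in plus1_scores]
    print(np.array(scores).argmax())    # 0: the move worst for player +1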
@@ -367,7 +376,7 @@ class Network:
 
         # adjust weights
         sess.run(self.training_op,
-                 feed_dict={self.x: Network.board_rep(prev_board, player),
+                 feed_dict={self.x: self.board_trans_func(prev_board, player),
                            self.value_next: cur_board_value})
 
         player *= -1
@@ -386,7 +395,7 @@ class Network:
         with tf.name_scope("final"):
             merged = tf.summary.merge_all()
             summary, _ = sess.run([merged, self.training_op],
-                                  feed_dict={self.x: Network.board_rep(prev_board, player),
+                                  feed_dict={self.x: self.board_trans_func(prev_board, player),
                                   self.value_next: scaled_final_score.reshape((1, 1))})
             writer.add_summary(summary, episode + trained_eps)
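
With the three call sites above switched from the class-level Network.board_rep
to the per-instance self.board_trans_func, changing representation is a pure
config edit. A hedged usage sketch (the constructor arguments are assumed from
context, not shown in this diff):

    config['board_representation'] = 'tesauro'    # 198 inputs instead of 28
    network = Network(config, name='tesauro-run')

    # network.input_size == 198
    # network.board_trans_func is Board.board_features_tesauro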