From fda2c6e08d39eca1e6c618db9bcba9fe5092f15c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
 <christoffer@guava.space>
Date: Wed, 28 Mar 2018 12:00:47 +0200
Subject: [PATCH] parametric board representation in network

---
 board.py   | 13 ++++++++-----
 main.py    |  3 ++-
 network.py | 47 ++++++++++++++++++++++++++++-------------------
 3 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/board.py b/board.py
index 1b018d9..33303c1 100644
--- a/board.py
+++ b/board.py
@@ -31,29 +31,32 @@ class Board:
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        board.append(15 - sum(positives))
+        board.append( 15 - sum(positives))
         board.append(-15 - sum(negatives))
         return tuple(board)
 
+    # quack: the board values followed by a one-hot player flag ([1, 0] or [0, 1])
     @staticmethod
-    def board_features_to_slimmed_down_own(board, player):
+    def board_features_quack(board, player):
         board = list(board)
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1, -1)
 
+    # quack-fat: quack plus 15 - sum(positives) and -15 - sum(negatives) before the flag
     @staticmethod
-    def board_features_to_own(board, player):
+    def board_features_quack_fat(board, player):
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        board.append(15 - sum(positives))
+        board.append( 15 - sum(positives))
         board.append(-15 - sum(negatives))
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1,-1)
 
 
+    # tesauro
     @staticmethod
-    def board_features_to_tesauro(board, cur_player):
+    def board_features_tesauro(board, cur_player):
         features = []
         for player in [-1,1]:
             sum = 0.0
diff --git a/main.py b/main.py
index b5a8ad0..f6a375e 100644
--- a/main.py
+++ b/main.py
@@ -46,7 +46,8 @@ config = {
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
     'model_storage_path': 'models',
-    'bench_storage_path': 'bench'
+    'bench_storage_path': 'bench',
+    'board_representation': 'quack'
 }
 
 # Create models folder
diff --git a/network.py b/network.py
index f1f1859..d19f23c 100644
--- a/network.py
+++ b/network.py
@@ -10,15 +10,15 @@ from eval import Eval
 
 
 class Network:
-    hidden_size = 40
-    input_size = 30
-    output_size = 1
-    # Can't remember the best learning_rate, look this up
-    learning_rate = 0.01
-    # board_features_to_own has size 30
-    # board_features_to_tesauro has size 198
-    # board_features_to_slimmed_down_own has size 28
-    board_rep = Board.board_features_to_own
+    # board_features_quack has size 28
+    # board_features_quack_fat has size 30
+    # board_features_tesauro has size 198
+
+    board_reps = {
+        'quack-fat' : (30, Board.board_features_quack_fat),
+        'quack'     : (28, Board.board_features_quack),
+        'tesauro'   : (198, Board.board_features_tesauro)
+    }
 
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@@ -29,6 +29,15 @@ class Network:
 
         self.name = name
 
+        # Set board representation from config
+        self.input_size, self.board_trans_func = Network.board_reps[
+            self.config['board_representation']
+        ]
+        self.output_size = 1
+        self.hidden_size = 40
+        # TODO: 0.01 may not be the best learning_rate; look up the tuned value
+        self.learning_rate = 0.01
+        
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
@@ -38,19 +47,19 @@ class Network:
             self.episodes_trained = 0
 
         # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        self.x = tf.placeholder('float', [1, self.input_size], name='input')
+        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
 
         xavier_init = tf.contrib.layers.xavier_initializer()
 
-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                               initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                               initializer=xavier_init)
 
-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
+        b_2 = tf.get_variable("b_2", (self.output_size,),
                               initializer=tf.zeros_initializer)
 
 
@@ -74,7 +83,7 @@ class Network:
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
 
@@ -148,7 +157,7 @@ class Network:
     def make_move(self, sess, board, roll, player):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
         scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]
@@ -367,7 +376,7 @@ class Network:
 
                     # adjust weights
                     sess.run(self.training_op,
-                             feed_dict={self.x: Network.board_rep(prev_board, player),
+                             feed_dict={self.x: self.board_trans_func(prev_board, player),
                                         self.value_next: cur_board_value})
 
                     player *= -1
@@ -386,7 +395,7 @@ class Network:
                 with tf.name_scope("final"):
                     merged = tf.summary.merge_all()
                     summary, _ = sess.run([merged, self.training_op],
-                                          feed_dict={self.x: Network.board_rep(prev_board, player),
+                                          feed_dict={self.x: self.board_trans_func(prev_board, player),
                                                      self.value_next: scaled_final_score.reshape((1, 1))})
                     writer.add_summary(summary, episode + trained_eps)