proper Tesauro board representation

2018-03-28 14:36:52 +02:00 · 2018-03-28 14:36:52 +02:00 · 17f5b62e9b
commit 17f5b62e9b
parent fda2c6e08d
3 changed files with 338 additions and 27 deletions
--- a/board.py
+++ b/board.py
@@ -57,30 +57,33 @@ class Board:
    # tesauro
    @staticmethod
    def board_features_tesauro(board, cur_player):
-        features = []
-        for player in [-1,1]:
-            sum = 0.0
-            for board_range in range(1,25):
-                pin = board[board_range]
-                #print("PIIIN:",pin)
-                feature = [0.0]*4
-                if np.sign(pin) == np.sign(player):
-                    sum += abs(pin)
-                    for i in range(min(abs(pin), 3)):
-                        feature[i] = 1
-                        if (abs(pin) > 3):
-                            feature[3] = (abs(pin)-3)/2
-                features += feature
-            #print("SUUUM:",sum)
-            # Append the amount of men on the bar of the current player divided by 2
-            features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
-            # Calculate how many pieces there must be in the home state and divide it by 15
-            features.append((15 - sum) / 15)
-        features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
-        test = np.array(features).reshape(1,-1)
-        #print("TEST:",test)
-        return test
+        def ordinary_trans(val, player):
+            abs_val = val * player
+            if   abs_val <= 0: return (0,0,0,0)
+            elif abs_val == 1: return (1,0,0,0)
+            elif abs_val == 2: return (1,1,0,0)
+            elif abs_val == 3: return (1,1,1,0)
+            else:              return (1,1,1, (abs_val - 3) / 2)

+        def bar_trans(board, player):
+            if    player == 1: return (abs(board[0]/2),)
+            elif player == -1: return (abs(board[25]/2),)
+
+        # def ordinary_trans_board(board, player):
+        #     return np.array(
+        #         [ordinary_trans(x, player) for x in board[1:25]]
+        #     ).flatten()
+
+        board_rep = []
+        for player in [1,-1]:
+            for x in board[1:25]:
+                board_rep += ordinary_trans(x, player)
+            board_rep += bar_trans(board, player)
+            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
+
+        board_rep += ([1,0] if cur_player == 1 else [0,1])
+
+        return np.array(board_rep).reshape(1,198)


    @staticmethod
@@ -295,9 +298,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {13}| {14}| {15}| {16}| {17}| {18}| bar  1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end  1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
--- a/network.py
+++ b/network.py
@@ -365,7 +365,9 @@ class Network:
                # first thing inside of the while loop and then call
                # best_move_and_score to get V_t+1

+                i = 0
                while Board.outcome(prev_board) is None:
+                    i += 1

                    #print("PREEEV_BOOOOAAARD:",prev_board)
                    cur_board, cur_board_value = self.make_move(sess,
@@ -385,7 +387,7 @@ class Network:
                    prev_board = cur_board

                final_board = prev_board
-                sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
+                sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
                outcomes.append(Board.outcome(final_board)[1])
                final_score = np.array([Board.outcome(final_board)[1]])
                scaled_final_score = ((final_score + 2) / 4)
--- a/test.py
+++ b/test.py
@@ -614,5 +614,311 @@ class TestBoardFlip(unittest.TestCase):

        self.assertEqual(Board.flip(Board.flip(board)), board)

+    def test_tesauro_initial(self):
+        board = Board.initial_state
+
+        expected = (1,1,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,0,0,
+
+                    0.0,
+                    0,
+
+                    1,
+                    0
+        )
+
+        import numpy as np
+        self.assertTrue((Board.board_features_tesauro(board, 1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+    def test_tesauro_bars(self):
+        board = list(Board.initial_state)
+        board[1] = 0
+        board[0] = 2
+        board[24] = 0
+        board[25] = -2
+
+        board = tuple(board)
+        
+        expected = (0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1.0,
+                    0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1.0,
+                    0,
+
+                    1,
+                    0
+        )
+
+        import numpy as np
+        self.assertTrue((Board.board_features_tesauro(board, 1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+
+    def test_tesauro_home(self):
+        board = list(Board.initial_state)
+
+        board[1] = 0
+        board[24] = 0
+
+        board = tuple(board)
+        
+        expected = (0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    2,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    2,
+
+                    1,
+                    0
+        )
+
+        import numpy as np
+        self.assertTrue((Board.board_features_tesauro(board, 1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+
+    def test_tesauro_black_player(self):
+        board = Board.initial_state
+
+        expected = (1,1,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+                    
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0.0,
+                    0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,1,1,
+
+                    0,0,0,0,
+                    1,1,1,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    1,1,1,1,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    0,0,0,0,
+                    1,1,0,0,
+
+                    0.0,
+                    0,
+
+                    0,
+                    1
+        )
+
+        import numpy as np
+        self.assertTrue((Board.board_features_tesauro(board, -1) ==
+                         np.array(expected).reshape(1, 198)).all())
+
+        
 if __name__ == '__main__':
    unittest.main()