proper Tesauro board representation

2018-03-28 14:36:52 +02:00 · 2018-03-28 14:36:52 +02:00 · 17f5b62e9b
commit 17f5b62e9b
parent fda2c6e08d
3 changed files with 338 additions and 27 deletions
--- a/board.py
+++ b/board.py
@ -57,30 +57,33 @@ class Board:
    # tesauro
    @staticmethod
    def board_features_tesauro(board, cur_player):
-        features = []
+        def ordinary_trans(val, player):
-        for player in [-1,1]:
+            abs_val = val * player
-            sum = 0.0
+            if   abs_val <= 0: return (0,0,0,0)
-            for board_range in range(1,25):
+            elif abs_val == 1: return (1,0,0,0)
-                pin = board[board_range]
+            elif abs_val == 2: return (1,1,0,0)
-                #print("PIIIN:",pin)
+            elif abs_val == 3: return (1,1,1,0)
-                feature = [0.0]*4
+            else:              return (1,1,1, (abs_val - 3) / 2)
                if np.sign(pin) == np.sign(player):
                    sum += abs(pin)
                    for i in range(min(abs(pin), 3)):
                        feature[i] = 1
                        if (abs(pin) > 3):
                            feature[3] = (abs(pin)-3)/2
                features += feature
            #print("SUUUM:",sum)
            # Append the amount of men on the bar of the current player divided by 2
            features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
            # Calculate how many pieces there must be in the home state and divide it by 15
            features.append((15 - sum) / 15)
        features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
        test = np.array(features).reshape(1,-1)
        #print("TEST:",test)
        return test
        def bar_trans(board, player):
            if    player == 1: return (abs(board[0]/2),)
            elif player == -1: return (abs(board[25]/2),)
        # def ordinary_trans_board(board, player):
        #     return np.array(
        #         [ordinary_trans(x, player) for x in board[1:25]]
        #     ).flatten()
        board_rep = []
        for player in [1,-1]:
            for x in board[1:25]:
                board_rep += ordinary_trans(x, player)
            board_rep += bar_trans(board, player)
            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
        board_rep += ([1,0] if cur_player == 1 else [0,1])
        return np.array(board_rep).reshape(1,198)
    @staticmethod
@ -295,9 +298,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {13}| {14}| {15}| {16}| {17}| {18}| bar  1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end  1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
--- a/network.py
+++ b/network.py
@ -365,13 +365,15 @@ class Network:
                # first thing inside of the while loop and then call
                # best_move_and_score to get V_t+1
                i = 0
                while Board.outcome(prev_board) is None:
                    i += 1
                    #print("PREEEV_BOOOOAAARD:",prev_board)
                    cur_board, cur_board_value = self.make_move(sess,
                                                                prev_board,
                                                                (random.randrange(1, 7), random.randrange(1, 7)), player)
-                
+
                    #print("The current value:",cur_board_value)
                    # adjust weights
@ -385,7 +387,7 @@ class Network:
                    prev_board = cur_board
                final_board = prev_board
-                sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
+                sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
                outcomes.append(Board.outcome(final_board)[1])
                final_score = np.array([Board.outcome(final_board)[1]])
                scaled_final_score = ((final_score + 2) / 4)
--- a/test.py
+++ b/test.py
@ -613,6 +613,312 @@ class TestBoardFlip(unittest.TestCase):
                 -2)
        self.assertEqual(Board.flip(Board.flip(board)), board)
    def test_tesauro_initial(self):
        """Check the 198-feature encoding of the initial position, player 1 to move.

        Layout of the expected vector, per side (player 1 first, then -1):
        24 points x 4 units, one bar unit, one unit holding 15 minus the
        number of checkers counted for that side. The last two units flag
        whose turn it is.
        """
        import numpy as np

        # Four-unit codes for the stack sizes present in this position;
        # a stack of five saturates the first three units and the fourth
        # becomes (5 - 3) / 2 == 1.
        two, three, five = (1, 1, 0, 0), (1, 1, 1, 0), (1, 1, 1, 1)

        def side(stacks, bar, missing):
            # Expand a sparse {point: code} mapping into the 24 * 4 point
            # units and append the two trailing per-side units.
            units = []
            for point in range(1, 25):
                units.extend(stacks.get(point, (0, 0, 0, 0)))
            return units + [bar, missing]

        expected = (
            side({1: two, 12: five, 17: three, 19: five}, 0.0, 0)
            + side({6: five, 8: three, 13: five, 24: two}, 0.0, 0)
            + [1, 0]  # player 1 is on turn
        )

        # assert_array_equal reports which elements differ, unlike
        # assertTrue on an all()-reduced comparison.
        np.testing.assert_array_equal(
            Board.board_features_tesauro(Board.initial_state, 1),
            np.array(expected).reshape(1, 198))
    def test_tesauro_bars(self):
        """Check the bar units when both sides have two checkers on the bar.

        The two checkers of point 1 are moved to index 0 and the two of
        point 24 to index 25, so each side's bar unit must read 2 / 2 == 1.0
        while the unit after it stays at 0.
        """
        import numpy as np

        board = list(Board.initial_state)
        board[1] = 0
        board[0] = 2
        board[24] = 0
        board[25] = -2
        board = tuple(board)

        # Four-unit codes for the stack sizes left on the points.
        three, five = (1, 1, 1, 0), (1, 1, 1, 1)

        def side(stacks, bar, missing):
            # Expand a sparse {point: code} mapping into the 24 * 4 point
            # units and append the two trailing per-side units.
            units = []
            for point in range(1, 25):
                units.extend(stacks.get(point, (0, 0, 0, 0)))
            return units + [bar, missing]

        expected = (
            side({12: five, 17: three, 19: five}, 1.0, 0)
            + side({6: five, 8: three, 13: five}, 1.0, 0)
            + [1, 0]  # player 1 is on turn
        )

        # assert_array_equal reports which elements differ, unlike
        # assertTrue on an all()-reduced comparison.
        np.testing.assert_array_equal(
            Board.board_features_tesauro(board, 1),
            np.array(expected).reshape(1, 198))
    def test_tesauro_home(self):
        """Check the per-side unit that counts checkers missing from the board.

        Points 1 and 24 are emptied without putting the checkers anywhere
        else, so each side is two checkers short of 15 and the unit after
        the bar unit must read 2 for both sides.
        """
        import numpy as np

        board = list(Board.initial_state)
        board[1] = 0
        board[24] = 0
        board = tuple(board)

        # Four-unit codes for the stack sizes left on the points.
        three, five = (1, 1, 1, 0), (1, 1, 1, 1)

        def side(stacks, bar, missing):
            # Expand a sparse {point: code} mapping into the 24 * 4 point
            # units and append the two trailing per-side units.
            units = []
            for point in range(1, 25):
                units.extend(stacks.get(point, (0, 0, 0, 0)))
            return units + [bar, missing]

        expected = (
            side({12: five, 17: three, 19: five}, 0.0, 2)
            + side({6: five, 8: three, 13: five}, 0.0, 2)
            + [1, 0]  # player 1 is on turn
        )

        # assert_array_equal reports which elements differ, unlike
        # assertTrue on an all()-reduced comparison.
        np.testing.assert_array_equal(
            Board.board_features_tesauro(board, 1),
            np.array(expected).reshape(1, 198))
    def test_tesauro_black_player(self):
        """Check the encoding of the initial position with player -1 to move.

        The point, bar and missing-checker units are the same as for
        player 1 to move; only the final two turn units flip to (0, 1).
        """
        import numpy as np

        # Four-unit codes for the stack sizes present in this position;
        # a stack of five saturates the first three units and the fourth
        # becomes (5 - 3) / 2 == 1.
        two, three, five = (1, 1, 0, 0), (1, 1, 1, 0), (1, 1, 1, 1)

        def side(stacks, bar, missing):
            # Expand a sparse {point: code} mapping into the 24 * 4 point
            # units and append the two trailing per-side units.
            units = []
            for point in range(1, 25):
                units.extend(stacks.get(point, (0, 0, 0, 0)))
            return units + [bar, missing]

        expected = (
            side({1: two, 12: five, 17: three, 19: five}, 0.0, 0)
            + side({6: five, 8: three, 13: five, 24: two}, 0.0, 0)
            + [0, 1]  # player -1 is on turn
        )

        # assert_array_equal reports which elements differ, unlike
        # assertTrue on an all()-reduced comparison.
        np.testing.assert_array_equal(
            Board.board_features_tesauro(Board.initial_state, -1),
            np.array(expected).reshape(1, 198))
 if __name__ == '__main__':
    unittest.main()