From 17f5b62e9b3fc3e9662f41a8b69ff94ccb506f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Wed, 28 Mar 2018 14:36:52 +0200 Subject: [PATCH] proper Tesauro board representation --- board.py | 53 +++++----- network.py | 6 +- test.py | 306 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 338 insertions(+), 27 deletions(-) diff --git a/board.py b/board.py index 33303c1..2136e47 100644 --- a/board.py +++ b/board.py @@ -57,30 +57,33 @@ class Board: # tesauro @staticmethod def board_features_tesauro(board, cur_player): - features = [] - for player in [-1,1]: - sum = 0.0 - for board_range in range(1,25): - pin = board[board_range] - #print("PIIIN:",pin) - feature = [0.0]*4 - if np.sign(pin) == np.sign(player): - sum += abs(pin) - for i in range(min(abs(pin), 3)): - feature[i] = 1 - if (abs(pin) > 3): - feature[3] = (abs(pin)-3)/2 - features += feature - #print("SUUUM:",sum) - # Append the amount of men on the bar of the current player divided by 2 - features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0) - # Calculate how many pieces there must be in the home state and divide it by 15 - features.append((15 - sum) / 15) - features += ([1,0] if np.sign(cur_player) > 0 else [0,1]) - test = np.array(features).reshape(1,-1) - #print("TEST:",test) - return test + def ordinary_trans(val, player): + abs_val = val * player + if abs_val <= 0: return (0,0,0,0) + elif abs_val == 1: return (1,0,0,0) + elif abs_val == 2: return (1,1,0,0) + elif abs_val == 3: return (1,1,1,0) + else: return (1,1,1, (abs_val - 3) / 2) + def bar_trans(board, player): + if player == 1: return (abs(board[0]/2),) + elif player == -1: return (abs(board[25]/2),) + + # def ordinary_trans_board(board, player): + # return np.array( + # [ordinary_trans(x, player) for x in board[1:25]] + # ).flatten() + + board_rep = [] + for player in [1,-1]: + for x in board[1:25]: + board_rep += ordinary_trans(x, player) + board_rep += bar_trans(board, player) + board_rep += (15 - Board.num_of_checkers_for_player(board, player),) + + board_rep += ([1,0] if cur_player == 1 else [0,1]) + + return np.array(board_rep).reshape(1,198) @staticmethod @@ -295,9 +298,9 @@ class Board: return """ 13 14 15 16 17 18 19 20 21 22 23 24 +--------------------------------------------------------------------------+ -| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO| +| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO| |---|---|---|---|---|---|------------|---|---|---|---|---|---| | -| {13}| {14}| {15}| {16}| {17}| {18}| bar 1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO| +| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO| +--------------------------------------------------------------------------+ 12 11 10 9 8 7 6 5 4 3 2 1 """.format(*temp) diff --git a/network.py b/network.py index d19f23c..2722f6a 100644 --- a/network.py +++ b/network.py @@ -365,13 +365,15 @@ class Network: # first thing inside of the while loop and then call # best_move_and_score to get V_t+1 + i = 0 while Board.outcome(prev_board) is None: + i += 1 #print("PREEEV_BOOOOAAARD:",prev_board) cur_board, cur_board_value = self.make_move(sess, prev_board, (random.randrange(1, 7), random.randrange(1, 7)), player) - + #print("The current value:",cur_board_value) # adjust weights @@ -385,7 +387,7 @@ class Network: prev_board = cur_board final_board = prev_board - sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) + sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) outcomes.append(Board.outcome(final_board)[1]) final_score = np.array([Board.outcome(final_board)[1]]) scaled_final_score = ((final_score + 2) / 4) diff --git a/test.py b/test.py index efc243e..6c9c130 100644 --- a/test.py +++ b/test.py @@ -613,6 +613,312 @@ class TestBoardFlip(unittest.TestCase): -2) self.assertEqual(Board.flip(Board.flip(board)), board) + + def test_tesauro_initial(self): + board = Board.initial_state + + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, + + 0.0, + 0, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + def test_tesauro_bars(self): + board = list(Board.initial_state) + board[1] = 0 + board[0] = 2 + board[24] = 0 + board[25] = -2 + + board = tuple(board) + + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1.0, + 0, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + + def test_tesauro_home(self): + board = list(Board.initial_state) + + board[1] = 0 + board[24] = 0 + + board = tuple(board) + + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 2, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 2, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + + def test_tesauro_black_player(self): + board = Board.initial_state + + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, + + 0.0, + 0, + + 0, + 1 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, -1) == + np.array(expected).reshape(1, 198)).all()) + if __name__ == '__main__': unittest.main()