proper Tesauro board representation
This commit is contained in:
parent
fda2c6e08d
commit
17f5b62e9b
53
board.py
53
board.py
|
@ -57,30 +57,33 @@ class Board:
|
||||||
# tesauro
|
# tesauro
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_tesauro(board, cur_player):
|
def board_features_tesauro(board, cur_player):
|
||||||
features = []
|
def ordinary_trans(val, player):
|
||||||
for player in [-1,1]:
|
abs_val = val * player
|
||||||
sum = 0.0
|
if abs_val <= 0: return (0,0,0,0)
|
||||||
for board_range in range(1,25):
|
elif abs_val == 1: return (1,0,0,0)
|
||||||
pin = board[board_range]
|
elif abs_val == 2: return (1,1,0,0)
|
||||||
#print("PIIIN:",pin)
|
elif abs_val == 3: return (1,1,1,0)
|
||||||
feature = [0.0]*4
|
else: return (1,1,1, (abs_val - 3) / 2)
|
||||||
if np.sign(pin) == np.sign(player):
|
|
||||||
sum += abs(pin)
|
|
||||||
for i in range(min(abs(pin), 3)):
|
|
||||||
feature[i] = 1
|
|
||||||
if (abs(pin) > 3):
|
|
||||||
feature[3] = (abs(pin)-3)/2
|
|
||||||
features += feature
|
|
||||||
#print("SUUUM:",sum)
|
|
||||||
# Append the amount of men on the bar of the current player divided by 2
|
|
||||||
features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
|
|
||||||
# Calculate how many pieces there must be in the home state and divide it by 15
|
|
||||||
features.append((15 - sum) / 15)
|
|
||||||
features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
|
|
||||||
test = np.array(features).reshape(1,-1)
|
|
||||||
#print("TEST:",test)
|
|
||||||
return test
|
|
||||||
|
|
||||||
|
def bar_trans(board, player):
|
||||||
|
if player == 1: return (abs(board[0]/2),)
|
||||||
|
elif player == -1: return (abs(board[25]/2),)
|
||||||
|
|
||||||
|
# def ordinary_trans_board(board, player):
|
||||||
|
# return np.array(
|
||||||
|
# [ordinary_trans(x, player) for x in board[1:25]]
|
||||||
|
# ).flatten()
|
||||||
|
|
||||||
|
board_rep = []
|
||||||
|
for player in [1,-1]:
|
||||||
|
for x in board[1:25]:
|
||||||
|
board_rep += ordinary_trans(x, player)
|
||||||
|
board_rep += bar_trans(board, player)
|
||||||
|
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
|
||||||
|
|
||||||
|
board_rep += ([1,0] if cur_player == 1 else [0,1])
|
||||||
|
|
||||||
|
return np.array(board_rep).reshape(1,198)
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -295,9 +298,9 @@ class Board:
|
||||||
return """
|
return """
|
||||||
13 14 15 16 17 18 19 20 21 22 23 24
|
13 14 15 16 17 18 19 20 21 22 23 24
|
||||||
+--------------------------------------------------------------------------+
|
+--------------------------------------------------------------------------+
|
||||||
| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
|
| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
|
||||||
|---|---|---|---|---|---|------------|---|---|---|---|---|---| |
|
|---|---|---|---|---|---|------------|---|---|---|---|---|---| |
|
||||||
| {13}| {14}| {15}| {16}| {17}| {18}| bar 1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO|
|
| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO|
|
||||||
+--------------------------------------------------------------------------+
|
+--------------------------------------------------------------------------+
|
||||||
12 11 10 9 8 7 6 5 4 3 2 1
|
12 11 10 9 8 7 6 5 4 3 2 1
|
||||||
""".format(*temp)
|
""".format(*temp)
|
||||||
|
|
|
@ -365,7 +365,9 @@ class Network:
|
||||||
# first thing inside of the while loop and then call
|
# first thing inside of the while loop and then call
|
||||||
# best_move_and_score to get V_t+1
|
# best_move_and_score to get V_t+1
|
||||||
|
|
||||||
|
i = 0
|
||||||
while Board.outcome(prev_board) is None:
|
while Board.outcome(prev_board) is None:
|
||||||
|
i += 1
|
||||||
|
|
||||||
#print("PREEEV_BOOOOAAARD:",prev_board)
|
#print("PREEEV_BOOOOAAARD:",prev_board)
|
||||||
cur_board, cur_board_value = self.make_move(sess,
|
cur_board, cur_board_value = self.make_move(sess,
|
||||||
|
@ -385,7 +387,7 @@ class Network:
|
||||||
prev_board = cur_board
|
prev_board = cur_board
|
||||||
|
|
||||||
final_board = prev_board
|
final_board = prev_board
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
|
sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
|
||||||
outcomes.append(Board.outcome(final_board)[1])
|
outcomes.append(Board.outcome(final_board)[1])
|
||||||
final_score = np.array([Board.outcome(final_board)[1]])
|
final_score = np.array([Board.outcome(final_board)[1]])
|
||||||
scaled_final_score = ((final_score + 2) / 4)
|
scaled_final_score = ((final_score + 2) / 4)
|
||||||
|
|
306
test.py
306
test.py
|
@ -614,5 +614,311 @@ class TestBoardFlip(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(Board.flip(Board.flip(board)), board)
|
self.assertEqual(Board.flip(Board.flip(board)), board)
|
||||||
|
|
||||||
|
def test_tesauro_initial(self):
|
||||||
|
board = Board.initial_state
|
||||||
|
|
||||||
|
expected = (1,1,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0.0,
|
||||||
|
0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,0,0,
|
||||||
|
|
||||||
|
0.0,
|
||||||
|
0,
|
||||||
|
|
||||||
|
1,
|
||||||
|
0
|
||||||
|
)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
||||||
|
np.array(expected).reshape(1, 198)).all())
|
||||||
|
|
||||||
|
def test_tesauro_bars(self):
|
||||||
|
board = list(Board.initial_state)
|
||||||
|
board[1] = 0
|
||||||
|
board[0] = 2
|
||||||
|
board[24] = 0
|
||||||
|
board[25] = -2
|
||||||
|
|
||||||
|
board = tuple(board)
|
||||||
|
|
||||||
|
expected = (0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1.0,
|
||||||
|
0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1.0,
|
||||||
|
0,
|
||||||
|
|
||||||
|
1,
|
||||||
|
0
|
||||||
|
)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
||||||
|
np.array(expected).reshape(1, 198)).all())
|
||||||
|
|
||||||
|
|
||||||
|
def test_tesauro_home(self):
|
||||||
|
board = list(Board.initial_state)
|
||||||
|
|
||||||
|
board[1] = 0
|
||||||
|
board[24] = 0
|
||||||
|
|
||||||
|
board = tuple(board)
|
||||||
|
|
||||||
|
expected = (0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0.0,
|
||||||
|
2,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0.0,
|
||||||
|
2,
|
||||||
|
|
||||||
|
1,
|
||||||
|
0
|
||||||
|
)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
||||||
|
np.array(expected).reshape(1, 198)).all())
|
||||||
|
|
||||||
|
|
||||||
|
def test_tesauro_black_player(self):
|
||||||
|
board = Board.initial_state
|
||||||
|
|
||||||
|
expected = (1,1,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0.0,
|
||||||
|
0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,1,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,1,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
1,1,1,1,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
0,0,0,0,
|
||||||
|
1,1,0,0,
|
||||||
|
|
||||||
|
0.0,
|
||||||
|
0,
|
||||||
|
|
||||||
|
0,
|
||||||
|
1
|
||||||
|
)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
self.assertTrue((Board.board_features_tesauro(board, -1) ==
|
||||||
|
np.array(expected).reshape(1, 198)).all())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user