make_move now calls n_ply to search deeper and potentially give
better moves. It's hella fucking slow.
This commit is contained in:
parent
695a3d43db
commit
1db469709a
55
network.py
55
network.py
|
@ -157,7 +157,7 @@ class Network:
|
|||
exit()
|
||||
|
||||
|
||||
def make_move(self, sess, board, roll, player):
|
||||
#def make_move(self, sess, board, roll, player):
|
||||
"""
|
||||
Find the best move given a board, roll and a player, by finding all possible states one can go to
|
||||
and then picking the best, by using the network to evaluate each state. The highest score is picked
|
||||
|
@ -169,24 +169,28 @@ class Network:
|
|||
:param player: Current player
|
||||
:return: A pair of the best state to go to, together with the score of that state
|
||||
"""
|
||||
legal_moves = Board.calculate_legal_states(board, player, roll)
|
||||
moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
|
||||
scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
|
||||
best_score_index = np.array(scores).argmax()
|
||||
best_move_pair = moves_and_scores[best_score_index]
|
||||
return best_move_pair
|
||||
# legal_moves = Board.calculate_legal_states(board, player, roll)
|
||||
# moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
|
||||
# scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
|
||||
# best_score_index = np.array(scores).argmax()
|
||||
# best_move_pair = moves_and_scores[best_score_index]
|
||||
# return best_move_pair
|
||||
|
||||
def make_move(self, sess, board, roll, player, n = 1):
|
||||
best_pair = self.calc_n_ply(n, sess, board, player, roll)
|
||||
return best_pair
|
||||
|
||||
|
||||
def calculate_2_ply(self, sess, board, roll, player):
|
||||
def calculate_1_ply(self, sess, board, roll, player):
|
||||
"""
|
||||
Find the best move based on a 2-ply look-ahead. First the best move is found for a single ply and then an
|
||||
Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
|
||||
exhaustive search is performed on the best 15 moves from the single ply.
|
||||
|
||||
:param sess:
|
||||
:param board:
|
||||
:param roll: The original roll
|
||||
:param player: The current player
|
||||
:return: Best possible move based on 2-ply look-ahead
|
||||
:return: Best possible move based on 1-ply look-ahead
|
||||
|
||||
"""
|
||||
|
||||
|
@ -205,7 +209,7 @@ class Network:
|
|||
if player == 1:
|
||||
best_fifteen.reverse()
|
||||
|
||||
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
|
||||
best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
|
||||
|
||||
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
|
||||
|
||||
|
@ -215,6 +219,29 @@ class Network:
|
|||
|
||||
return [best_board, max(all_rolls_scores)]
|
||||
|
||||
def calc_n_ply(self, n_init, sess, board, player, roll):
|
||||
|
||||
# find all legal states from the given board and the given roll
|
||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||
|
||||
# find all values for the above boards
|
||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
||||
|
||||
# pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
|
||||
sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
|
||||
|
||||
|
||||
# They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
|
||||
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
|
||||
if player == 1:
|
||||
sorted_moves_and_scores.reverse()
|
||||
|
||||
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
|
||||
|
||||
best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
|
||||
|
||||
return best_move_score_pair
|
||||
|
||||
|
||||
def n_ply(self, n_init, sess, boards_init, player_init):
|
||||
def ply(n, boards, player):
|
||||
|
@ -262,7 +289,6 @@ class Network:
|
|||
|
||||
|
||||
if n == 1:
|
||||
print("blalhlalha")
|
||||
average_score_pairs = [ (board, average_ply_score(board))
|
||||
for board
|
||||
in boards ]
|
||||
|
@ -301,12 +327,13 @@ class Network:
|
|||
if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
|
||||
|
||||
boards_with_scores = ply(n_init, boards_init, -1 * player_init)
|
||||
print(boards_with_scores)
|
||||
#print("Boards with scores:",boards_with_scores)
|
||||
scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
|
||||
for pair
|
||||
in boards_with_scores ]
|
||||
#print("All the scores:",scores)
|
||||
best_score_pair = boards_with_scores[np.array(scores).argmax()]
|
||||
return best_score_pair[0]
|
||||
return best_score_pair
|
||||
|
||||
def do_ply(self, sess, boards, player):
|
||||
"""
|
||||
|
|
|
@ -80,13 +80,14 @@ def calculate_possible_states(board):
|
|||
#for board in boards:
|
||||
# calculate_possible_states(board)
|
||||
|
||||
print("-"*30)
|
||||
print(network.do_ply(session, boards, 1))
|
||||
#print("-"*30)
|
||||
#print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1))
|
||||
|
||||
#print(" "*10 + "network_test")
|
||||
#print(" "*20 + "Depth 1")
|
||||
scores = network.n_ply(1, session, boards, 1)
|
||||
print(" "*20 + "Depth 1")
|
||||
print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4]))
|
||||
|
||||
#print(scores)
|
||||
|
||||
#print(" "*20 + "Depth 2")
|
||||
#print(network.n_ply(2, session, boards, 1))
|
||||
|
|
Loading…
Reference in New Issue
Block a user