All values for boards and all rolls can now be calculated

This commit is contained in:
Alexander Munch-Hansen 2018-05-10 18:41:21 +02:00
parent 4efb229d34
commit 396d5b036d
2 changed files with 112 additions and 71 deletions

View File

@ -232,7 +232,7 @@ class Network:
return best_pair return best_pair
def calculate_1_ply(self, sess, board, roll, player): def calculate_1_ply(self, board, roll, player):
""" """
Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
exhaustive search is performed on the best 15 moves from the single ply. exhaustive search is performed on the best 15 moves from the single ply.
@ -248,21 +248,122 @@ class Network:
# find all legal states from the given board and the given roll # find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll) init_legal_states = Board.calculate_legal_states(board, player, roll)
# find all values for the above boards
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck. legal_moves = list(Board.calculate_legal_states(board, player, roll))
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
best_fifteen_boards = [x[0] for x in best_fifteen[:10]] legal_states = [list(tmp) for tmp in legal_moves]
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player) legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states])
scores = self.calc_vals(legal_states)
scores = [score.numpy() for score in scores]
moves_and_scores = list(zip(init_legal_states, scores))
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
best_boards = [x[0] for x in sorted_moves_and_scores]
best_score_index = np.array(all_rolls_scores).argmax()
best_board = best_fifteen_boards[best_score_index]
return [best_board, max(all_rolls_scores)] self.do_ply(best_boards, player)
#best_score_index = np.array(all_rolls_scores).argmax()
#best_board = best_fifteen_boards[best_score_index]
#return [best_board, max(all_rolls_scores)]
def do_ply(self, boards, player):
    """
    Evaluate every state reachable from each board over all 21 distinct dice rolls.

    For each board in `boards`, the legal states for every roll are gathered with
    Board.calculate_legal_states, transformed with self.board_trans_func and scored
    with one self.model.predict_on_batch call per board. Each prediction and the
    total elapsed wall-clock time are printed.

    No max/average reduction over rolls is performed here; the raw predictions are
    returned so the caller can reduce them as needed.

    NOTE(review): `player` was historically documented as the player of the previous
    ply, which would suggest generating moves for `player * -1`. This code uses
    `player` as given for both move generation and board transformation - confirm
    which one is intended.

    :param boards: The boards to try all rolls on
    :param player: The player handed to Board.calculate_legal_states and
                   self.board_trans_func
    :return: A list with one entry per board in `boards` (same order); each entry is
             the result of self.model.predict_on_batch for all states reachable from
             that board across all rolls
    """
    import time

    def gen_21_rolls():
        """
        Calculate all possible rolls, [[1,1], [1,2] ..]

        :return: All 21 rolls where the first die is <= the second die
        """
        # Starting the inner range at x yields each unordered pair exactly once,
        # in the same order as the previous membership-check based loop.
        return [[x, y] for x in range(1, 7) for y in range(x, 7)]

    all_rolls = gen_21_rolls()
    start = time.time()

    # Phase 1: for every board, collect the legal states for each roll.
    states_per_board = [
        [list(Board.calculate_legal_states(board, player, roll)) for roll in all_rolls]
        for board in boards
    ]

    # Phase 2: flatten the per-roll states of each board into one batch.
    batches = []
    for states_per_roll in states_per_board:
        batch = [
            np.array(self.board_trans_func(state, player)[0])
            for roll_states in states_per_roll
            for state in roll_states
        ]
        batches.append(np.array(batch))

    # Phase 3: one model call per board instead of one call per state.
    predictions = []
    for batch in batches:
        prediction = self.model.predict_on_batch(batch)
        print(prediction)
        predictions.append(prediction)

    print(time.time() - start)
    return predictions
def calc_n_ply(self, n_init, sess, board, player, roll): def calc_n_ply(self, n_init, sess, board, player, roll):
""" """
@ -392,67 +493,6 @@ class Network:
best_score_pair = boards_with_scores[np.array(scores).argmax()] best_score_pair = boards_with_scores[np.array(scores).argmax()]
return best_score_pair return best_score_pair
def do_ply(self, sess, boards, player):
    """
    Look one additional ply ahead for each of the given boards.

    For every board, each of the 21 distinct dice rolls is tried for the opposite
    player (player * -1). Per roll the highest score is kept, and the per-roll maxima
    are averaged into a single score per board. Because the maximum is taken directly
    per roll, this only supports a single extra ply.

    The total number of states that were evaluated is printed before returning.

    :param sess: Passed through unchanged to self.eval_state
    :param boards: The boards to try all rolls on
    :param player: The player of the previous ply
    :return: A list of averaged scores, one per board in `boards`, in the same order
    """
    def gen_21_rolls():
        """
        Build all distinct dice rolls, [[1,1], [1,2] ..]

        :return: The 21 rolls where the first die is <= the second die
        """
        return [[low, high] for low in range(1, 7) for high in range(low, 7)]

    opponent = player * -1
    rolls = gen_21_rolls()
    averaged_scores = []
    evaluated_states = 0

    for current_board in boards:
        best_per_roll = []

        for dice in rolls:
            # every state the opposite player can reach from this board with this roll
            reachable = Board.calculate_legal_states(current_board, opponent, dice)
            evaluated_states += len(reachable)

            raw_scores = [
                self.eval_state(sess, self.board_trans_func(candidate, opponent))
                for candidate in reachable
            ]

            # scores are flipped to (1 - value) when the original player is not player 1
            if player == 1:
                oriented_scores = raw_scores
            else:
                oriented_scores = [1 - value for value in raw_scores]

            best_per_roll.append(max(oriented_scores))

        # average of the per-roll best scores for this board
        averaged_scores.append(sum(best_per_roll) / len(best_per_roll))

    print(evaluated_states)
    return averaged_scores
def eval(self, episode_count, trained_eps = 0): def eval(self, episode_count, trained_eps = 0):
""" """

View File

@ -55,3 +55,4 @@ network.print_variables()
network.save_model(2) network.save_model(2)
network.calculate_1_ply(Board.initial_state, [3,2], 1)