All values for boards and all rolls can now be calculated
This commit is contained in:
parent
4efb229d34
commit
396d5b036d
182
network.py
182
network.py
|
@@ -232,7 +232,7 @@ class Network:
|
||||||
return best_pair
|
return best_pair
|
||||||
|
|
||||||
|
|
||||||
def calculate_1_ply(self, sess, board, roll, player):
|
def calculate_1_ply(self, board, roll, player):
|
||||||
"""
|
"""
|
||||||
Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
|
Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
|
||||||
exhaustive search is performed on the best 15 moves from the single ply.
|
exhaustive search is performed on the best 15 moves from the single ply.
|
||||||
|
@@ -248,21 +248,122 @@ class Network:
|
||||||
# find all legal states from the given board and the given roll
|
# find all legal states from the given board and the given roll
|
||||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||||
|
|
||||||
# find all values for the above boards
|
|
||||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
|
||||||
|
|
||||||
# pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
|
legal_moves = list(Board.calculate_legal_states(board, player, roll))
|
||||||
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
|
|
||||||
|
|
||||||
best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
|
legal_states = [list(tmp) for tmp in legal_moves]
|
||||||
|
|
||||||
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
|
legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states])
|
||||||
|
|
||||||
|
scores = self.calc_vals(legal_states)
|
||||||
|
scores = [score.numpy() for score in scores]
|
||||||
|
|
||||||
|
moves_and_scores = list(zip(init_legal_states, scores))
|
||||||
|
|
||||||
|
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
|
||||||
|
|
||||||
|
best_boards = [x[0] for x in sorted_moves_and_scores]
|
||||||
|
|
||||||
|
|
||||||
best_score_index = np.array(all_rolls_scores).argmax()
|
|
||||||
best_board = best_fifteen_boards[best_score_index]
|
|
||||||
|
|
||||||
return [best_board, max(all_rolls_scores)]
|
self.do_ply(best_boards, player)
|
||||||
|
|
||||||
|
|
||||||
|
#best_score_index = np.array(all_rolls_scores).argmax()
|
||||||
|
#best_board = best_fifteen_boards[best_score_index]
|
||||||
|
|
||||||
|
#return [best_board, max(all_rolls_scores)]
|
||||||
|
|
||||||
|
def do_ply(self, boards, player):
    """
    Calculates a single extra ply, resulting in a larger search space for our best move.

    For every given board, each of the 21 distinct dice rolls is tried for the opponent of ``player``.
    All states the opponent can reach from one board are evaluated in a single batched model call.
    For each roll the best score, seen from ``player``, is kept, and the board's score is the plain
    mean of those per-roll best scores.

    This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
    allowing the function to search deeper, which could result in an even larger search space. If we wish
    to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.

    NOTE(review): the mean is unweighted although a double is rolled half as often as a mixed roll;
    confirm whether a probability-weighted mean is wanted.

    :param boards: The boards to try all rolls on
    :param player: The player of the previous ply
    :return: A list of scores where each index describes one of the boards which was given as param
             to this function.
    """

    # All distinct rolls where order does not matter: [1, 1], [1, 2], ..., [6, 6].
    all_rolls = [[x, y] for x in range(1, 7) for y in range(x, 7)]

    # `player` made the previous ply, so the extra ply is played by the other side.
    opponent = player * -1

    all_rolls_scores = []

    for board in boards:
        # Collect the transformed states for every roll in one flat list so the model is
        # called once per board; roll_sizes remembers how to split the result per roll again.
        states = []
        roll_sizes = []
        for roll in all_rolls:
            roll_states = [self.board_trans_func(state, opponent)[0]
                           for state in Board.calculate_legal_states(board, opponent, roll)]
            roll_sizes.append(len(roll_states))
            states.extend(roll_states)

        # assumes the model yields exactly one scalar per input state - TODO confirm
        values = np.asarray(self.model.predict_on_batch(np.array(states))).reshape(-1)

        # if the original player is the -1 player, then we need to find (1-value)
        if player != 1:
            values = 1 - values

        # Best score per roll.
        # assumes every roll yields at least one legal state - TODO confirm against Board
        best_per_roll = []
        offset = 0
        for size in roll_sizes:
            best_per_roll.append(values[offset:offset + size].max())
            offset += size

        # save the expected average of board scores
        all_rolls_scores.append(sum(best_per_roll) / len(best_per_roll))

    # return all the average scores
    return all_rolls_scores
|
||||||
|
|
||||||
|
|
||||||
def calc_n_ply(self, n_init, sess, board, player, roll):
|
def calc_n_ply(self, n_init, sess, board, player, roll):
|
||||||
"""
|
"""
|
||||||
|
@@ -392,67 +493,6 @@ class Network:
|
||||||
best_score_pair = boards_with_scores[np.array(scores).argmax()]
|
best_score_pair = boards_with_scores[np.array(scores).argmax()]
|
||||||
return best_score_pair
|
return best_score_pair
|
||||||
|
|
||||||
def do_ply(self, sess, boards, player):
|
|
||||||
"""
|
|
||||||
Calculates a single extra ply, resulting in a larger search space for our best move.
|
|
||||||
This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
|
|
||||||
allowing the function to search deeper, which could result in an even larger search space. If we wish
|
|
||||||
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
|
|
||||||
|
|
||||||
:param sess:
|
|
||||||
:param boards: The boards to try all rolls on
|
|
||||||
:param player: The player of the previous ply
|
|
||||||
:return: An array of scores where each index describes one of the boards which was given as param
|
|
||||||
to this function.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def gen_21_rolls():
|
|
||||||
"""
|
|
||||||
Calculate all possible rolls, [[1,1], [1,2] ..]
|
|
||||||
:return: All possible rolls
|
|
||||||
"""
|
|
||||||
a = []
|
|
||||||
for x in range(1, 7):
|
|
||||||
for y in range(1, 7):
|
|
||||||
if not [x, y] in a and not [y, x] in a:
|
|
||||||
a.append([x, y])
|
|
||||||
|
|
||||||
return a
|
|
||||||
|
|
||||||
all_rolls = gen_21_rolls()
|
|
||||||
|
|
||||||
all_rolls_scores = []
|
|
||||||
count = 0
|
|
||||||
# loop over boards
|
|
||||||
for a_board in boards:
|
|
||||||
a_board_scores = []
|
|
||||||
|
|
||||||
# loop over all rolls, for each board
|
|
||||||
for roll in all_rolls:
|
|
||||||
|
|
||||||
# find all states we can get to, given the board and roll and the opposite player
|
|
||||||
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
|
|
||||||
count += len(all_rolls_boards)
|
|
||||||
# find scores for each board found above
|
|
||||||
spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
|
|
||||||
for new_board in all_rolls_boards]
|
|
||||||
|
|
||||||
# if the original player is the -1 player, then we need to find (1-value)
|
|
||||||
spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
|
|
||||||
|
|
||||||
# find the best score
|
|
||||||
best_score = max(spec_roll_scores)
|
|
||||||
|
|
||||||
# append the best score to a_board_scores, where we keep track of the best score for each board
|
|
||||||
a_board_scores.append(best_score)
|
|
||||||
|
|
||||||
# save the expected average of board scores
|
|
||||||
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
|
|
||||||
|
|
||||||
# return all the average scores
|
|
||||||
print(count)
|
|
||||||
return all_rolls_scores
|
|
||||||
|
|
||||||
|
|
||||||
def eval(self, episode_count, trained_eps = 0):
|
def eval(self, episode_count, trained_eps = 0):
|
||||||
"""
|
"""
|
||||||
|
|
|
@@ -55,3 +55,4 @@ network.print_variables()
|
||||||
|
|
||||||
network.save_model(2)
|
network.save_model(2)
|
||||||
|
|
||||||
|
network.calculate_1_ply(Board.initial_state, [3,2], 1)
|
Loading…
Reference in New Issue
Block a user