All values for boards and all rolls can now be calculated
This commit is contained in:
parent
4efb229d34
commit
396d5b036d
182
network.py
182
network.py
|
@ -232,7 +232,7 @@ class Network:
|
|||
return best_pair
|
||||
|
||||
|
||||
def calculate_1_ply(self, sess, board, roll, player):
|
||||
def calculate_1_ply(self, board, roll, player):
|
||||
"""
|
||||
Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
|
||||
exhaustive search is performed on the best 15 moves from the single ply.
|
||||
|
@ -248,21 +248,122 @@ class Network:
|
|||
# find all legal states from the given board and the given roll
|
||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||
|
||||
# find all values for the above boards
|
||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
||||
|
||||
# Python's list.reverse() works in place and returns None, so [:15] cannot be applied to its result; sorted(..., reverse=...) returns a new list that can be sliced.
|
||||
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
|
||||
legal_moves = list(Board.calculate_legal_states(board, player, roll))
|
||||
|
||||
best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
|
||||
legal_states = [list(tmp) for tmp in legal_moves]
|
||||
|
||||
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
|
||||
legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states])
|
||||
|
||||
scores = self.calc_vals(legal_states)
|
||||
scores = [score.numpy() for score in scores]
|
||||
|
||||
moves_and_scores = list(zip(init_legal_states, scores))
|
||||
|
||||
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
|
||||
|
||||
best_boards = [x[0] for x in sorted_moves_and_scores]
|
||||
|
||||
|
||||
best_score_index = np.array(all_rolls_scores).argmax()
|
||||
best_board = best_fifteen_boards[best_score_index]
|
||||
|
||||
return [best_board, max(all_rolls_scores)]
|
||||
self.do_ply(best_boards, player)
|
||||
|
||||
|
||||
#best_score_index = np.array(all_rolls_scores).argmax()
|
||||
#best_board = best_fifteen_boards[best_score_index]
|
||||
|
||||
#return [best_board, max(all_rolls_scores)]
|
||||
|
||||
def do_ply(self, boards, player):
    """
    Calculates a single extra ply, resulting in a larger search space for our best move.

    For every given board, each of the 21 distinct dice rolls is tried for the opposite player, all
    resulting states of that board are evaluated by the model in one batch, the best score of every
    roll is kept and those best scores are averaged into a single score for the board.

    This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
    allowing the function to search deeper, which could result in an even larger search space. If we wish
    to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.

    NOTE(review): assumes self.model.predict_on_batch yields exactly one value per input row and that
    every roll produces at least one legal state (otherwise max raises ValueError) - confirm.

    :param boards: The boards to try all rolls on
    :param player: The player of the previous ply
    :return: A list of scores where each index describes one of the boards which was given as param
             to this function.
    """

    def gen_21_rolls():
        """
        Calculate all possible rolls, [[1,1], [1,2] ..]
        :return: All possible rolls
        """
        # Only keep rolls with x <= y, since [x, y] and [y, x] are the same roll.
        return [[x, y] for x in range(1, 7) for y in range(x, 7)]

    all_rolls = gen_21_rolls()

    # The rolls of this ply belong to the opposite of the player who made the previous ply.
    opponent = player * -1

    all_rolls_scores = []

    for board in boards:
        # Collect the network input of every reachable state for this board, remembering how
        # many states each roll produced so the flat prediction vector can be split per roll.
        board_inputs = []
        states_per_roll = []
        for roll in all_rolls:
            all_states = list(Board.calculate_legal_states(board, opponent, roll))
            states_per_roll.append(len(all_states))
            board_inputs.extend(np.array(self.board_trans_func(state, opponent)[0])
                                for state in all_states)

        # One model call per board instead of one call per state.
        values = np.asarray(self.model.predict_on_batch(np.array(board_inputs))).reshape(-1)

        # if the original player is the -1 player, then we need to find (1-value)
        if player != 1:
            values = 1 - values

        # find the best score for each roll
        best_per_roll = []
        offset = 0
        for state_count in states_per_roll:
            best_per_roll.append(float(values[offset:offset + state_count].max()))
            offset += state_count

        # save the expected average of board scores
        all_rolls_scores.append(sum(best_per_roll) / len(best_per_roll))

    # return all the average scores
    return all_rolls_scores
|
||||
|
||||
|
||||
def calc_n_ply(self, n_init, sess, board, player, roll):
|
||||
"""
|
||||
|
@ -392,67 +493,6 @@ class Network:
|
|||
best_score_pair = boards_with_scores[np.array(scores).argmax()]
|
||||
return best_score_pair
|
||||
|
||||
def do_ply(self, sess, boards, player):
    """
    Look one extra ply ahead for each of the given boards.

    Every board is combined with each of the 21 distinct dice rolls for the opposite player. All
    states reachable with a roll are scored through eval_state, the highest score of the roll is
    kept, and the per-roll maxima are averaged into one score per board. Because the maximum is
    taken directly, this only supports a single additional ply; searching deeper (e.g. 3-ply)
    would require reworking this method.

    The total number of evaluated states is printed before returning.

    :param sess: Passed through unchanged to eval_state
    :param boards: The boards to try all rolls on
    :param player: The player of the previous ply
    :return: A list of scores where each index describes one of the boards which was given as param
             to this function.
    """

    def gen_21_rolls():
        """
        Build every distinct roll of two dice, [[1,1], [1,2] ..]
        :return: All possible rolls
        """
        # Restricting the second die to >= the first yields each unordered pair exactly once.
        return [[low, high] for low in range(1, 7) for high in range(low, 7)]

    opponent = player * -1
    rolls = gen_21_rolls()

    averaged_scores = []
    states_seen = 0

    for candidate in boards:
        best_per_roll = []

        for dice in rolls:
            # states the opposite player can reach from this board with this roll
            reachable = Board.calculate_legal_states(candidate, opponent, dice)
            states_seen += len(reachable)

            raw_scores = [self.eval_state(sess, self.board_trans_func(state, opponent))
                          for state in reachable]

            # scores are flipped to (1 - value) when the original player is the -1 player
            if player == 1:
                adjusted = raw_scores
            else:
                adjusted = [(1 - value) for value in raw_scores]

            best_per_roll.append(max(adjusted))

        # average of the best scores over all rolls for this board
        averaged_scores.append(sum(best_per_roll) / len(best_per_roll))

    print(states_seen)
    return averaged_scores
|
||||
|
||||
|
||||
def eval(self, episode_count, trained_eps = 0):
|
||||
"""
|
||||
|
|
|
@ -55,3 +55,4 @@ network.print_variables()
|
|||
|
||||
network.save_model(2)
|
||||
|
||||
network.calculate_1_ply(Board.initial_state, [3,2], 1)
|
Loading…
Reference in New Issue
Block a user