Moved "do_ply" out of "calculate_2_ply" in an effort to eventually support
further plies. However, some rewriting of the current "do_ply" will be
needed, as described in a comment.
This commit is contained in:
Alexander Munch-Hansen 2018-04-26 09:42:03 +02:00
parent 8899c5c2d9
commit 48a5f6cbb6

View File

@ -190,6 +190,41 @@ class Network:
""" """
init_legal_states = Board.calculate_legal_states(board, player, roll)
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# Python's list.reverse() works in place and returns None, so [:15] can't be chained onto it; sort into a named list first.
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
# They're sorted from smallest to largest, therefore we want to reverse the list if the current player is 1, since
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
if player == 1:
best_fifteen.reverse()
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
best_score_index = np.array(all_rolls_scores).argmax()
best_board = best_fifteen_boards[best_score_index]
return [best_board, max(all_rolls_scores)]
def do_ply(self, sess, boards, player):
"""
Calculates a single extra ply, resulting in a larger search space for our best move.
This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
allowing the function to search deeper, which could result in an even larger search space. If we wish
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
:param sess:
:param boards: The boards to try all rolls on
:param player: The player of the previous ply
:return: An array of scores where each index describes one of the boards which was given as param
to this function.
"""
def gen_21_rolls(): def gen_21_rolls():
""" """
Calculate all possible rolls, [[1,1], [1,2] ..] Calculate all possible rolls, [[1,1], [1,2] ..]
@ -203,22 +238,10 @@ class Network:
return a return a
init_legal_states = Board.calculate_legal_states(board, player, roll)
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# Python's list.reverse() works in place and returns None, so [:15] can't be chained onto it; sort into a named list first.
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
# They're sorted from smallest to largest, therefore we want to reverse the list if the current player is 1, since
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
if player == 1:
best_fifteen.reverse()
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
all_rolls = gen_21_rolls() all_rolls = gen_21_rolls()
all_rolls_scores = [] all_rolls_scores = []
for a_board in best_fifteen_boards: for a_board in boards:
a_board_scores = [] a_board_scores = []
for roll in all_rolls: for roll in all_rolls:
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll) all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
@ -235,10 +258,7 @@ class Network:
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores)) all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
best_score_index = np.array(all_rolls_scores).argmax() return all_rolls_scores
best_board = best_fifteen_boards[best_score_index]
return [best_board, max(all_rolls_scores)]
def eval(self, episode_count, trained_eps = 0, tf_session = None): def eval(self, episode_count, trained_eps = 0, tf_session = None):
""" """