Moved "do_ply" out of "calculate_2_ply" so that further plies can eventually be added.
However, some rewriting of the current "do_ply" will be needed first, as described in a comment.
This commit is contained in:
parent
8899c5c2d9
commit
48a5f6cbb6
54
network.py
54
network.py
|
@ -190,6 +190,41 @@ class Network:
|
|||
|
||||
"""
|
||||
|
||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
||||
|
||||
# Python's list.reverse() works in place and returns None, so [:15] can't be applied to its result; sort into a named list first and reverse that list afterwards.
|
||||
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
|
||||
|
||||
|
||||
# They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since
|
||||
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
|
||||
if player == 1:
|
||||
best_fifteen.reverse()
|
||||
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
|
||||
|
||||
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
|
||||
|
||||
|
||||
best_score_index = np.array(all_rolls_scores).argmax()
|
||||
best_board = best_fifteen_boards[best_score_index]
|
||||
|
||||
return [best_board, max(all_rolls_scores)]
|
||||
|
||||
def do_ply(self, sess, boards, player):
|
||||
"""
|
||||
Calculates a single extra ply, resulting in a larger search space for our best move.
|
||||
This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
|
||||
allowing the function to search deeper, which could result in an even larger search space. If we wish
|
||||
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
|
||||
|
||||
:param sess:
|
||||
:param boards: The boards to try all rolls on
|
||||
:param player: The player of the previous ply
|
||||
:return: An array of scores where each index describes one of the boards which was given as param
|
||||
to this function.
|
||||
"""
|
||||
|
||||
def gen_21_rolls():
|
||||
"""
|
||||
Calculate all possible rolls, [[1,1], [1,2] ..]
|
||||
|
@ -203,22 +238,10 @@ class Network:
|
|||
|
||||
return a
|
||||
|
||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
||||
|
||||
# Python's list.reverse() works in place and returns None, so [:15] can't be applied to its result; sort into a named list first and reverse that list afterwards.
|
||||
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
|
||||
|
||||
# They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since
|
||||
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
|
||||
if player == 1:
|
||||
best_fifteen.reverse()
|
||||
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
|
||||
|
||||
all_rolls = gen_21_rolls()
|
||||
|
||||
all_rolls_scores = []
|
||||
for a_board in best_fifteen_boards:
|
||||
for a_board in boards:
|
||||
a_board_scores = []
|
||||
for roll in all_rolls:
|
||||
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
|
||||
|
@ -235,10 +258,7 @@ class Network:
|
|||
|
||||
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
|
||||
|
||||
best_score_index = np.array(all_rolls_scores).argmax()
|
||||
best_board = best_fifteen_boards[best_score_index]
|
||||
|
||||
return [best_board, max(all_rolls_scores)]
|
||||
return all_rolls_scores
|
||||
|
||||
def eval(self, episode_count, trained_eps = 0, tf_session = None):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue
Block a user