Moved "do_ply" out of "calculate_2_ply" so that further plies can eventually be supported.
However, the current "do_ply" will need some rewriting first, as described in a comment.
This commit is contained in:
parent
8899c5c2d9
commit
48a5f6cbb6
54
network.py
54
network.py
|
@ -190,6 +190,41 @@ class Network:
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||||
|
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
||||||
|
|
||||||
|
# Python's list.reverse() works in place and returns None, so [:15] can't be chained onto it; keep the sorted list in a variable first.
|
||||||
|
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
|
||||||
|
|
||||||
|
|
||||||
|
# They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since
|
||||||
|
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
|
||||||
|
if player == 1:
|
||||||
|
best_fifteen.reverse()
|
||||||
|
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
|
||||||
|
|
||||||
|
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
|
||||||
|
|
||||||
|
|
||||||
|
best_score_index = np.array(all_rolls_scores).argmax()
|
||||||
|
best_board = best_fifteen_boards[best_score_index]
|
||||||
|
|
||||||
|
return [best_board, max(all_rolls_scores)]
|
||||||
|
|
||||||
|
def do_ply(self, sess, boards, player):
|
||||||
|
"""
|
||||||
|
Calculates a single extra ply, resulting in a larger search space for our best move.
|
||||||
|
This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
|
||||||
|
allowing the function to search deeper, which could result in an even larger search space. If we wish
|
||||||
|
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
|
||||||
|
|
||||||
|
:param sess:
|
||||||
|
:param boards: The boards to try all rolls on
|
||||||
|
:param player: The player of the previous ply
|
||||||
|
:return: An array of scores where each index describes one of the boards which was given as param
|
||||||
|
to this function.
|
||||||
|
"""
|
||||||
|
|
||||||
def gen_21_rolls():
|
def gen_21_rolls():
|
||||||
"""
|
"""
|
||||||
Calculate all possible rolls, [[1,1], [1,2] ..]
|
Calculate all possible rolls, [[1,1], [1,2] ..]
|
||||||
|
@ -203,22 +238,10 @@ class Network:
|
||||||
|
|
||||||
return a
|
return a
|
||||||
|
|
||||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
|
||||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
|
||||||
|
|
||||||
# Python's list.reverse() works in place and returns None, so [:15] can't be chained onto it; keep the sorted list in a variable first.
|
|
||||||
best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
|
|
||||||
|
|
||||||
# They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since
|
|
||||||
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
|
|
||||||
if player == 1:
|
|
||||||
best_fifteen.reverse()
|
|
||||||
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
|
|
||||||
|
|
||||||
all_rolls = gen_21_rolls()
|
all_rolls = gen_21_rolls()
|
||||||
|
|
||||||
all_rolls_scores = []
|
all_rolls_scores = []
|
||||||
for a_board in best_fifteen_boards:
|
for a_board in boards:
|
||||||
a_board_scores = []
|
a_board_scores = []
|
||||||
for roll in all_rolls:
|
for roll in all_rolls:
|
||||||
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
|
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
|
||||||
|
@ -235,10 +258,7 @@ class Network:
|
||||||
|
|
||||||
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
|
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
|
||||||
|
|
||||||
best_score_index = np.array(all_rolls_scores).argmax()
|
return all_rolls_scores
|
||||||
best_board = best_fifteen_boards[best_score_index]
|
|
||||||
|
|
||||||
return [best_board, max(all_rolls_scores)]
|
|
||||||
|
|
||||||
def eval(self, episode_count, trained_eps = 0, tf_session = None):
|
def eval(self, episode_count, trained_eps = 0, tf_session = None):
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user