From 48a5f6cbb68bb94165cacbc5dc851b0c534bbbc8 Mon Sep 17 00:00:00 2001 From: Pownie Date: Thu, 26 Apr 2018 09:42:03 +0200 Subject: [PATCH] Moved "do_ply" out of "calculate_2_ply", in an effort to be able to eventually do further plies, however some rewriting of the current "do_ply" will be needed, as described in a comment. --- network.py | 54 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/network.py b/network.py index 006b3e9..faed87a 100644 --- a/network.py +++ b/network.py @@ -190,6 +190,41 @@ class Network: """ + init_legal_states = Board.calculate_legal_states(board, player, roll) + zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states] + + # Python's list.reverse() reverses in place, so [:15] can't be chained onto it; sort into a named list first and slice that. + best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1)) + + + # They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since + # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize. + if player == 1: + best_fifteen.reverse() + best_fifteen_boards = [x[0] for x in best_fifteen[:15]] + + all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player) + + + best_score_index = np.array(all_rolls_scores).argmax() + best_board = best_fifteen_boards[best_score_index] + + return [best_board, max(all_rolls_scores)] + + def do_ply(self, sess, boards, player): + """ + Calculates a single extra ply, resulting in a larger search space for our best move. + This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than + allowing the function to search deeper, which could result in an even larger search space. If we wish + to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply. 
+ + :param sess: + :param boards: The boards to try all rolls on + :param player: The player of the previous ply + :return: An array of scores where each index describes one of the boards which was given as param + to this function. + """ + def gen_21_rolls(): """ Calculate all possible rolls, [[1,1], [1,2] ..] @@ -203,22 +238,10 @@ class Network: return a - init_legal_states = Board.calculate_legal_states(board, player, roll) - zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states] - - # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck. - best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1)) - - # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since - # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize. - if player == 1: - best_fifteen.reverse() - best_fifteen_boards = [x[0] for x in best_fifteen[:15]] - all_rolls = gen_21_rolls() all_rolls_scores = [] - for a_board in best_fifteen_boards: + for a_board in boards: a_board_scores = [] for roll in all_rolls: all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll) @@ -235,10 +258,7 @@ class Network: all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores)) - best_score_index = np.array(all_rolls_scores).argmax() - best_board = best_fifteen_boards[best_score_index] - - return [best_board, max(all_rolls_scores)] + return all_rolls_scores def eval(self, episode_count, trained_eps = 0, tf_session = None): """