Moved "do_ply" out of "calculate_2_ply", in an effort to be able to eventually do further plies.

However, some rewriting of the current "do_ply" will be needed, as described in a comment.
2018-04-26 09:42:03 +02:00 · 2018-04-26 09:42:03 +02:00 · 48a5f6cbb6
commit 48a5f6cbb6
parent 8899c5c2d9
1 changed files with 37 additions and 17 deletions
--- a/network.py
+++ b/network.py
@ -190,6 +190,41 @@ class Network:
        """
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
        # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
        if player == 1:
            best_fifteen.reverse()
        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
        best_score_index = np.array(all_rolls_scores).argmax()
        best_board = best_fifteen_boards[best_score_index]
        return [best_board, max(all_rolls_scores)]
    def do_ply(self, sess, boards, player):
        """
        Calculates a single extra ply, resulting in a larger search space for our best move.
        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
        allowing the function to search deeper, which could result in an even larger search space. If we wish
        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
        :param sess:
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: An array of scores where each index describes one of the boards which was given as param
        to this function.
        """
        def gen_21_rolls():
            """
            Calculate all possible rolls, [[1,1], [1,2] ..]
@ -203,22 +238,10 @@ class Network:
            return a
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
        # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
        if player == 1:
            best_fifteen.reverse()
        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
        all_rolls = gen_21_rolls()
        all_rolls_scores = []
-        for a_board in best_fifteen_boards:
+        for a_board in boards:
            a_board_scores = []
            for roll in all_rolls:
                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
@ -235,10 +258,7 @@ class Network:
            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
-        best_score_index = np.array(all_rolls_scores).argmax()
+        return all_rolls_scores
        best_board = best_fifteen_boards[best_score_index]
        return [best_board, max(all_rolls_scores)]
    def eval(self, episode_count, trained_eps = 0, tf_session = None):
        """