Moved "do_ply" out of "calculate_2_ply", in an effort to be able to

eventually do further plies; however, some rewriting of the current "do_ply" will be needed, as described in a comment.
2018-04-26 09:42:03 +02:00 · 2018-04-26 09:42:03 +02:00 · 48a5f6cbb6
commit 48a5f6cbb6
parent 8899c5c2d9
1 changed files with 37 additions and 17 deletions
--- a/network.py
+++ b/network.py
@ -190,6 +190,41 @@ class Network:

        """

+        init_legal_states = Board.calculate_legal_states(board, player, roll)
+        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
+
+        # Python's list.reverse() works in place and returns None, so [:15] can't be chained onto it; sort into a named list first.
+        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
+
+
+        # They're sorted from smallest to largest; therefore we want to reverse if the current player is 1, since
+        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
+        if player == 1:
+            best_fifteen.reverse()
+        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
+
+        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
+
+
+        best_score_index = np.array(all_rolls_scores).argmax()
+        best_board = best_fifteen_boards[best_score_index]
+
+        return [best_board, max(all_rolls_scores)]
+
+    def do_ply(self, sess, boards, player):
+        """
+        Calculates a single extra ply, resulting in a larger search space for our best move.
+        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
+        allowing the function to search deeper, which could result in an even larger search space. If we wish
+        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
+
+        :param sess:
+        :param boards: The boards to try all rolls on
+        :param player: The player of the previous ply
+        :return: An array of scores where each index describes one of the boards which was given as param
+        to this function.
+        """
+
        def gen_21_rolls():
            """
            Calculate all possible rolls, [[1,1], [1,2] ..]
@ -203,22 +238,10 @@ class Network:

            return a

-        init_legal_states = Board.calculate_legal_states(board, player, roll)
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
-
-        # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
-        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
-        if player == 1:
-            best_fifteen.reverse()
-        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
-
        all_rolls = gen_21_rolls()

        all_rolls_scores = []
-        for a_board in best_fifteen_boards:
+        for a_board in boards:
            a_board_scores = []
            for roll in all_rolls:
                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
@ -235,10 +258,7 @@ class Network:

            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))

-        best_score_index = np.array(all_rolls_scores).argmax()
-        best_board = best_fifteen_boards[best_score_index]
-
-        return [best_board, max(all_rolls_scores)]
+        return all_rolls_scores

    def eval(self, episode_count, trained_eps = 0, tf_session = None):
        """