From 48a5f6cbb68bb94165cacbc5dc851b0c534bbbc8 Mon Sep 17 00:00:00 2001
From: Pownie <alexmunchhansen@gmail.com>
Date: Thu, 26 Apr 2018 09:42:03 +0200
Subject: [PATCH] Moved "do_ply" out of "calculate_2_ply" in an effort to
 eventually support further plies; some rewriting of the current "do_ply"
 will still be needed, as described in its docstring.

---
 network.py | 54 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/network.py b/network.py
index 006b3e9..faed87a 100644
--- a/network.py
+++ b/network.py
@@ -190,6 +190,41 @@ class Network:
 
         """
 
+        init_legal_states = Board.calculate_legal_states(board, player, roll)
+        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
+
+        # Python's list.reverse() works in place and returns None, so it can't be chained with [:15]; sort into a named list first.
+        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
+
+
+        # They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since
+        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
+        if player == 1:
+            best_fifteen.reverse()
+        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
+
+        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
+
+
+        best_score_index = np.array(all_rolls_scores).argmax()
+        best_board = best_fifteen_boards[best_score_index]
+
+        return [best_board, max(all_rolls_scores)]
+
+    def do_ply(self, sess, boards, player):
+        """
+        Calculates a single extra ply, resulting in a larger search space for our best move.
+        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
+        allowing the function to search deeper, which could result in an even larger search space. If we wish
+        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
+
+        :param sess:
+        :param boards: The boards to try all rolls on
+        :param player: The player of the previous ply
+        :return: A list of scores, one per board in `boards` and in the same order; each score is the average of
+        that board's per-roll scores.
+        """
+
         def gen_21_rolls():
             """
             Calculate all possible rolls, [[1,1], [1,2] ..]
@@ -203,22 +238,10 @@ class Network:
 
             return a
 
-        init_legal_states = Board.calculate_legal_states(board, player, roll)
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
-
-        # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
-        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
-        if player == 1:
-            best_fifteen.reverse()
-        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
-
         all_rolls = gen_21_rolls()
 
         all_rolls_scores = []
-        for a_board in best_fifteen_boards:
+        for a_board in boards:
             a_board_scores = []
             for roll in all_rolls:
                 all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
@@ -235,10 +258,7 @@ class Network:
 
             all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
 
-        best_score_index = np.array(all_rolls_scores).argmax()
-        best_board = best_fifteen_boards[best_score_index]
-
-        return [best_board, max(all_rolls_scores)]
+        return all_rolls_scores
 
     def eval(self, episode_count, trained_eps = 0, tf_session = None):
         """