All values for boards and all rolls can now be calculated

2018-05-10 18:41:21 +02:00 · 2018-05-10 18:41:21 +02:00 · 396d5b036d
commit 396d5b036d
parent 4efb229d34
2 changed files with 112 additions and 71 deletions
--- a/network.py
+++ b/network.py
@ -232,7 +232,7 @@ class Network:
        return best_pair


-    def calculate_1_ply(self, sess, board, roll, player):
+    def calculate_1_ply(self, board, roll, player):
        """
        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
        exhaustive search is performed on the best 15 moves from the single ply.
@ -248,21 +248,122 @@ class Network:
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)

-        # find all values for the above boards
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]

-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
+        legal_moves = list(Board.calculate_legal_states(board, player, roll))

-        best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
+        legal_states = [list(tmp) for tmp in legal_moves]

-        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
+        legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states])
+
+        scores = self.calc_vals(legal_states)
+        scores = [score.numpy() for score in scores]
+
+        moves_and_scores = list(zip(init_legal_states, scores))
+
+        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
+
+        best_boards = [x[0] for x in sorted_moves_and_scores]


-        best_score_index = np.array(all_rolls_scores).argmax()
-        best_board = best_fifteen_boards[best_score_index]

-        return [best_board, max(all_rolls_scores)]
+        self.do_ply(best_boards, player)
+
+
+        #best_score_index = np.array(all_rolls_scores).argmax()
+        #best_board = best_fifteen_boards[best_score_index]
+
+        #return [best_board, max(all_rolls_scores)]
+
+    def do_ply(self, boards, player):
+        """
+        Work-in-progress batched extra ply: for every board in `boards`, expand all 21 distinct dice rolls
+        into legal states, transform them with `board_trans_func`, and run one `predict_on_batch` per board.
+        The predictions and the elapsed wall-clock time are only printed for now; the max-per-roll and the
+        average-over-rolls steps survive solely in the commented-out code at the bottom of this method.
+
+        NOTE(review): states are generated for `player`; the commented-out version used `player*-1` - confirm.
+        :param boards: The boards to try all rolls on
+        :param player: The player of the previous ply
+        :return: Nothing at present. The commented-out code shows the intended result: one averaged score
+        per entry of `boards`, in the same order.
+        """
+
+        import time  # local import; only used for the timing printout at the end
+
+        def gen_21_rolls():
+            """
+            Build the 21 distinct dice rolls, treating [x, y] and [y, x] as the same roll: [[1,1], [1,2] ..]
+            :return: List of [x, y] pairs with x <= y
+            """
+            a = []
+            for x in range(1, 7):
+                for y in range(1, 7):
+                    if not [x, y] in a and not [y, x] in a:
+                        a.append([x, y])
+
+            return a
+
+        all_rolls = gen_21_rolls()
+
+        all_rolls_scores = []  # only referenced by the commented-out aggregation below
+
+        start = time.time()
+
+        list_of_moves = []  # list_of_moves[i][j] holds the legal states for boards[i] under all_rolls[j]
+
+        for idx, board in enumerate(boards):
+            list_of_moves.append([])
+            for roll in all_rolls:
+                all_states = list(Board.calculate_legal_states(board, player, roll))
+                list_of_moves[idx].append(all_states)
+
+        tmp = []  # one stacked array of transformed states per input board
+        for board in list_of_moves:
+            all_board_moves = []
+            for roll in board:
+                for spec in roll:
+                    legal_state = np.array(self.board_trans_func(spec, player)[0])  # keeps element 0 of the transform's result
+                    all_board_moves.append(legal_state)
+            tmp.append(np.array(all_board_moves))
+
+        # print(tmp)
+
+        for board in tmp:  # `board` is now the whole batch of transformed states for one input board
+            print(self.model.predict_on_batch(board))
+
+        print(time.time() - start)
+        # Earlier `sess`-based version, commented out; it contains the aggregation the batched code above lacks:
+        # count = 0
+        # # loop over boards
+        # for a_board in boards:
+        #     a_board_scores = []
+        #
+        #     # loop over all rolls, for each board
+        #     for roll in all_rolls:
+        #
+        #         # find all states we can get to, given the board and roll and the opposite player
+        #         all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
+        #         count += len(all_rolls_boards)
+        #         # find scores for each board found above
+        #         spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
+        #                             for new_board in all_rolls_boards]
+        #
+        #         # if the original player is the -1 player, then we need to find (1-value)
+        #         spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
+        #
+        #         # find the best score
+        #         best_score = max(spec_roll_scores)
+        #
+        #         # append the best score to a_board_scores, where we keep track of the best score for each board
+        #         a_board_scores.append(best_score)
+        #
+        #     # save the expected average of board scores
+        #     all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
+        #
+        # # return all the average scores
+        # print(count)
+        # return all_rolls_scores
+

    def calc_n_ply(self, n_init, sess, board, player, roll):
        """
@ -392,67 +493,6 @@ class Network:
        best_score_pair = boards_with_scores[np.array(scores).argmax()]
        return best_score_pair

-    def do_ply(self, sess, boards, player):
-        """
-        Calculates a single extra ply, resulting in a larger search space for our best move.
-        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
-        allowing the function to search deeper, which could result in an even larger search space. If we wish
-        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
-
-        :param sess:
-        :param boards: The boards to try all rolls on
-        :param player: The player of the previous ply
-        :return: An array of scores where each index describes one of the boards which was given as param
-        to this function.
-        """
-
-        def gen_21_rolls():
-            """
-            Calculate all possible rolls, [[1,1], [1,2] ..]
-            :return: All possible rolls
-            """
-            a = []
-            for x in range(1, 7):
-                for y in range(1, 7):
-                    if not [x, y] in a and not [y, x] in a:
-                        a.append([x, y])
-
-            return a
-
-        all_rolls = gen_21_rolls()
-
-        all_rolls_scores = []
-        count = 0
-        # loop over boards
-        for a_board in boards:
-            a_board_scores = []
-
-            # loop over all rolls, for each board
-            for roll in all_rolls:
-
-                # find all states we can get to, given the board and roll and the opposite player
-                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
-                count += len(all_rolls_boards)
-                # find scores for each board found above
-                spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
-                                    for new_board in all_rolls_boards]
-
-                # if the original player is the -1 player, then we need to find (1-value)
-                spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
-
-                # find the best score
-                best_score = max(spec_roll_scores)
-
-                # append the best score to a_board_scores, where we keep track of the best score for each board
-                a_board_scores.append(best_score)
-
-            # save the expected average of board scores
-            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
-
-        # return all the average scores
-        print(count)
-        return all_rolls_scores
-

    def eval(self, episode_count, trained_eps = 0):
        """
--- a/network_test.py
+++ b/network_test.py
@ -55,3 +55,4 @@ network.print_variables()

 network.save_model(2)

+network.calculate_1_ply(Board.initial_state, [3,2], 1)