All values for boards and all rolls can now be calculated

2018-05-10 18:41:21 +02:00 · 2018-05-10 18:41:21 +02:00 · 396d5b036d
commit 396d5b036d
parent 4efb229d34
2 changed files with 112 additions and 71 deletions
--- a/network.py
+++ b/network.py
@ -232,7 +232,7 @@ class Network:
        return best_pair
-    def calculate_1_ply(self, sess, board, roll, player):
+    def calculate_1_ply(self, board, roll, player):
        """
        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
        exhaustive search is performed on the best 15 moves from the single ply.
@ -248,21 +248,122 @@ class Network:
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        # find all values for the above boards
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
+        legal_moves = list(Board.calculate_legal_states(board, player, roll))
        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
-        best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
+        legal_states = [list(tmp) for tmp in legal_moves]
-        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
+        legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states])
        scores = self.calc_vals(legal_states)
        scores = [score.numpy() for score in scores]
        moves_and_scores = list(zip(init_legal_states, scores))
        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
        best_boards = [x[0] for x in sorted_moves_and_scores]
        best_score_index = np.array(all_rolls_scores).argmax()
        best_board = best_fifteen_boards[best_score_index]
-        return [best_board, max(all_rolls_scores)]
+        self.do_ply(best_boards, player)
        #best_score_index = np.array(all_rolls_scores).argmax()
        #best_board = best_fifteen_boards[best_score_index]
        #return [best_board, max(all_rolls_scores)]
    def do_ply(self, boards, player):
        """
        Generate every state reachable from each of the given boards over all 21 distinct dice
        rolls and run the model on them, one batch per board.

        This is work in progress: the model output for each board and the elapsed wall-clock
        time are only printed, and nothing is returned yet.

        :param boards: The boards to try all rolls on
        :param player: The player passed to Board.calculate_legal_states and to board_trans_func
        :return: None
        """
        import time

        # The 21 unordered rolls, [[1,1], [1,2], .., [6,6]]; [x, y] and [y, x] count as one roll.
        all_rolls = [[x, y] for x in range(1, 7) for y in range(x, 7)]

        start = time.time()

        # Step 1: for every board, collect the legal states for each roll.
        list_of_moves = [
            [list(Board.calculate_legal_states(board, player, roll)) for roll in all_rolls]
            for board in boards
        ]

        # Step 2: transform every state and flatten all rolls of a board into one array,
        # so the model can be evaluated once per board instead of once per state.
        batches = []
        for rolls_for_board in list_of_moves:
            all_board_moves = []
            for states_for_roll in rolls_for_board:
                for state in states_for_roll:
                    all_board_moves.append(np.array(self.board_trans_func(state, player)[0]))
            batches.append(np.array(all_board_moves))

        # Step 3: evaluate each board's batch.
        for batch in batches:
            print(self.model.predict_on_batch(batch))

        print(time.time() - start)

        # TODO: reduce the batched predictions to one score per board (best score per roll,
        # averaged over all rolls) and return them, so the caller can pick the best board.
        # NOTE(review): the earlier session-based version of this method generated states for
        # player * -1 and used (1 - value) when player was not 1; this version passes `player`
        # unchanged -- confirm which is intended.
    def calc_n_ply(self, n_init, sess, board, player, roll):
        """
@ -392,67 +493,6 @@ class Network:
        best_score_pair = boards_with_scores[np.array(scores).argmax()]
        return best_score_pair
    def do_ply(self, sess, boards, player):
        """
        Score each given board by looking one extra ply ahead.

        For every board and every one of the 21 distinct dice rolls, all legal states for the
        other player (player * -1) are evaluated; the best value per roll is kept and the
        per-roll bests are averaged into a single score for that board. Because the best value
        is taken directly, this only supports a single extra ply.

        :param sess: Session handed through to eval_state
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: A list of averaged scores, one per board, in the same order as boards
        """
        other_player = player * -1

        # The 21 unordered rolls, [[1,1], [1,2], .., [6,6]]
        dice_rolls = [[low, high] for low in range(1, 7) for high in range(low, 7)]

        averaged_scores = []
        states_seen = 0

        for candidate in boards:
            best_per_roll = []

            for dice in dice_rolls:
                # every state reachable with this roll
                replies = Board.calculate_legal_states(candidate, other_player, dice)
                states_seen += len(replies)

                values = [self.eval_state(sess, self.board_trans_func(reply, other_player))
                          for reply in replies]

                # when the original player is not player 1, use (1 - value) instead
                if player != 1:
                    values = [1 - value for value in values]

                best_per_roll.append(max(values))

            # average of the per-roll best scores for this board
            averaged_scores.append(sum(best_per_roll) / len(best_per_roll))

        print(states_seen)
        return averaged_scores
    def eval(self, episode_count, trained_eps = 0):
        """
--- a/network_test.py
+++ b/network_test.py
@ -55,3 +55,4 @@ network.print_variables()
 network.save_model(2)
 network.calculate_1_ply(Board.initial_state, [3,2], 1)
`@ -55,3 +55,4 @@ network.print_variables()`

	`network.save_model(2)`	`network.save_model(2)`

		`network.calculate_1_ply(Board.initial_state, [3,2], 1)`