diff --git a/network.py b/network.py index 9c0e1db..f4be4c0 100644 --- a/network.py +++ b/network.py @@ -157,7 +157,7 @@ class Network: exit() - def make_move(self, sess, board, roll, player): + #def make_move(self, sess, board, roll, player): """ Find the best move given a board, roll and a player, by finding all possible states one can go to and then picking the best, by using the network to evaluate each state. The highest score is picked @@ -169,24 +169,28 @@ class Network: :param player: Current player :return: A pair of the best state to go to, together with the score of that state """ - legal_moves = Board.calculate_legal_states(board, player, roll) - moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves] - scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] - best_score_index = np.array(scores).argmax() - best_move_pair = moves_and_scores[best_score_index] - return best_move_pair + # legal_moves = Board.calculate_legal_states(board, player, roll) + # moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves] + # scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] + # best_score_index = np.array(scores).argmax() + # best_move_pair = moves_and_scores[best_score_index] + # return best_move_pair + + def make_move(self, sess, board, roll, player, n = 1): + best_pair = self.calc_n_ply(n, sess, board, player, roll) + return best_pair - def calculate_2_ply(self, sess, board, roll, player): + def calculate_1_ply(self, sess, board, roll, player): """ - Find the best move based on a 2-ply look-ahead. First the best move is found for a single ply and then an + Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an exhaustive search is performed on the best 15 moves from the single ply. 
:param sess: :param board: :param roll: The original roll :param player: The current player - :return: Best possible move based on 2-ply look-ahead + :return: Best possible move based on 1-ply look-ahead """ @@ -205,7 +209,7 @@ class Network: if player == 1: best_fifteen.reverse() - best_fifteen_boards = [x[0] for x in best_fifteen[:15]] + best_fifteen_boards = [x[0] for x in best_fifteen[:10]] all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player) @@ -215,6 +219,29 @@ class Network: return [best_board, max(all_rolls_scores)] + def calc_n_ply(self, n_init, sess, board, player, roll): + + # find all legal states from the given board and the given roll + init_legal_states = Board.calculate_legal_states(board, player, roll) + + # find all values for the above boards + zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states] + + # Python's list.reverse() works in place and returns None, so its result can't be sliced directly; bind the sorted list to a name first. + sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1)) + + + # They're sorted from smallest to largest, therefore we want to reverse if the current player is 1, since + # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize. 
+ if player == 1: + sorted_moves_and_scores.reverse() + + best_boards = [x[0] for x in sorted_moves_and_scores[:10]] + + best_move_score_pair = self.n_ply(n_init, sess, best_boards, player) + + return best_move_score_pair + def n_ply(self, n_init, sess, boards_init, player_init): def ply(n, boards, player): @@ -262,7 +289,6 @@ class Network: if n == 1: - print("blalhlalha") average_score_pairs = [ (board, average_ply_score(board)) for board in boards ] @@ -301,12 +327,13 @@ class Network: if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit() boards_with_scores = ply(n_init, boards_init, -1 * player_init) - print(boards_with_scores) + #print("Boards with scores:",boards_with_scores) scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) ) for pair in boards_with_scores ] + #print("All the scores:",scores) best_score_pair = boards_with_scores[np.array(scores).argmax()] - return best_score_pair[0] + return best_score_pair def do_ply(self, sess, boards, player): """ diff --git a/network_test.py b/network_test.py index bc948c3..a514dfc 100644 --- a/network_test.py +++ b/network_test.py @@ -80,13 +80,14 @@ def calculate_possible_states(board): #for board in boards: # calculate_possible_states(board) -print("-"*30) -print(network.do_ply(session, boards, 1)) +#print("-"*30) +#print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1)) #print(" "*10 + "network_test") -#print(" "*20 + "Depth 1") -scores = network.n_ply(1, session, boards, 1) +print(" "*20 + "Depth 1") +print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4])) +#print(scores) #print(" "*20 + "Depth 2") #print(network.n_ply(2, session, boards, 1))