diff --git a/network.py b/network.py
index c46f291..d84036f 100644
--- a/network.py
+++ b/network.py
@@ -232,7 +232,7 @@ class Network:
 
         return best_pair
 
-    def calculate_1_ply(self, sess, board, roll, player):
+    def calculate_1_ply(self, board, roll, player):
         """
         Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
         exhaustive search is performed on the best 15 moves from the single ply.
@@ -248,21 +248,122 @@
         # find all legal states from the given board and the given roll
         init_legal_states = Board.calculate_legal_states(board, player, roll)
 
-        # find all values for the above boards
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
 
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
+        legal_moves = list(Board.calculate_legal_states(board, player, roll))
 
-        best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
+        legal_states = [list(tmp) for tmp in legal_moves]
 
-        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
+        legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states])
+
+        scores = self.calc_vals(legal_states)
+        scores = [score.numpy() for score in scores]
+
+        moves_and_scores = list(zip(init_legal_states, scores))
+
+        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
+
+        best_boards = [x[0] for x in sorted_moves_and_scores]
 
-        best_score_index = np.array(all_rolls_scores).argmax()
-        best_board = best_fifteen_boards[best_score_index]
 
-        return [best_board, max(all_rolls_scores)]
+        self.do_ply(best_boards, player)
+
+
+        #best_score_index = np.array(all_rolls_scores).argmax()
+        #best_board = best_fifteen_boards[best_score_index]
+
+        #return [best_board, max(all_rolls_scores)]
+
+    def do_ply(self, boards, player):
+        """
+        Calculates a single extra ply, resulting in a larger search space for our best move.
+        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
+        allowing the function to search deeper, which could result in an even larger search space. If we wish
+        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
+
+        :param boards: The boards to try all rolls on
+        :param player: The player of the previous ply
+        :return: An array of scores where each index describes one of the boards which was given as param
+        to this function.
+        """
+
+        import time
+
+        def gen_21_rolls():
+            """
+            Calculate all possible rolls, [[1,1], [1,2] ..]
+            :return: All possible rolls
+            """
+            a = []
+            for x in range(1, 7):
+                for y in range(1, 7):
+                    if not [x, y] in a and not [y, x] in a:
+                        a.append([x, y])
+
+            return a
+
+        all_rolls = gen_21_rolls()
+
+        all_rolls_scores = []
+
+        start = time.time()
+
+        list_of_moves = []
+
+        for idx, board in enumerate(boards):
+            list_of_moves.append([])
+            for roll in all_rolls:
+                all_states = list(Board.calculate_legal_states(board, player, roll))
+                list_of_moves[idx].append(all_states)
+
+        tmp = []
+        for board in list_of_moves:
+            all_board_moves = []
+            for roll in board:
+                for spec in roll:
+                    legal_state = np.array(self.board_trans_func(spec, player)[0])
+                    all_board_moves.append(legal_state)
+            tmp.append(np.array(all_board_moves))
+
+        # print(tmp)
+
+        for board in tmp:
+            print(self.model.predict_on_batch(board))
+
+        print(time.time() - start)
+
+        # count = 0
+        # # loop over boards
+        # for a_board in boards:
+        #     a_board_scores = []
+        #
+        #     # loop over all rolls, for each board
+        #     for roll in all_rolls:
+        #
+        #         # find all states we can get to, given the board and roll and the opposite player
+        #         all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
+        #         count += len(all_rolls_boards)
+        #         # find scores for each board found above
+        #         spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
+        #                             for new_board in all_rolls_boards]
+        #
+        #         # if the original player is the -1 player, then we need to find (1-value)
+        #         spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
+        #
+        #         # find the best score
+        #         best_score = max(spec_roll_scores)
+        #
+        #         # append the best score to a_board_scores, where we keep track of the best score for each board
+        #         a_board_scores.append(best_score)
+        #
+        #     # save the expected average of board scores
+        #     all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
+        #
+        # # return all the average scores
+        # print(count)
+        # return all_rolls_scores
+
 
     def calc_n_ply(self, n_init, sess, board, player, roll):
         """
@@ -392,67 +493,6 @@ class Network:
         best_score_pair = boards_with_scores[np.array(scores).argmax()]
         return best_score_pair
 
-    def do_ply(self, sess, boards, player):
-        """
-        Calculates a single extra ply, resulting in a larger search space for our best move.
-        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
-        allowing the function to search deeper, which could result in an even larger search space. If we wish
-        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
-
-        :param sess:
-        :param boards: The boards to try all rolls on
-        :param player: The player of the previous ply
-        :return: An array of scores where each index describes one of the boards which was given as param
-        to this function.
-        """
-
-        def gen_21_rolls():
-            """
-            Calculate all possible rolls, [[1,1], [1,2] ..]
-            :return: All possible rolls
-            """
-            a = []
-            for x in range(1, 7):
-                for y in range(1, 7):
-                    if not [x, y] in a and not [y, x] in a:
-                        a.append([x, y])
-
-            return a
-
-        all_rolls = gen_21_rolls()
-
-        all_rolls_scores = []
-        count = 0
-        # loop over boards
-        for a_board in boards:
-            a_board_scores = []
-
-            # loop over all rolls, for each board
-            for roll in all_rolls:
-
-                # find all states we can get to, given the board and roll and the opposite player
-                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
-                count += len(all_rolls_boards)
-                # find scores for each board found above
-                spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
-                                    for new_board in all_rolls_boards]
-
-                # if the original player is the -1 player, then we need to find (1-value)
-                spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
-
-                # find the best score
-                best_score = max(spec_roll_scores)
-
-                # append the best score to a_board_scores, where we keep track of the best score for each board
-                a_board_scores.append(best_score)
-
-            # save the expected average of board scores
-            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
-
-        # return all the average scores
-        print(count)
-        return all_rolls_scores
-
 
     def eval(self, episode_count, trained_eps = 0):
         """
diff --git a/network_test.py b/network_test.py
index 4f64612..243d2df 100644
--- a/network_test.py
+++ b/network_test.py
@@ -55,3 +55,4 @@
 network.print_variables()
 
 network.save_model(2)
+network.calculate_1_ply(Board.initial_state, [3,2], 1)
\ No newline at end of file
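
Note on the technique: the do_ply logic that the patch keeps as a comment block scores a candidate board by averaging, over the 21 distinct dice rolls, the best value the opposing player can reach. The standalone sketch below is not part of the patch; it only illustrates that idea under assumed interfaces, with legal_states(board, player, roll) standing in for Board.calculate_legal_states, evaluate(board, player) standing in for the network's value output, and distinct_rolls / one_ply_score as purely illustrative names.

from itertools import combinations_with_replacement

def distinct_rolls():
    # The 21 distinct dice rolls: [1, 1], [1, 2], ..., [6, 6].
    return [list(r) for r in combinations_with_replacement(range(1, 7), 2)]

def one_ply_score(board, player, legal_states, evaluate):
    # Average, over all rolls, of the best reply value available to the
    # opposing player, flipping values for the -1 player in the same way
    # as the commented-out do_ply loop in the patch.
    roll_scores = []
    for roll in distinct_rolls():
        values = [evaluate(b, player * -1) for b in legal_states(board, player * -1, roll)]
        values = [v if player == 1 else 1 - v for v in values]
        roll_scores.append(max(values))
    return sum(roll_scores) / len(roll_scores)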