From 926a331df0644237fac11d0b22ce582de1ada00e Mon Sep 17 00:00:00 2001
From: Alexander Munch-Hansen
Date: Sun, 13 May 2018 23:54:13 +0200
Subject: [PATCH] Some flags from main.py are gone, rolls once again allow a
 face_value of 0, and it is possible to play against the AI. There is no
 flag for this yet, so one still has to be added.

---
 board.py        |  59 +++++++++++++++-------
 bot.py          |  84 ++++++++++++++++++++-----------
 main.py         |  18 +++----
 network.py      | 131 ++++++++++++++++++++++++++++-------------------
 network_test.py |  20 ++++----
 player.py       |  64 +++++++++++++++++------
 6 files changed, 244 insertions(+), 132 deletions(-)

diff --git a/board.py b/board.py
index 38693c3..ede4b87 100644
--- a/board.py
+++ b/board.py
@@ -170,12 +170,27 @@ class Board:
 
     @staticmethod
-    def apply_moves_to_board(board, player, moves):
-        for move in moves:
-            from_idx, to_idx = move.split("/")
-            board[int(from_idx)] -= int(player)
-            board[int(to_idx)] += int(player)
-        return board
+    def apply_moves_to_board(board, player, move):
+        from_idx = move[0]
+        to_idx = move[1]
+        board = list(board)
+        board[from_idx] -= player
+
+        if (to_idx < 1 or to_idx > 24):
+            return
+
+        if (board[to_idx] * player == -1):
+
+            if (player == 1):
+                board[25] -= player
+            else:
+                board[0] -= player
+
+            board[to_idx] = 0
+
+        board[to_idx] += player
+
+        return tuple(board)
 
     @staticmethod
     def calculate_legal_states(board, player, roll):
@@ -186,6 +201,8 @@ class Board:
         # turn and then do something with the second die
 
         def calc_moves(board, face_value):
+            if face_value == 0:
+                return [board]
             return quack.calc_moves(board, player, face_value)
 
         # Problem with cal_moves: Method can return empty list (should always contain at least same board).
@@ -200,26 +217,32 @@
         if not Board.any_move_valid(board, player, roll):
             return { board }
         dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
+        #print("Permuts:",dice_permutations)
         # print("Dice permuts:",dice_permutations)
         for roll in dice_permutations:
             # Calculate boards resulting from first move
             #print("initial board: ", board)
             #print("roll:", roll)
+            #print("Rest of roll:",roll[1:])
             boards = calc_moves(board, roll[0])
+            #print("Boards:",boards)
+            #print("Roll:",roll[0])
             #print("boards after first die: ", boards)
 
             for die in roll[1:]:
-                # Calculate boards resulting from second move
-                nested_boards = [calc_moves(board, die) for board in boards]
-                #print("nested boards: ", nested_boards)
-                boards = [board for boards in nested_boards for board in boards]
-                # What the fuck
-                #for board in boards:
-                #    print(board)
-                #    print("type__:",type(board))
-                # Add resulting unique boards to set of legal boards resulting from roll
+                # if die != 0:
+                if True:
+                    # Calculate boards resulting from second move
+                    nested_boards = [calc_moves(board, die) for board in boards]
+                    #print("nested boards: ", nested_boards)
+                    boards = [board for boards in nested_boards for board in boards]
+                    # What the fuck
+                    #for board in boards:
+                    #    print(board)
+                    #    print("type__:",type(board))
+                    # Add resulting unique boards to set of legal boards resulting from roll
 
-            #print("printing boards from calculate_legal_states: ", boards)
+                    #print("printing boards from calculate_legal_states: ", boards)
             legal_moves = legal_moves | set(boards)
             # print("legal moves: ", legal_moves)
         if len(legal_moves) == 0:
@@ -245,9 +268,9 @@
         return """
 13 14 15 16 17 18 19 20 21 22 23 24
 +--------------------------------------------------------------------------+
-| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|
-| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
 +--------------------------------------------------------------------------+
 12 11 10 9 8 7 6 5 4 3 2 1
 """.format(*temp)
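
Note on the board.py change: the reworked apply_moves_to_board applies a single (from, to) pair rather than a list of "from/to" strings, and it now handles hits, sending a lone opposing checker on the target point to the bar (index 25 for player -1, index 0 for player 1). A minimal sketch of the behaviour on a hand-built board; the 26-slot layout comes from the patch, everything else here is illustrative:

    from board import Board

    # Hypothetical 26-slot board: one checker for player 1 on point 1 and a
    # lone opposing checker ("blot") on point 5; indices 0 and 25 are the bars.
    board = [0] * 26
    board[1] = 1
    board[5] = -1

    # Player 1 plays the single move 1 -> 5, hitting the blot.
    result = Board.apply_moves_to_board(tuple(board), 1, (1, 5))

    assert result[1] == 0    # the moved checker left point 1
    assert result[5] == 1    # ... and landed on point 5
    assert result[25] == -1  # the hit checker now sits on player -1's bar

A target outside points 1..24 falls into the bare return, so callers receive None and must treat the move as invalid.
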
diff --git a/bot.py b/bot.py
index 297f203..d1d74a6 100644
--- a/bot.py
+++ b/bot.py
@@ -1,24 +1,8 @@
-from cup import Cup
-from network import Network
 from board import Board
 
-import tensorflow as tf
-import numpy as np
-import random
-
 class Bot:
-    def __init__(self, sym, config = None, name = "unnamed"):
-        self.config = config
-        self.cup = Cup()
+    def __init__(self, sym):
         self.sym = sym
-        self.graph = tf.Graph()
-
-        self.network = Network(config, name)
-        self.network.restore_model()
-
-    def restore_model(self):
-        with self.graph.as_default():
-            self.network.restore_model()
 
     def get_session(self):
         return self.session
@@ -26,16 +10,60 @@ class Bot:
     def get_sym(self):
         return self.sym
 
-    def get_network(self):
-        return self.network
-
-    # TODO: DEPRECATE
-    def make_move(self, board, sym, roll):
-        # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
-        scores = [ x[1] for x in moves_and_scores ]
-        best_move_pair = moves_and_scores[np.array(scores).argmax()]
-        #print("Found the best state, being:", np.array(move_scores).argmax())
-        return best_move_pair
+    def calc_move_sets(self, from_board, roll, player):
+        board = from_board
+        sets = []
+        total = 0
+        print("board!:",board)
+        for r in roll:
+            # print("Value of r:",r)
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+            total += r
+        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
+        return sets
+
+
+    def handle_move(self, from_board, to_board, roll, player):
+
+        # print("Cur board:",board)
+        sets = self.calc_move_sets(from_board, roll, player)
+        for idx, board_set in enumerate(sets):
+            board_set[0] = list(board_set[0])
+            # print("My board_set:",board_set)
+            if to_board in [list(c) for c in board_set[0]]:
+                self.total_moves -= board_set[1]
+                if idx < 2:
+                    # print("Roll object:",self.roll)
+                    self.roll[idx] = 0
+                else:
+                    self.roll = [0,0]
+                break
+        print("Total moves left:",self.total_moves)
+
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves):
+        sets = self.calc_move_sets(from_board, roll, player)
+        return_board = from_board
+        for idx, board_set in enumerate(sets):
+            board_set[0] = list(board_set[0])
+            if to_board in [list(board) for board in board_set[0]]:
+                total_moves -= board_set[1]
+                # if it's not the sum of the moves
+                if idx < 2:
+                    roll[idx] = 0
+                else:
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board
+
+    def make_human_move(self, board, player, roll):
+        total_moves = roll[0] + roll[1]
+        previous_board = board
+        while total_moves != 0:
+            move = input("Pick a move!\n")
+            to_board = Board.apply_moves_to_board(previous_board, player, move)
+            total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)
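
Note on the bot.py change: the new human-move plumbing relies on the face_value 0 change in board.py. Board.calculate_legal_states(board, player, [r, 0]) yields exactly the states reachable with the single die r, because a 0 die contributes the unchanged board. calc_move_sets therefore returns one [states, value] pair per die plus one for the dice total, and tmp_name decides which dice a move consumed by set membership. A rough usage sketch, illustrative only:

    from board import Board
    from bot import Bot

    bot = Bot(1)
    # For a 3-2 roll: states reachable with the 3 alone, the 2 alone,
    # and the full 5 (both dice), each paired with the value it consumes.
    for states, value in bot.calc_move_sets(Board.initial_state, [3, 2], 1):
        print("reachable using", value, "->", len(states), "states")
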
diff --git a/main.py b/main.py
index 0631df3..a5fbf47 100644
--- a/main.py
+++ b/main.py
@@ -31,12 +31,8 @@ parser.add_argument('--train-perpetually',
                     action='store_true',
                     help='start new training session as soon as the previous is finished')
 parser.add_argument('--list-models', action='store_true',
                     help='list all known models')
-parser.add_argument('--force-creation', action='store_true',
-                    help='force model creation if model does not exist')
 parser.add_argument('--board-rep', action='store', dest='board_rep',
                     help='name of board representation to use as input to neural network')
-parser.add_argument('--use-baseline', action='store_true',
-                    help='use the baseline model, note, has size 28')
 parser.add_argument('--verbose', action='store_true',
                     help='If set, a lot of stuff will be printed')
 parser.add_argument('--ply', action='store', dest='ply', default='0',
@@ -46,9 +42,6 @@ parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default
 
 args = parser.parse_args()
 
-if args.model == "baseline_model":
-    print("Model name 'baseline_model' not allowed")
-    exit()
 
 config = {
     'model': args.model,
@@ -64,8 +57,6 @@ config = {
     'model_storage_path': 'models',
     'bench_storage_path': 'bench',
     'board_representation': args.board_rep,
-    'force_creation': args.force_creation,
-    'use_baseline': args.use_baseline,
     'global_step': 0,
     'verbose': args.verbose,
     'ply': args.ply,
@@ -87,6 +78,14 @@ if not os.path.isdir(log_path):
     os.mkdir(log_path)
 
 
+def save_config():
+    import yaml
+    # checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
+    # config_path = os.path.join(checkpoint_path, 'config')
+    # with open(config_path, 'a+') as f:
+    #     print("lol")
+    print(yaml.dump(config))
+
 # Define helper functions
 def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
     format_vars = { 'trained_eps': trained_eps,
@@ -173,6 +172,7 @@ if __name__ == "__main__":
 
     # Set up network
     from network import Network
+    save_config()
 
     # Set up variables
     episode_count = config['episode_count']
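
Note on the main.py change: save_config currently only dumps the config to stdout; the commented-out lines hint at the intended destination. A possible completion, an assumption on my part and not part of this patch, that writes the YAML next to the model's checkpoints:

    import os
    import yaml

    def save_config():
        checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
        os.makedirs(checkpoint_path, exist_ok=True)  # assumed; the patch never creates this directory here
        config_path = os.path.join(checkpoint_path, 'config')
        with open(config_path, 'w') as f:
            yaml.dump(config, f)
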
diff --git a/network.py b/network.py
index ad8e27a..00e405e 100644
--- a/network.py
+++ b/network.py
@@ -9,6 +9,7 @@ from eval import Eval
 import glob
 from operator import itemgetter
 import tensorflow.contrib.eager as tfe
+from player import Player
 
 class Network:
     # board_features_quack has size 28
@@ -562,6 +563,28 @@ class Network:
 
         return outcomes
 
+    def play_against_network(self):
+        self.restore_model()
+        human_player = Player(-1)
+        cur_player = 1
+        player = 1
+        board = Board.initial_state
+        i = 0
+        while Board.outcome(board) is None:
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("Bot rolled:", roll)
+
+            board, _ = self.make_move(board, roll, player)
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("You rolled:", roll)
+            board = human_player.make_human_move(board, roll)
+        print("DONE "*10)
+        print(Board.pretty(board))
+
+
 
     def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
         """
@@ -570,79 +593,79 @@
         :param trained_eps:
         :return:
         """
-        with tf.Session() as sess:
-            difference_in_vals = 0
-            self.restore_model()
+        difference_in_vals = 0
 
-            start_time = time.time()
+        self.restore_model()
 
-            def print_time_estimate(eps_completed):
-                cur_time = time.time()
-                time_diff = cur_time - start_time
-                eps_per_sec = eps_completed / time_diff
-                secs_per_ep = time_diff / eps_completed
-                eps_remaining = (episodes - eps_completed)
-                sys.stderr.write(
-                    "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
-                sys.stderr.write(
-                    "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
-                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
+        start_time = time.time()
 
-            sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
-            outcomes = []
-            for episode in range(1, episodes + 1):
+        def print_time_estimate(eps_completed):
+            cur_time = time.time()
+            time_diff = cur_time - start_time
+            eps_per_sec = eps_completed / time_diff
+            secs_per_ep = time_diff / eps_completed
+            eps_remaining = (episodes - eps_completed)
+            sys.stderr.write(
+                "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+            sys.stderr.write(
+                "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
+                    eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
 
-                sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-                # TODO decide which player should be here
+        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
+        outcomes = []
+        for episode in range(1, episodes + 1):
 
-                player = 1
-                prev_board = Board.initial_state
-                i = 0
-                while Board.outcome(prev_board) is None:
-                    i += 1
-                    self.global_step += 1
+            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
+            # TODO decide which player should be here
+
+            player = 1
+            prev_board = Board.initial_state
+            i = 0
+            while Board.outcome(prev_board) is None:
+                i += 1
+                self.global_step += 1
 
-                    cur_board, cur_board_value = self.make_move(prev_board,
-                                                                (random.randrange(1, 7), random.randrange(1, 7)),
-                                                                player)
+                cur_board, cur_board_value = self.make_move(prev_board,
+                                                            (random.randrange(1, 7), random.randrange(1, 7)),
+                                                            player)
 
-                    difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
+                difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
 
-                    if self.config['verbose']:
-                        print("Difference in values:", difference_in_vals)
-                        print("Current board value :", cur_board_value)
-                        print("Current board is :\n",cur_board)
+                if self.config['verbose']:
+                    print("Difference in values:", difference_in_vals)
+                    print("Current board value :", cur_board_value)
+                    print("Current board is :\n",cur_board)
 
-                    # adjust weights
-                    if Board.outcome(cur_board) is None:
-                        self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
-                        player *= -1
+                # adjust weights
+                if Board.outcome(cur_board) is None:
+                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
+                    player *= -1
 
-                    prev_board = cur_board
+                prev_board = cur_board
 
-                final_board = prev_board
-                sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
-                outcomes.append(Board.outcome(final_board)[1])
-                final_score = np.array([Board.outcome(final_board)[1]])
-                scaled_final_score = ((final_score + 2) / 4)
+            final_board = prev_board
+            sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
+            outcomes.append(Board.outcome(final_board)[1])
+            final_score = np.array([Board.outcome(final_board)[1]])
+            scaled_final_score = ((final_score + 2) / 4)
 
-                self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
+            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
 
-                sys.stderr.write("\n")
+            sys.stderr.write("\n")
 
-                if episode % min(save_step_size, episodes) == 0:
-                    sys.stderr.write("[TRAIN] Saving model...\n")
-                    self.save_model(episode + trained_eps)
+            if episode % min(save_step_size, episodes) == 0:
+                sys.stderr.write("[TRAIN] Saving model...\n")
+                self.save_model(episode + trained_eps)
 
-                if episode % 50 == 0:
-                    print_time_estimate(episode)
+            if episode % 50 == 0:
+                print_time_estimate(episode)
 
-            sys.stderr.write("[TRAIN] Saving model for final episode...\n")
-            self.save_model(episode+trained_eps)
+        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
+        self.save_model(episode+trained_eps)
 
-            return outcomes, difference_in_vals[0][0]
+        return outcomes, difference_in_vals[0][0]
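
Note on the network.py change: play_against_network only consults Board.outcome at the top of the while loop, so a game the bot has just won still prompts the human for a roll. A sketch of the same loop with a check between the two half-turns (a possible variant, not what the patch implements):

    def play_against_network(self):
        # Same flow as in the patch, but checking the outcome between the
        # bot's half-turn and the human's, so a finished game stops promptly.
        self.restore_model()
        human_player = Player(-1)
        board = Board.initial_state
        while Board.outcome(board) is None:
            print(Board.pretty(board))
            roll = (random.randrange(1, 7), random.randrange(1, 7))
            print("Bot rolled:", roll)
            board, _ = self.make_move(board, roll, 1)
            if Board.outcome(board) is not None:
                break  # the bot's move ended the game; don't ask the human to move
            print(Board.pretty(board))
            roll = (random.randrange(1, 7), random.randrange(1, 7))
            print("You rolled:", roll)
            board = human_player.make_human_move(board, roll)
        print("DONE " * 10)
        print(Board.pretty(board))
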
diff --git a/network_test.py b/network_test.py
index a4d8dda..1bcb878 100644
--- a/network_test.py
+++ b/network_test.py
@@ -9,8 +9,8 @@ from board import Board
 import main
 
 config = main.config.copy()
-config['model'] = "eager_testings"
-config['force_creation'] = True
+config['model'] = "player_testings"
+config['ply'] = "1"
 config['board_representation'] = 'quack-fat'
 
 network = Network(config, config['model'])
@@ -40,19 +40,21 @@ boards = {initial_state,
 
 
-board = network.board_trans_func(Board.initial_state, 1)
+# board = network.board_trans_func(Board.initial_state, 1)
 
-pair = network.make_move(Board.initial_state, [3,2], 1)
+# pair = network.make_move(Board.initial_state, [3,2], 1)
 
-print(pair[1])
+# print(pair[1])
 
-network.do_backprop(board, 0.9)
+# network.do_backprop(board, 0.9)
 
-network.print_variables()
+# network.print_variables()
 
-network.save_model(2)
+# network.save_model(2)
 
-print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
\ No newline at end of file
+# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
+
+network.play_against_network()
\ No newline at end of file
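
Note on the network_test.py change: as the commit message says, there is still no command-line flag for starting a game against the AI; this test script simply hardcodes the call. A sketch of what such a flag could look like in main.py, following its existing argparse conventions (the flag name --play is hypothetical, not part of this patch):

    parser.add_argument('--play', action='store_true',
                        help='play a game against the model')

    # ... after the config dict is built:
    if args.play:
        network = Network(config, config['model'])
        network.play_against_network()
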
left!".format(roll=total_moves)) + move = input("Pick a move!\n") + pot_move = move.split("/") + if len(pot_move) == 2: + try: + pot_move[0] = int(pot_move[0]) + pot_move[1] = int(pot_move[1]) + move = pot_move + break; + except TypeError: + print("The correct syntax is: 2/5 for a move from index 2 to 5.") + + to_board = Board.apply_moves_to_board(board, self.get_sym(), move) + total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves) + print(Board.pretty(board)) + return board \ No newline at end of file