import random
import sys
import time

import numpy as np

from board import Board
from player import Player
from bot import Bot
from restore_bot import RestoreBot
from cup import Cup
from eval import Eval


class Game:
    """Drive games between two players on a shared board.

    The game keeps the current board in ``self.board`` and two players in
    ``self.p1`` / ``self.p2``.  Throughout this class the second player is
    handed a flipped board (``Board.flip``) and its answer is flipped back,
    so that a bot can always play "as if it were white".
    """

    def __init__(self, config=None):
        """Create a game with the initial board, no players and a fresh cup.

        Args:
            config: Configuration object forwarded to the bots by
                ``set_up_bots``; ``eval`` indexes it with
                ``'episode_count'`` and ``'eval_methods'``.
        """
        self.config = config
        self.board = Board.initial_state

        self.p1 = None
        self.p2 = None
        # TODO remove this
        # NOTE(review): the original TODO sat between ``self.p2`` and
        # ``self.cup``; it is unclear which attribute it refers to.
        self.cup = Cup()

    def set_up_bots(self):
        """Create both bots; each is constructed with symbol 1."""
        self.p1 = Bot(1, config=self.config)
        self.p2 = Bot(1, config=self.config)

    def roll(self):
        """Return a fresh roll from the cup."""
        return self.cup.roll()

    def best_move_and_score(self):
        """Let ``p1`` move on a fresh roll and store the resulting board.

        Returns:
            Whatever ``p1.make_move`` returned; element 0 is stored as the
            new board (``train_model`` unpacks the result as a
            ``(board, value)`` pair).
        """
        roll = self.roll()
        move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
        self.board = move_and_val[0]
        return move_and_val

    def next_round(self):
        """Let ``p2`` move on a fresh roll and return the resulting board.

        ``p2`` sees the flipped board, and its chosen board is flipped back
        before being stored.
        """
        roll = self.roll()
        flipped = Board.flip(self.board)
        reply = self.p2.make_move(flipped, self.p2.get_sym(), roll)[0]
        self.board = Board.flip(reply)
        return self.board

    def board_state(self):
        """Return the current board."""
        return self.board

    def _board_after_turn(self, player):
        """Roll the dice, let *player* move, and return the resulting board.

        A ``Bot`` with symbol 1 moves on the board as-is; a ``Bot`` with
        symbol -1 moves on the flipped board and its answer is flipped back.
        ``Bot.make_move`` is indexed with ``[0]`` to get the board, as every
        other call site in this class does.  Any other player (the human
        ``Player``) gets the board as-is and its return value is used
        directly.
        """
        roll = self.roll()
        sym = player.get_sym()
        if not isinstance(player, Bot):
            # NOTE(review): assumes Player.make_move returns a bare board,
            # as the original code did -- confirm against player.py.
            return player.make_move(self.board, sym, roll)
        if sym == 1:
            return player.make_move(self.board, sym, roll)[0]
        return Board.flip(
            player.make_move(Board.flip(self.board), sym, roll)[0])

    def play_against_player(self):
        """Play one interactive game between a human ``Player`` and a ``Bot``.

        On a coin flip above 0.5 the user is asked for a colour; answering 1
        makes the human white (``p1``) against a black bot.  In every other
        case the bot is white and the human is black.  The outcome of the
        finished game is printed.

        Raises:
            ValueError: If the colour typed by the user is not an integer.
        """
        self.board = Board.initial_state

        human_is_white = False
        if random.random() > 0.5:
            user_color = input("Pick a number, 1 (white) or -1 (black)")
            human_is_white = int(user_color) == 1

        if human_is_white:
            p1 = Player(1)
            p2 = Bot(-1)
        else:
            p1 = Bot(1)
            p2 = Player(-1)

        # Since we have to make sure that the Bot always plays as if it's
        # white, the board is flipped for a bot that is actually black
        # (handled inside _board_after_turn).
        while Board.outcome(self.board) is None:
            self.board = self._board_after_turn(p1)
            self.board = self._board_after_turn(p2)

        print(Board.outcome(self.board))

    @staticmethod
    def _write_time_estimate(tag, start_time, total_eps, eps_completed):
        """Write throughput and a remaining-time estimate to stderr.

        Args:
            tag: Text placed inside the leading brackets of each line
                (``"TRAIN"`` or ``"EVAL "``).
            start_time: ``time.time()`` value taken when the run started.
            total_eps: Total number of episodes in the run.
            eps_completed: Number of episodes finished so far (non-zero).
        """
        time_diff = time.time() - start_time
        eps_per_sec = eps_completed / time_diff
        secs_per_ep = time_diff / eps_completed
        eps_remaining = total_eps - eps_completed
        sys.stderr.write(
            "[{tag}] Averaging {per_sec} episodes per second\n".format(
                tag=tag, per_sec=round(eps_per_sec, 2)))
        sys.stderr.write(
            "[{tag}] {eps_remaining} episodes remaining; approx. "
            "{time_remaining} seconds remaining\n".format(
                tag=tag,
                eps_remaining=eps_remaining,
                time_remaining=int(eps_remaining * secs_per_ep)))

    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
        """Train ``p1``'s network by playing ``episodes`` games against ``p2``.

        After every move of ``p1`` the network is trained on the previous
        board with the value of the current one; when the game ends it is
        trained on the last board with the final outcome.  The model is
        saved (and ``p2`` restored from it) every
        ``min(save_step_size, episodes)`` episodes and once more at the end.

        Args:
            episodes: Number of games to play.
            save_step_size: Save interval in episodes.
            trained_eps: Episodes already trained earlier; added to the
                episode number in log lines and when saving.

        Returns:
            List with element 1 of ``Board.outcome`` for every game played.
        """
        start_time = time.time()

        sys.stderr.write(
            "[TRAIN] Training {} episodes and save_step_size {}\n".format(
                episodes, save_step_size))
        outcomes = []
        for episode in range(1, episodes + 1):
            sys.stderr.write(
                "[TRAIN] Episode {}".format(episode + trained_eps))
            self.board = Board.initial_state

            # Make the best first move; inside the loop the opponent moves
            # first and best_move_and_score then yields V_t+1.
            prev_board, prev_board_value = self.best_move_and_score()

            while Board.outcome(self.board) is None:
                self.next_round()
                cur_board, cur_board_value = self.best_move_and_score()
                self.p1.get_network().train(prev_board, cur_board_value)
                prev_board = cur_board

            result = Board.outcome(self.board)[1]
            sys.stderr.write("\t outcome {}".format(result))
            outcomes.append(result)

            # Final update: train the last board towards the real outcome.
            final_score = np.array([result]).reshape((1, 1))
            self.p1.get_network().train(prev_board, final_score)

            sys.stderr.write("\n")

            if episode % min(save_step_size, episodes) == 0:
                sys.stderr.write("[TRAIN] Saving model...\n")
                self.p1.get_network().save_model(episode + trained_eps)
                sys.stderr.write(
                    "[TRAIN] Loading model for training opponent...\n")
                self.p2.restore_model()

            if episode % 50 == 0:
                self._write_time_estimate(
                    "TRAIN", start_time, episodes, episode)

        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
        self.p1.get_network().save_model(episode + trained_eps)
        self.p2.restore_model()

        return outcomes

    def next_round_test(self):
        """Print the board, play one ``next_round``, and print it again."""
        print(self.board)
        print()
        self.next_round()
        print("--------------------------------")
        print(self.board)
        print("--------------------------------")

    def eval(self, trained_eps=0):
        """Evaluate ``p1`` with every method in ``config['eval_methods']``.

        Each method plays ``config['episode_count']`` games.  Supported
        methods are ``'random'`` and ``'pubeval'``; any other name is
        reported on stderr and yields ``[0]``.

        Args:
            trained_eps: Forwarded to the per-method evaluation, which
                accepts but does not use it.

        Returns:
            List of ``(method, outcomes)`` tuples, one per configured method.
        """
        def random_reply(roll):
            # Opponent picks a random move on the flipped board.
            return Board.flip(Eval.make_random_move(
                Board.flip(self.board), self.p2.get_sym(), roll))

        def pubeval_reply(roll):
            # NOTE(review): unlike the random opponent, the board is passed
            # unflipped here and only the first 26 entries of the answer are
            # kept -- confirm against Eval.make_pubeval_move.
            return Board.flip(Eval.make_pubeval_move(
                self.board, self.p2.get_sym(), roll)[0][0:26])

        # method name -> (opponent move function, progress report interval)
        opponents = {
            'random': (random_reply, 50),
            'pubeval': (pubeval_reply, 10),
        }

        def do_eval(method, episodes=1000, trained_eps=0):
            start_time = time.time()

            sys.stderr.write(
                "[EVAL ] Evaluating {eps} episode(s) with method "
                "'{method}'\n".format(eps=episodes, method=method))

            if method not in opponents:
                sys.stderr.write(
                    "[EVAL ] Evaluation method '{}' is not defined\n".format(
                        method))
                return [0]

            opponent_reply, report_every = opponents[method]
            outcomes = []
            for i in range(1, episodes + 1):
                sys.stderr.write("[EVAL ] Episode {}".format(i))
                self.board = Board.initial_state
                while Board.outcome(self.board) is None:
                    roll = self.roll()
                    self.board = self.p1.make_move(
                        self.board, self.p1.get_sym(), roll)[0]
                    roll = self.roll()
                    self.board = opponent_reply(roll)
                result = Board.outcome(self.board)[1]
                sys.stderr.write("\t outcome {}".format(result))
                outcomes.append(result)
                sys.stderr.write("\n")

                if i % report_every == 0:
                    self._write_time_estimate(
                        "EVAL ", start_time, episodes, i)

            return outcomes

        return [
            (method,
             do_eval(method,
                     self.config['episode_count'],
                     trained_eps=trained_eps))
            for method in self.config['eval_methods']
        ]

    def play(self, episodes=1000):
        """Play ``episodes`` games in which both players move randomly.

        The winner and the round index are printed after every game.

        Returns:
            List with element 1 of ``Board.outcome`` for every game played.
        """
        outcomes = []
        for i in range(episodes):
            self.board = Board.initial_state
            while Board.outcome(self.board) is None:
                roll = self.roll()
                self.board = self.p1.make_random_move(
                    self.board, self.p1.get_sym(), roll)

                roll = self.roll()
                self.board = Board.flip(self.p2.make_random_move(
                    Board.flip(self.board), self.p2.get_sym(), roll))

            outcome = Board.outcome(self.board)
            if outcome[1] > 0:
                print_winner = "1: White, " + str(outcome)
            else:
                print_winner = "-1: Black " + str(outcome)
            outcomes.append(outcome[1])
            print("The winner is {}!".format(print_winner))
            print("Round:", i)
        return outcomes


highest = 0