from board import Board
from player import Player
from bot import Bot
from restore_bot import RestoreBot
from cup import Cup
import numpy as np
import random
import sys


class Game:
    """Drive games between two players: self-play training, evaluation, random play.

    ``self.board`` always holds the current position.  Throughout this class
    the second player (``p2``) is handed ``Board.flip(board)`` and its answer
    is flipped back, so ``self.board`` is always stored from the first
    player's point of view (presumably ``Board.flip`` mirrors the position
    for the opponent -- confirm against ``Board``).

    ``Board.outcome(board)`` is used as: ``None`` while the game is running,
    otherwise an indexable value whose element ``[1]`` is the score
    (positive is reported as a win for white in ``play``).
    """

    def __init__(self, config = None):
        """Store ``config`` and start from ``Board.initial_state``.

        The players are left unset; call ``set_up_bots`` before using any
        method that reads ``self.p1`` / ``self.p2``.
        """
        self.config = config
        self.board = Board.initial_state
        self.p1 = None
        self.p2 = None
        self.cup = Cup()

    def set_up_bots(self):
        """Create both bot players from the stored configuration.

        Both bots are created with symbol 1; the second one only ever sees
        flipped boards (see ``next_round``).
        """
        self.p1 = Bot(1, config = self.config)
        self.p2 = Bot(1, config = self.config)

    def roll(self):
        """Return a fresh roll from the dice cup."""
        return self.cup.roll()

    def roll_and_find_best_for_bot(self):
        """Roll, let ``p1`` move, and store the resulting board.

        Returns whatever ``p1.make_move`` returned; callers unpack it as a
        ``(board, value)`` pair.
        """
        roll = self.roll()
        move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
        self.board = move_and_val[0]
        return move_and_val

    def next_round(self):
        """Roll, let ``p2`` move on the flipped board, and return the new board."""
        roll = self.roll()
        flipped = Board.flip(self.board)
        reply = self.p2.make_move(flipped, self.p2.get_sym(), roll)
        self.board = Board.flip(reply[0])
        return self.board

    def board_state(self):
        """Return the current board."""
        return self.board

    def play_against_player(self):
        """Play one interactive game between a human ``Player`` and a ``Bot``.

        A coin flip decides whether the human may choose a colour; otherwise
        the bot is white (1) and the human black (-1).  Raises ``ValueError``
        if the colour typed by the user is not an integer.
        """
        self.board = Board.initial_state

        human_is_white = False
        if random.random() > 0.5:
            user_color = input("Pick a number, 1 (white) or -1 (black)")
            human_is_white = int(user_color) == 1

        # The bots are built the same way set_up_bots builds them, i.e. with
        # the game's configuration passed by keyword.
        if human_is_white:
            p1 = Player(1)
            p2 = Bot(-1, config = self.config)
        else:
            p1 = Bot(1, config = self.config)
            p2 = Player(-1)

        while Board.outcome(self.board) is None:
            for mover in (p1, p2):
                roll = self.roll()
                result = mover.make_move(self.board, mover.get_sym(), roll)
                # Bot.make_move yields a (board, value) pair everywhere else
                # in this class; Player.make_move is assumed to return the
                # board itself -- TODO confirm against Player.
                self.board = result[0] if isinstance(mover, Bot) else result
                if Board.outcome(self.board) is not None:
                    # Do not let the other side move once the game is decided.
                    break

    def train_model(self, episodes=1000, save_step_size = 100, init_ep = 0):
        """Train ``p1``'s network through self-play and return the outcomes.

        After every ``p1`` move the network is trained with the previous
        ``p1`` board as input and the value of the new board as target; when
        the game ends the last board is trained towards the final score.

        Args:
            episodes: number of games to play.
            save_step_size: the model is saved (and ``p2`` restored from it)
                whenever ``episode % min(save_step_size, episodes) == 0``.
            init_ep: offset added to the episode number in log output only.

        Returns:
            A list with the score (``Board.outcome(...)[1]``) of every game.
        """
        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
        outcomes = []
        # Loop-invariant; only evaluated as a divisor inside the loop.
        save_interval = min(save_step_size, episodes)

        for episode in range(episodes):
            sys.stderr.write("[TRAIN] Episode {}".format(episode + init_ep))
            self.board = Board.initial_state

            prev_board, _ = self.roll_and_find_best_for_bot()

            # find the best move here, make this move, then change turn as the
            # first thing inside of the while loop and then call
            # roll_and_find_best_for_bot to get V_t+1
            #
            # NOTE(review): p1 still moves after next_round() even if p2's
            # move already ended the game -- confirm this is intended.
            while Board.outcome(self.board) is None:
                self.next_round()
                cur_board, cur_board_value = self.roll_and_find_best_for_bot()
                self.p1.get_network().train(prev_board, cur_board_value)
                prev_board = cur_board

            score = Board.outcome(self.board)[1]
            sys.stderr.write("\t outcome {}".format(score))
            outcomes.append(score)

            # Final update: train the last position towards the real result.
            final_score = np.array([ score ]).reshape((1, 1))
            self.p1.get_network().train(prev_board, final_score)

            sys.stderr.write("\n")

            if episode % save_interval == 0:
                sys.stderr.write("[TRAIN] Saving model...\n")
                self.p1.get_network().save_model()
                self.p2.restore_model()

        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
        self.p1.get_network().save_model()
        self.p2.restore_model()

        return outcomes

    def next_round_test(self):
        """Print the board before and after a single ``next_round`` call."""
        print(self.board)
        print()
        self.next_round()
        print("--------------------------------")
        print(self.board)
        print("--------------------------------")

    def eval(self, init_ep = 0):
        """Evaluate ``p1`` with every method listed in ``config['eval_methods']``.

        Each method is run for ``config['episode_count']`` episodes.

        Returns:
            A list of ``(method, outcomes)`` tuples.  For an unknown method
            the outcomes are ``[0]``.
        """
        def do_eval(method, episodes = 1000, init_ep = 0):
            """Run ``episodes`` games with ``method`` and return their scores."""
            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))

            if method != 'random':
                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                return [0]

            outcomes = []
            for i in range(episodes):
                sys.stderr.write("[EVAL ] Episode {}".format(i))
                self.board = Board.initial_state
                # NOTE(review): the random opponent still moves even if p1's
                # move already ended the game -- confirm this is intended.
                while Board.outcome(self.board) is None:
                    roll = self.roll()
                    self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
                    roll = self.roll()
                    self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
                score = Board.outcome(self.board)[1]
                sys.stderr.write("\t outcome {}".format(score))
                outcomes.append(score)
                sys.stderr.write("\n")
            return outcomes

        return [ (method, do_eval(method,
                                  self.config['episode_count'],
                                  init_ep = init_ep))
                 for method in self.config['eval_methods'] ]

    def play(self, episodes = 1000):
        """Play ``episodes`` games where both sides move randomly.

        Prints the winner and the round number after each game.

        Returns:
            A list with the score (``Board.outcome(...)[1]``) of every game.
        """
        outcomes = []
        for i in range(episodes):
            self.board = Board.initial_state
            # NOTE(review): p2 still moves even if p1's move already ended
            # the game -- confirm this is intended.
            while Board.outcome(self.board) is None:
                roll = self.roll()
                self.board = (self.p1.make_random_move(self.board, self.p1.get_sym(), roll))

                roll = self.roll()
                self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))

            outcome = Board.outcome(self.board)
            if outcome[1] > 0:
                print_winner = "1: White, " + str(outcome)
            else:
                print_winner = "-1: Black " + str(outcome)
            outcomes.append(outcome[1])
            print("The winner is {}!".format(print_winner))
            print("Round:",i)
        return outcomes


# NOTE(review): kept from the original file; nothing visible here reads it.
highest = 0