diff --git a/game.py b/game.py
index b21d047..9469b57 100644
--- a/game.py
+++ b/game.py
@@ -3,12 +3,8 @@ from player import Player
 from bot import Bot
 from restore_bot import RestoreBot
 from cup import Cup
-from eval import Eval
 
 import numpy as np
-import sys
-import time
-import os # for path join
 
 
 class Game:
@@ -91,106 +87,6 @@ class Game:
         print(self.board)
         print("--------------------------------")
 
-    def eval(self, trained_eps = 0):
-        def do_eval(method, episodes = 1000, trained_eps = 0):
-            start_time = time.time()
-
-            def print_time_estimate(eps_completed):
-                cur_time = time.time()
-                time_diff = cur_time - start_time
-                eps_per_sec = eps_completed / time_diff
-                secs_per_ep = time_diff / eps_completed
-                eps_remaining = (episodes - eps_completed)
-                sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-                sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
-
-            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
-
-            if method == 'random':
-                outcomes = []
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    self.board = Board.initial_state
-                    while Board.outcome(self.board) is None:
-                        roll = self.roll()
-                        self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
-                        roll = self.roll()
-                        self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), 1, roll))
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
-                    outcomes.append(Board.outcome(self.board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 50 == 0:
-                        print_time_estimate(i)
-                return outcomes
-            elif method == 'pubeval':
-                outcomes = []
-                # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    self.board = Board.initial_state
-                    #print("init:", self.board, sep="\n")
-                    while Board.outcome(self.board) is None:
-                        #print("-"*30)
-                        roll = self.roll()
-                        #print(roll)
-
-                        prev_board = tuple(self.board)
-                        self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
-                        #print("post p1:", self.board, sep="\n")
-
-                        #print("."*30)
-                        roll = self.roll()
-                        #print(roll)
-
-                        prev_board = tuple(self.board)
-                        self.board = Eval.make_pubeval_move(self.board, -1, roll)[0][0:26]
-                        #print("post pubeval:", self.board, sep="\n")
-
-
-                        #print("*"*30)
-                        #print(self.board)
-                        #print("+"*30)
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
-                    outcomes.append(Board.outcome(self.board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 10 == 0:
-                        print_time_estimate(i)
-
-                return outcomes
-            elif method == 'dumbmodel':
-                config_prime = self.config.copy()
-                config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
-                eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
-                #print(self.config, "\n", config_prime)
-                outcomes = []
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    self.board = Board.initial_state
-                    while Board.outcome(self.board) is None:
-                        roll = self.roll()
-                        self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
-
-                        roll = self.roll()
-                        self.board = Board.flip(eval_bot.make_move(Board.flip(self.board), self.p1.get_sym(), roll)[0])
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
-                    outcomes.append(Board.outcome(self.board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 50 == 0:
-                        print_time_estimate(i)
-                return outcomes
-            else:
-                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
-                return [0]
-
-        return [ (method, do_eval(method,
-                                  self.config['episode_count'],
-                                  trained_eps = trained_eps))
-                 for method
-                 in self.config['eval_methods'] ]
-
     def play(self, episodes = 1000):
         outcomes = []
         for i in range(episodes):
diff --git a/main.py b/main.py
index 5213958..8d42e2b 100644
--- a/main.py
+++ b/main.py
@@ -87,14 +87,6 @@ if not os.path.isdir(log_path):
     os.mkdir(log_path)
 
 
-# Set up network
-from network import Network
-
-
-# Set up variables
-episode_count = config['episode_count']
-
-
 # Do actions specified by command-line
 if args.list_models:
     def get_eps_trained(folder):
@@ -108,22 +100,30 @@ if args.list_models:
     sys.stderr.write("Found {} model(s)\n".format(len(models)))
     for model in models:
         sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))
+
+    sys.exit()
+
+# Set up network
+from network import Network
+network = Network(config, config['model'])
+eps = config['start_episode']
+
+# Set up variables
+episode_count = config['episode_count']
 
-elif args.train:
-    network = Network(config, config['model'])
-    eps = config['start_episode']
+if args.train:
     while True:
         train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
         eps += episode_count
         log_train_outcome(train_outcome, trained_eps = eps)
         if config['eval_after_train']:
-            eval_outcomes = g.eval(trained_eps = eps)
+            eval_outcomes = network.eval(trained_eps = eps)
             log_eval_outcomes(eval_outcomes, trained_eps = eps)
         if not config['train_perpetually']:
             break
 elif args.eval:
     eps = config['start_episode']
-    outcomes = g.eval()
+    outcomes = network.eval()
     log_eval_outcomes(outcomes, trained_eps = eps)
 #elif args.play:
 #    g.play(episodes = episode_count)
diff --git a/network.py b/network.py
index 30b54b8..62b1d17 100644
--- a/network.py
+++ b/network.py
@@ -6,6 +6,7 @@ import os
 import time
 import sys
 import random
+from eval import Eval
 
 class Network:
     hidden_size = 40
@@ -240,3 +241,115 @@ class Network:
 
 
         # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
+
+
+    def eval(self, trained_eps = 0):
+        """Evaluate the network against every configured opponent.
+
+        Runs ``self.config['episode_count']`` episodes for each method
+        named in ``self.config['eval_methods']`` and returns a list of
+        ``(method, outcomes)`` tuples.  Progress is written to stderr.
+        """
+        def do_eval(method, episodes = 1000, trained_eps = 0):
+            """Play `episodes` games against `method`; return the outcomes.
+
+            Each outcome is ``Board.outcome(board)[1]`` of a finished game.
+            An unknown method is reported on stderr and yields ``[0]``.
+            `trained_eps` is accepted but not used here.
+            """
+            start_time = time.time()
+
+            def print_time_estimate(eps_completed):
+                """Write throughput and a remaining-time estimate to stderr."""
+                cur_time = time.time()
+                time_diff = cur_time - start_time
+                eps_per_sec = eps_completed / time_diff
+                secs_per_ep = time_diff / eps_completed
+                eps_remaining = (episodes - eps_completed)
+                sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
+                sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
+
+            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
+
+            if method == 'random':
+                outcomes = []
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1,7), random.randrange(1,7))
+                        board = (self.make_move(board, roll))[0]
+                        roll = (random.randrange(1,7), random.randrange(1,7))
+                        board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 50 == 0:
+                        print_time_estimate(i)
+                return outcomes
+            elif method == 'pubeval':
+                outcomes = []
+                # Opponent replies are chosen by Eval.make_pubeval_move(board, sym, roll).
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    #print("init:", board, sep="\n")
+                    while Board.outcome(board) is None:
+                        #print("-"*30)
+                        roll = (random.randrange(1,7), random.randrange(1,7))
+                        #print(roll)
+
+                        board = (self.make_move(board, roll))[0]
+                        #print("post p1:", board, sep="\n")
+
+                        #print("."*30)
+                        roll = (random.randrange(1,7), random.randrange(1,7))
+                        #print(roll)
+
+                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
+                        #print("post pubeval:", board, sep="\n")
+
+
+                        #print("*"*30)
+                        #print(board)
+                        #print("+"*30)
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 10 == 0:
+                        print_time_estimate(i)
+
+                return outcomes
+            # elif method == 'dumbmodel':
+            #     config_prime = self.config.copy()
+            #     config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
+            #     eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
+            #     #print(self.config, "\n", config_prime)
+            #     outcomes = []
+            #     for i in range(1, episodes + 1):
+            #         sys.stderr.write("[EVAL ] Episode {}".format(i))
+            #         board = Board.initial_state
+            #         while Board.outcome(board) is None:
+            #             roll = (random.randrange(1,7), random.randrange(1,7))
+            #             board = (self.make_move(board, self.p1.get_sym(), roll))[0]
+
+            #             roll = (random.randrange(1,7), random.randrange(1,7))
+            #             board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
+            #         sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+            #         outcomes.append(Board.outcome(board)[1])
+            #         sys.stderr.write("\n")
+
+            #         if i % 50 == 0:
+            #             print_time_estimate(i)
+            #         return outcomes
+            else:
+                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
+                return [0]
+
+        return [ (method, do_eval(method,
+                                  self.config['episode_count'],
+                                  trained_eps = trained_eps))
+                 for method
+                 in self.config['eval_methods'] ]