move evaluation code into network.py
This commit is contained in:
parent
99783ee4f8
commit
b7e6dd10af
104
game.py
104
game.py
|
@ -3,12 +3,8 @@ from player import Player
|
||||||
from bot import Bot
|
from bot import Bot
|
||||||
from restore_bot import RestoreBot
|
from restore_bot import RestoreBot
|
||||||
from cup import Cup
|
from cup import Cup
|
||||||
from eval import Eval
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import os # for path join
|
|
||||||
|
|
||||||
class Game:
|
class Game:
|
||||||
|
|
||||||
|
@ -91,106 +87,6 @@ class Game:
|
||||||
print(self.board)
|
print(self.board)
|
||||||
print("--------------------------------")
|
print("--------------------------------")
|
||||||
|
|
||||||
def eval(self, trained_eps = 0):
|
|
||||||
def do_eval(method, episodes = 1000, trained_eps = 0):
|
|
||||||
start_time = time.time()
|
|
||||||
|
|
||||||
def print_time_estimate(eps_completed):
|
|
||||||
cur_time = time.time()
|
|
||||||
time_diff = cur_time - start_time
|
|
||||||
eps_per_sec = eps_completed / time_diff
|
|
||||||
secs_per_ep = time_diff / eps_completed
|
|
||||||
eps_remaining = (episodes - eps_completed)
|
|
||||||
sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
|
|
||||||
sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
|
|
||||||
|
|
||||||
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
|
||||||
|
|
||||||
if method == 'random':
|
|
||||||
outcomes = []
|
|
||||||
for i in range(1, episodes + 1):
|
|
||||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
|
||||||
self.board = Board.initial_state
|
|
||||||
while Board.outcome(self.board) is None:
|
|
||||||
roll = self.roll()
|
|
||||||
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
|
||||||
roll = self.roll()
|
|
||||||
self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), 1, roll))
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
|
||||||
sys.stderr.write("\n")
|
|
||||||
|
|
||||||
if i % 50 == 0:
|
|
||||||
print_time_estimate(i)
|
|
||||||
return outcomes
|
|
||||||
elif method == 'pubeval':
|
|
||||||
outcomes = []
|
|
||||||
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
|
|
||||||
for i in range(1, episodes + 1):
|
|
||||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
|
||||||
self.board = Board.initial_state
|
|
||||||
#print("init:", self.board, sep="\n")
|
|
||||||
while Board.outcome(self.board) is None:
|
|
||||||
#print("-"*30)
|
|
||||||
roll = self.roll()
|
|
||||||
#print(roll)
|
|
||||||
|
|
||||||
prev_board = tuple(self.board)
|
|
||||||
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
|
||||||
#print("post p1:", self.board, sep="\n")
|
|
||||||
|
|
||||||
#print("."*30)
|
|
||||||
roll = self.roll()
|
|
||||||
#print(roll)
|
|
||||||
|
|
||||||
prev_board = tuple(self.board)
|
|
||||||
self.board = Eval.make_pubeval_move(self.board, -1, roll)[0][0:26]
|
|
||||||
#print("post pubeval:", self.board, sep="\n")
|
|
||||||
|
|
||||||
|
|
||||||
#print("*"*30)
|
|
||||||
#print(self.board)
|
|
||||||
#print("+"*30)
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
|
||||||
sys.stderr.write("\n")
|
|
||||||
|
|
||||||
if i % 10 == 0:
|
|
||||||
print_time_estimate(i)
|
|
||||||
|
|
||||||
return outcomes
|
|
||||||
elif method == 'dumbmodel':
|
|
||||||
config_prime = self.config.copy()
|
|
||||||
config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
|
|
||||||
eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
|
|
||||||
#print(self.config, "\n", config_prime)
|
|
||||||
outcomes = []
|
|
||||||
for i in range(1, episodes + 1):
|
|
||||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
|
||||||
self.board = Board.initial_state
|
|
||||||
while Board.outcome(self.board) is None:
|
|
||||||
roll = self.roll()
|
|
||||||
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
|
||||||
|
|
||||||
roll = self.roll()
|
|
||||||
self.board = Board.flip(eval_bot.make_move(Board.flip(self.board), self.p1.get_sym(), roll)[0])
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
|
||||||
sys.stderr.write("\n")
|
|
||||||
|
|
||||||
if i % 50 == 0:
|
|
||||||
print_time_estimate(i)
|
|
||||||
return outcomes
|
|
||||||
else:
|
|
||||||
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
|
||||||
return [0]
|
|
||||||
|
|
||||||
return [ (method, do_eval(method,
|
|
||||||
self.config['episode_count'],
|
|
||||||
trained_eps = trained_eps))
|
|
||||||
for method
|
|
||||||
in self.config['eval_methods'] ]
|
|
||||||
|
|
||||||
def play(self, episodes = 1000):
|
def play(self, episodes = 1000):
|
||||||
outcomes = []
|
outcomes = []
|
||||||
for i in range(episodes):
|
for i in range(episodes):
|
||||||
|
|
22
main.py
22
main.py
|
@ -87,14 +87,6 @@ if not os.path.isdir(log_path):
|
||||||
os.mkdir(log_path)
|
os.mkdir(log_path)
|
||||||
|
|
||||||
|
|
||||||
# Set up network
|
|
||||||
from network import Network
|
|
||||||
|
|
||||||
|
|
||||||
# Set up variables
|
|
||||||
episode_count = config['episode_count']
|
|
||||||
|
|
||||||
|
|
||||||
# Do actions specified by command-line
|
# Do actions specified by command-line
|
||||||
if args.list_models:
|
if args.list_models:
|
||||||
def get_eps_trained(folder):
|
def get_eps_trained(folder):
|
||||||
|
@ -109,21 +101,29 @@ if args.list_models:
|
||||||
for model in models:
|
for model in models:
|
||||||
sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))
|
sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))
|
||||||
|
|
||||||
elif args.train:
|
exit()
|
||||||
|
|
||||||
|
# Set up network
|
||||||
|
from network import Network
|
||||||
network = Network(config, config['model'])
|
network = Network(config, config['model'])
|
||||||
eps = config['start_episode']
|
eps = config['start_episode']
|
||||||
|
|
||||||
|
# Set up variables
|
||||||
|
episode_count = config['episode_count']
|
||||||
|
|
||||||
|
if args.train:
|
||||||
while True:
|
while True:
|
||||||
train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
|
train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
|
||||||
eps += episode_count
|
eps += episode_count
|
||||||
log_train_outcome(train_outcome, trained_eps = eps)
|
log_train_outcome(train_outcome, trained_eps = eps)
|
||||||
if config['eval_after_train']:
|
if config['eval_after_train']:
|
||||||
eval_outcomes = g.eval(trained_eps = eps)
|
eval_outcomes = network.eval(trained_eps = eps)
|
||||||
log_eval_outcomes(eval_outcomes, trained_eps = eps)
|
log_eval_outcomes(eval_outcomes, trained_eps = eps)
|
||||||
if not config['train_perpetually']:
|
if not config['train_perpetually']:
|
||||||
break
|
break
|
||||||
elif args.eval:
|
elif args.eval:
|
||||||
eps = config['start_episode']
|
eps = config['start_episode']
|
||||||
outcomes = g.eval()
|
outcomes = network.eval()
|
||||||
log_eval_outcomes(outcomes, trained_eps = eps)
|
log_eval_outcomes(outcomes, trained_eps = eps)
|
||||||
#elif args.play:
|
#elif args.play:
|
||||||
# g.play(episodes = episode_count)
|
# g.play(episodes = episode_count)
|
||||||
|
|
102
network.py
102
network.py
|
@ -6,6 +6,7 @@ import os
|
||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
import random
|
import random
|
||||||
|
from eval import Eval
|
||||||
|
|
||||||
class Network:
|
class Network:
|
||||||
hidden_size = 40
|
hidden_size = 40
|
||||||
|
@ -240,3 +241,104 @@ class Network:
|
||||||
|
|
||||||
# NOTE: We need a method that takes a single turn, or at least just picks the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop with nothing to stop it!
|
# NOTE: We need a method that takes a single turn, or at least just picks the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop with nothing to stop it!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def eval(self, trained_eps = 0):
    """Play evaluation games against every opponent in self.config['eval_methods'].

    For each configured method, self.config['episode_count'] games are
    played starting from Board.initial_state.  Within a game the network
    moves first (through self.make_move) and the opponent answers, until
    Board.outcome(board) reports a result.

    Supported methods:
      'random'  -- opponent moves are produced by Eval.make_random_move
      'pubeval' -- opponent moves are produced by Eval.make_pubeval_move

    Args:
        trained_eps: forwarded to the inner evaluator for parity with the
            caller's signature; it does not influence play.

    Returns:
        A list of (method, outcomes) tuples, one per configured method.
        outcomes holds Board.outcome(board)[1] for every finished game, or
        is [0] when the method name is not recognised.
    """

    def roll_dice():
        # Two independent six-sided dice.
        return (random.randrange(1, 7), random.randrange(1, 7))

    # method name -> (opponent move function, progress-report interval).
    # The lambdas are only evaluated when a game is actually played.
    opponents = {
        'random': (
            # The random mover is invoked as player 1 on the flipped board
            # and its result is flipped back.
            lambda board, roll: Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)),
            50,
        ),
        'pubeval': (
            # pubeval plays as -1 on the unflipped board; only the first 26
            # entries of its returned board are kept.
            lambda board, roll: Eval.make_pubeval_move(board, -1, roll)[0][0:26],
            10,
        ),
    }

    def do_eval(method, episodes = 1000, trained_eps = 0):
        start_time = time.time()

        def print_time_estimate(eps_completed):
            time_diff = time.time() - start_time
            if time_diff <= 0:
                # Clock has not advanced; a rate cannot be computed yet.
                return
            eps_per_sec = eps_completed / time_diff
            secs_per_ep = time_diff / eps_completed
            eps_remaining = (episodes - eps_completed)
            sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
            sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))

        sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))

        if method not in opponents:
            sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
            return [0]

        opponent_move, report_every = opponents[method]
        outcomes = []
        for i in range(1, episodes + 1):
            sys.stderr.write("[EVAL ] Episode {}".format(i))
            board = Board.initial_state
            # NOTE(review): the opponent still moves after the network's move
            # even if that move already ended the game -- kept as in the
            # original loop; confirm this is intended.
            while Board.outcome(board) is None:
                # The network always moves through its own make_move; the
                # earlier self.p1 indirection belonged to the Game class.
                board = self.make_move(board, roll_dice())[0]
                board = opponent_move(board, roll_dice())
            result = Board.outcome(board)[1]
            sys.stderr.write("\t outcome {}".format(result))
            outcomes.append(result)
            sys.stderr.write("\n")

            if i % report_every == 0:
                print_time_estimate(i)
        return outcomes

    return [ (method, do_eval(method,
                              self.config['episode_count'],
                              trained_eps = trained_eps))
             for method
             in self.config['eval_methods'] ]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user