clean up and move things to network.py

Alexander Munch-Hansen 2018-03-20 13:03:21 +01:00
parent 9b96cf41da
commit 99783ee4f8
4 changed files with 113 additions and 81 deletions

bot.py (12)

@@ -12,16 +12,9 @@ class Bot:
         self.cup = Cup()
         self.sym = sym

         self.graph = tf.Graph()
-        with self.graph.as_default():
-            self.session = tf.Session()
-            self.network = Network(self.session, config, name)
+        self.network = Network(config, name)
         self.network.restore_model()
-        variables_names = [v.name for v in tf.trainable_variables()]
-        values = self.session.run(variables_names)
-        for k, v in zip(variables_names, values):
-            print("Variable: ", k)
-            print("Shape: ", v.shape)
-            print(v)

     def restore_model(self):
         with self.graph.as_default():
@@ -36,6 +29,7 @@ class Bot:
     def get_network(self):
         return self.network

+    # TODO: DEPRECATE
     def make_move(self, board, sym, roll):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, sym, roll)

game.py (55)

@@ -83,61 +83,6 @@ class Game:
         print(Board.outcome(self.board))

-    def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
-        start_time = time.time()
-
-        def print_time_estimate(eps_completed):
-            cur_time = time.time()
-            time_diff = cur_time - start_time
-            eps_per_sec = eps_completed / time_diff
-            secs_per_ep = time_diff / eps_completed
-            eps_remaining = (episodes - eps_completed)
-            sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-            sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
-
-        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
-        outcomes = []
-        for episode in range(1, episodes + 1):
-            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-            self.board = Board.initial_state
-
-            prev_board, prev_board_value = self.best_move_and_score()
-
-            # find the best move here, make this move, then change turn as the
-            # first thing inside of the while loop and then call
-            # best_move_and_score to get V_t+1
-            while Board.outcome(self.board) is None:
-                self.next_round()
-                cur_board, cur_board_value = self.best_move_and_score()
-                self.p1.get_network().train(prev_board, cur_board_value)
-                prev_board = cur_board
-                # print("-"*30)
-                # print(Board.pretty(self.board))
-                # print("/"*30)
-
-            sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
-            outcomes.append(Board.outcome(self.board)[1])
-            final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
-            self.p1.get_network().train(prev_board, final_score)
-
-            sys.stderr.write("\n")
-
-            if episode % min(save_step_size, episodes) == 0:
-                sys.stderr.write("[TRAIN] Saving model...\n")
-                self.p1.get_network().save_model(episode+trained_eps)
-                sys.stderr.write("[TRAIN] Loading model for training opponent...\n")
-                self.p2.restore_model()
-
-            if episode % 50 == 0:
-                print_time_estimate(episode)
-
-        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
-        self.p1.get_network().save_model(episode+trained_eps)
-        self.p2.restore_model()
-
-        return outcomes
-
     def next_round_test(self):
         print(self.board)
         print()

main.py (10)

@@ -65,6 +65,7 @@ parser.add_argument('--list-models', action='store_true',
 args = parser.parse_args()

 config = {
+    'model': args.model,
     'model_path': os.path.join(model_storage_path, args.model),
     'episode_count': args.episode_count,
     'eval_methods': args.eval_methods,
@@ -86,10 +87,8 @@ if not os.path.isdir(log_path):
     os.mkdir(log_path)

-# Set up game
-import game
-g = game.Game(config = config)
-g.set_up_bots()
+# Set up network
+from network import Network

 # Set up variables
@@ -111,9 +110,10 @@ if args.list_models:
         sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1]))

 elif args.train:
+    network = Network(config, config['model'])
     eps = config['start_episode']
     while True:
-        train_outcome = g.train_model(episodes = episode_count, trained_eps = eps)
+        train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
         eps += episode_count
         log_train_outcome(train_outcome, trained_eps = eps)
         if config['eval_after_train']:

network.py

@@ -3,6 +3,9 @@ from cup import Cup
 import numpy as np
 from board import Board
 import os
+import time
+import sys
+import random

 class Network:
     hidden_size = 40
@@ -17,18 +20,12 @@ class Network:
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

-    def __init__(self, session, config, name):
+    def __init__(self, config, name):
         self.config = config
-        self.session = session
+        self.session = tf.Session()
         self.checkpoint_path = config['model_path']
         self.name = name

-        # Restore trained episode count for model
-        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
-        if os.path.isfile(episode_count_path):
-            with open(episode_count_path, 'r') as f:
-                self.config['start_episode'] = int(f.read())
-
         # input = x
         self.x = tf.placeholder('float', [1, Network.input_size], name='x')
         self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
@@ -52,6 +49,8 @@ class Network:
         # tf.reduce_sum basically finds the sum of its input, so this gives the
         # difference between the two values, in case they should be lists, which
         # they might be if our input changes
+
+        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
         difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference')

         trainable_vars = tf.trainable_variables()
@@ -71,6 +70,8 @@ class Network:
         self.saver = tf.train.Saver(max_to_keep=1)
         self.session.run(tf.global_variables_initializer())

+        self.restore_model()
+
     def eval_state(self, state):
         # Run state through a network
@@ -122,12 +123,25 @@
             print("[NETWK] ({name}) Restoring model from:".format(name = self.name),
                   str(latest_checkpoint))
             self.saver.restore(self.session, latest_checkpoint)
+            variables_names = [v.name for v in tf.trainable_variables()]
+            values = self.session.run(variables_names)
+            for k, v in zip(variables_names, values):
+                print("Variable: ", k)
+                print("Shape: ", v.shape)
+                print(v)
+
+        # Restore trained episode count for model
+        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
+        if os.path.isfile(episode_count_path):
+            with open(episode_count_path, 'r') as f:
+                self.config['start_episode'] = int(f.read())

     # Have a circular dependency, #fuck, need to rewrite something
-    def train(self, board, v_next):
+    def adjust_weights(self, board, v_next):
         # print("lol")
         board = np.array(board).reshape((1,26))
-        self.session.run(self.training_op, feed_dict = {self.x:board, self.value_next: v_next})
+        self.session.run(self.training_op, feed_dict = { self.x: board,
+                                                         self.value_next: v_next })

     # while game isn't done:
@@ -138,6 +152,85 @@ class Network:
+    def make_move(self, board, roll):
+        # print(Board.pretty(board))
+        legal_moves = Board.calculate_legal_states(board, 1, roll)
+        moves_and_scores = [ (move, self.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
+        scores = [ x[1] for x in moves_and_scores ]
+        best_score_index = np.array(scores).argmax()
+        best_move_pair = moves_and_scores[best_score_index]
+        #print("Found the best state, being:", np.array(move_scores).argmax())
+        return best_move_pair
+
+    def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
+        start_time = time.time()
+
+        def print_time_estimate(eps_completed):
+            cur_time = time.time()
+            time_diff = cur_time - start_time
+            eps_per_sec = eps_completed / time_diff
+            secs_per_ep = time_diff / eps_completed
+            eps_remaining = (episodes - eps_completed)
+            sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
+            sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
+
+        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
+        outcomes = []
+        for episode in range(1, episodes + 1):
+            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
+            # TODO decide which player should be here
+            player = 1
+
+            roll = (random.randrange(1,7), random.randrange(1,7))
+            prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
+            if player == -1:
+                prev_board = Board.flip(prev_board)
+
+            # find the best move here, make this move, then change turn as the
+            # first thing inside of the while loop and then call
+            # best_move_and_score to get V_t+1
+            # i = 0
+            while Board.outcome(prev_board) is None:
+                # print("-"*30)
+                # print(i)
+                # print(roll)
+                # print(Board.pretty(prev_board))
+                # print("/"*30)
+                # i += 1
+
+                player *= -1
+                roll = (random.randrange(1,7), random.randrange(1,7))
+
+                cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll)
+                if player == -1:
+                    cur_board = Board.flip(cur_board)
+
+                self.adjust_weights(prev_board, cur_board_value)
+
+                prev_board = cur_board
+
+            final_board = prev_board
+            sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
+            outcomes.append(Board.outcome(final_board)[1])
+            final_score = np.array([ Board.outcome(final_board)[1] ])
+            self.adjust_weights(prev_board, final_score.reshape((1, 1)))
+
+            sys.stderr.write("\n")
+
+            if episode % min(save_step_size, episodes) == 0:
+                sys.stderr.write("[TRAIN] Saving model...\n")
+                self.save_model(episode+trained_eps)
+
+            if episode % 50 == 0:
+                print_time_estimate(episode)
+
+        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
+        self.save_model(episode+trained_eps)
+
+        return outcomes
+
     # take turn, which finds the best state and picks it, based on the current network
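Taken together with the main.py changes above, training is now driven directly through Network instead of game.Game. A minimal usage sketch under that assumption (the model name and paths below are hypothetical placeholders; only config keys actually read by code in this diff are shown):

    import os
    from network import Network

    # Hypothetical example values; main.py normally fills this dict from argparse.
    config = {
        'model': 'example-model',
        'model_path': os.path.join('models', 'example-model'),
        'start_episode': 0,
    }

    network = Network(config, config['model'])   # __init__ also calls restore_model()
    eps = config['start_episode']
    outcomes = network.train_model(episodes = 1000, trained_eps = eps)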