fixed dumb bugs; still messy

This commit is contained in:
Christoffer Müller Madsen 2018-03-14 20:42:09 +01:00
parent 08481e508a
commit 2fc7a2a09c
5 changed files with 106 additions and 29 deletions

10
bot.py
View File

@ -7,15 +7,21 @@ import numpy as np
import random import random
class Bot: class Bot:
def __init__(self, sym, config = None): def __init__(self, sym, config = None, name = "unnamed"):
self.config = config self.config = config
self.cup = Cup() self.cup = Cup()
self.sym = sym self.sym = sym
self.graph = tf.Graph() self.graph = tf.Graph()
with self.graph.as_default(): with self.graph.as_default():
self.session = tf.Session() self.session = tf.Session()
self.network = Network(self.session, config) self.network = Network(self.session, config, name)
self.network.restore_model() self.network.restore_model()
variables_names = [v.name for v in tf.trainable_variables()]
values = self.session.run(variables_names)
for k, v in zip(variables_names, values):
print("Variable: ", k)
print("Shape: ", v.shape)
print(v)
def restore_model(self): def restore_model(self):
with self.graph.as_default(): with self.graph.as_default():

10
eval.py
View File

@ -14,8 +14,14 @@ class Eval:
# TODO: Test this, the score results should be deterministic # TODO: Test this, the score results should be deterministic
@staticmethod @staticmethod
def make_pubeval_move(board, sym, roll): def make_pubeval_move(board, sym, roll):
legal_moves = Board.calculate_legal_states(tuple(board), sym, roll) legal_moves = Board.calculate_legal_states(board, sym, roll)
moves_and_scores = [(board, pubeval.eval(False, Board.board_features_to_pubeval(board, sym))) for board in legal_moves] moves_and_scores = [ ( board,
pubeval.eval(False, Board.board_features_to_pubeval(board, sym)))
for board
in legal_moves ]
scores = [ x[1] for x in moves_and_scores ] scores = [ x[1] for x in moves_and_scores ]
best_move_pair = moves_and_scores[np.array(scores).argmax()] best_move_pair = moves_and_scores[np.array(scores).argmax()]
return best_move_pair return best_move_pair

48
game.py
View File

@ -8,6 +8,7 @@ from eval import Eval
import numpy as np import numpy as np
import sys import sys
import time import time
import os # for path join
class Game: class Game:
@ -21,8 +22,8 @@ class Game:
self.cup = Cup() self.cup = Cup()
def set_up_bots(self): def set_up_bots(self):
self.p1 = Bot(1, config = self.config) self.p1 = Bot(1, config = self.config, name = "p1")
self.p2 = Bot(1, config = self.config) self.p2 = Bot(1, config = self.config, name = "p2")
def roll(self): def roll(self):
return self.cup.roll() return self.cup.roll()
@ -169,7 +170,7 @@ class Game:
roll = self.roll() roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0] self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll() roll = self.roll()
self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll)) self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), 1, roll))
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
@ -183,11 +184,28 @@ class Game:
for i in range(1, episodes + 1): for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i)) sys.stderr.write("[EVAL ] Episode {}".format(i))
self.board = Board.initial_state self.board = Board.initial_state
#print("init:", self.board, sep="\n")
while Board.outcome(self.board) is None: while Board.outcome(self.board) is None:
#print("-"*30)
roll = self.roll() roll = self.roll()
#print(roll)
prev_board = tuple(self.board)
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0] self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
#print("post p1:", self.board, sep="\n")
#print("."*30)
roll = self.roll() roll = self.roll()
self.board = Board.flip(Eval.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26]) #print(roll)
prev_board = tuple(self.board)
self.board = Eval.make_pubeval_move(self.board, -1, roll)[0][0:26]
#print("post pubeval:", self.board, sep="\n")
#print("*"*30)
#print(self.board)
#print("+"*30)
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
@ -196,6 +214,28 @@ class Game:
print_time_estimate(i) print_time_estimate(i)
return outcomes return outcomes
elif method == 'dumbmodel':
config_prime = self.config.copy()
config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
#print(self.config, "\n", config_prime)
outcomes = []
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
self.board = Board.initial_state
while Board.outcome(self.board) is None:
roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll()
self.board = Board.flip(eval_bot.make_move(Board.flip(self.board), self.p1.get_sym(), roll)[0])
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n")
if i % 50 == 0:
print_time_estimate(i)
return outcomes
else: else:
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
return [0] return [0]

View File

@ -73,7 +73,8 @@ config = {
'eval': args.eval, 'eval': args.eval,
'eval_after_train': args.eval_after_train, 'eval_after_train': args.eval_after_train,
'start_episode': args.start_episode, 'start_episode': args.start_episode,
'train_perpetually': args.train_perpetually 'train_perpetually': args.train_perpetually,
'model_storage_path': model_storage_path
} }
# Make sure directories exist # Make sure directories exist

View File

@ -15,15 +15,13 @@ class Network:
#os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
def custom_tanh(self, x, name=None): def custom_tanh(self, x, name=None):
a = tf.Variable(2.00, tf.float32) return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
return tf.scalar_mul(a, tf.tanh(x, name))
def __init__(self, session, config = None): def __init__(self, session, config, name):
self.config = config self.config = config
self.session = session self.session = session
self.checkpoint_path = config['model_path'] self.checkpoint_path = config['model_path']
self.name = name
# Restore trained episode count for model # Restore trained episode count for model
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
@ -37,18 +35,23 @@ class Network:
xavier_init = tf.contrib.layers.xavier_initializer() xavier_init = tf.contrib.layers.xavier_initializer()
W_1 = tf.Variable(xavier_init((Network.input_size, Network.hidden_size))) W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
W_2 = tf.Variable(xavier_init((Network.hidden_size, Network.output_size))) initializer=xavier_init)
W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
initializer=xavier_init)
b_1 = tf.zeros(Network.hidden_size,) b_1 = tf.get_variable("b_1", (Network.hidden_size,),
b_2 = tf.zeros(Network.output_size,) initializer=tf.zeros_initializer)
b_2 = tf.get_variable("b_2", (Network.output_size,),
initializer=tf.zeros_initializer)
value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
# TODO: Remember to make this tanh * 2
self.value = self.custom_tanh(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') self.value = self.custom_tanh(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
# tf.reduce_sum basically finds the sum of it's input, so this gives the difference between the two values, in case they should be lists, which they might be if our input changes # tf.reduce_sum basically finds the sum of its input, so this gives the
# difference between the two values, in case they should be lists, which
# they might be if our input changes
difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference') difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference')
trainable_vars = tf.trainable_variables() trainable_vars = tf.trainable_variables()
@ -71,32 +74,53 @@ class Network:
def eval_state(self, state): def eval_state(self, state):
# Run state through a network # Run state through a network
# Remember to create placeholders for everything because wtf tensorflow and graphs # Remember to create placeholders for everything because wtf tensorflow
# and graphs
# Remember to create the dense layers # Remember to create the dense layers
# Figure out a way of giving a layer a custom activation function (we want something which gives [-2,2]). Naively tanh*2, however I feel this is wrong. # Figure out a way of giving a layer a custom activation function (we
# want something which gives [-2,2]). Naively tanh*2, however I feel this
# is wrong.
# tf.group, groups a bunch of actions, so calculate the different gradients for the different weights, by using tf.trainable_variables() to find all variables and tf.gradients(current_value, trainable_variables) to find all the gradients. We can then loop through this and calculate the trace for each gradient and variable pair (note, zip can be used to combine the two lists found before), and then we can calculate the overall change in weights, based on the formula listed in tesauro (learning_rate * difference_in_values * trace), this calculation can be assigned to a tf variable and put in a list and then this can be grouped into a single operation, essentially building our own backprop function. # tf.group, groups a bunch of actions, so calculate the different
# Grouping them is done by tf.group(*the_gradients_from_before_we_want_to_apply, name="training_op") # gradients for the different weights, by using tf.trainable_variables()
# to find all variables and tf.gradients(current_value,
# trainable_variables) to find all the gradients. We can then loop
# through this and calculate the trace for each gradient and variable
# pair (note, zip can be used to combine the two lists found before),
# and then we can calculate the overall change in weights, based on the
# formula listed in tesauro (learning_rate * difference_in_values *
# trace), this calculation can be assigned to a tf variable and put in a
# list and then this can be grouped into a single operation, essentially
# building our own backprop function.
# If we remove the eligibility trace to begin with, we only have # Grouping them is done by
# to implement learning_rate * (difference_in_values) * gradients (the before-mentioned calculation). # tf.group(*the_gradients_from_before_we_want_to_apply,
# name="training_op")
# If we remove the eligibility trace to begin with, we only have to
# implement learning_rate * (difference_in_values) * gradients (the
# before-mentioned calculation).
# print("Network is evaluating") # print("Network is evaluating")
val = self.session.run(self.value, feed_dict={self.x: state}) val = self.session.run(self.value, feed_dict={self.x: state})
#print("eval ({})".format(self.name), state, val, sep="\n")
return val return val
def save_model(self, episode_count): def save_model(self, episode_count):
self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt')) self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
print("[NETWK] ({name}) Saving model to:".format(name = self.name),
os.path.join(self.checkpoint_path, 'model.ckpt'))
f.write(str(episode_count) + "\n") f.write(str(episode_count) + "\n")
def restore_model(self): def restore_model(self):
if os.path.isfile(self.checkpoint_path): if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
print("[NETWK] ({name}) Restoring model from:".format(name = self.name),
str(latest_checkpoint))
self.saver.restore(self.session, latest_checkpoint) self.saver.restore(self.session, latest_checkpoint)
# Have a circular dependency, #fuck, need to rewrite something # Have a circular dependency, #fuck, need to rewrite something