diff --git a/bot.py b/bot.py
index f1a2c08..a008134 100644
--- a/bot.py
+++ b/bot.py
@@ -7,13 +7,14 @@ import random
 
 class Bot:
 
-    def __init__(self, sym):
+    def __init__(self, sym, config = None):
+        self.config = config
         self.cup = Cup()
         self.sym = sym
         self.graph = tf.Graph()
         with self.graph.as_default():
             self.session = tf.Session()
-            self.network = Network(self.session)
+            self.network = Network(self.session, config)
             self.network.restore_model()
 
diff --git a/game.py b/game.py
index 3e75f1d..b14d472 100644
--- a/game.py
+++ b/game.py
@@ -1,19 +1,24 @@
 from board import Board
 from bot import Bot
 from restore_bot import RestoreBot
-import numpy as np
-
 from cup import Cup
+import numpy as np
+import sys
 
 
 class Game:
-    def __init__(self):
+    def __init__(self, config = None):
+        self.config = config
         self.board = Board.initial_state
-        self.p1 = Bot(1)
-        self.p2 = Bot(1)
+        self.p1 = None
+        self.p2 = None
         self.cup = Cup()
 
+    def set_up_bots(self):
+        self.p1 = Bot(1, config = self.config)
+        self.p2 = Bot(1, config = self.config)
+
     def roll(self):
         return self.cup.roll()
 
@@ -32,39 +37,45 @@ class Game:
     def board_state(self):
         return self.board
 
-    def train_model(self):
-        episodes = 100
+    def train_model(self, episodes=1000, save_step_size = 100, init_ep = 0):
+        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
         outcomes = []
         for episode in range(episodes):
+            sys.stderr.write("[TRAIN] Episode {}".format(episode + init_ep))
             self.board = Board.initial_state
-#            prev_board = self.board
+
             prev_board, prev_board_value = self.roll_and_find_best_for_bot()
             # find the best move here, make this move, then change turn as the
             # first thing inside of the while loop and then call
             # roll_and_find_best_for_bot to get V_t+1
-#            self.p1.make_move(prev_board, self.p1.get_sym(), self.roll())
+
             while Board.outcome(self.board) is None:
                 self.next_round()
                 cur_board, cur_board_value = self.roll_and_find_best_for_bot()
                 self.p1.get_network().train(prev_board, cur_board_value)
                 prev_board = cur_board
-#                self.next_round()
+
                 # print("-"*30)
                 # print(Board.pretty(self.board))
                 # print("/"*30)
 
-            print("Outcome:", Board.outcome(self.board)[1])
+            sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
             outcomes.append(Board.outcome(self.board)[1])
             final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
             self.p1.get_network().train(prev_board, final_score)
-            print("trained episode {}".format(episode))
-            if episode % 10 == 0:
-                print("Saving...")
+
+            sys.stderr.write("\n")
+
+            if episode % min(save_step_size, episodes) == 0:
+                sys.stderr.write("[TRAIN] Saving model...\n")
                 self.p1.get_network().save_model()
                 self.p2.restore_model()
-        print(sum(outcomes))
-        print(outcomes)
-        print(sum(outcomes))
+
+        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
+        self.p1.get_network().save_model()
+        self.p2.restore_model()
+
+        return outcomes
 
     def next_round_test(self):
         print(self.board)
@@ -74,31 +85,58 @@ class Game:
         print(self.board)
         print("--------------------------------")
 
-    def play(self, amount_of_games):
+    def eval(self, init_ep = 0):
+        def do_eval(method, episodes = 1000, init_ep = 0):
+            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
+            if method == 'random':
+                outcomes = []
+                for i in range(episodes):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    self.board = Board.initial_state
+                    while Board.outcome(self.board) is None:
+                        roll = self.roll()
+                        self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
+                        roll = self.roll()
+                        self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
+                    outcomes.append(Board.outcome(self.board)[1])
+                    sys.stderr.write("\n")
+                return outcomes
+            else:
+                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
+                return [0]
+
+        return [ (method, do_eval(method,
+                                  self.config['episode_count'],
+                                  init_ep = init_ep))
+                 for method
+                 in self.config['eval_methods'] ]
+
+    def play(self, episodes = 1000):
         outcomes = []
-        for i in range(amount_of_games):
-            count = 0
+        for i in range(episodes):
+            self.board = Board.initial_state
             while Board.outcome(self.board) is None:
-                count += 1
-                print("Turn:",count)
+                # count += 1
+                # print("Turn:",count)
                 roll = self.roll()
-                print("type of board: ", type(self.board))
-                print("Board:",self.board)
-                print("{} rolled: {}".format(self.p1.get_sym(), roll))
+#                print("type of board: ", type(self.board))
+#                print("Board:",self.board)
+#                print("{} rolled: {}".format(self.p1.get_sym(), roll))
 
-                self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
+                self.board = (self.p1.make_random_move(self.board, self.p1.get_sym(), roll))
 
-                print(self.board)
+                # print(self.board)
 
-                print()
+#                print()
 
-                count += 1
+#                count += 1
                 roll = self.roll()
-                print("{} rolled: {}".format(self.p2.get_sym(), roll))
+                # print("{} rolled: {}".format(self.p2.get_sym(), roll))
 
                 self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
 
@@ -108,21 +146,11 @@ class Game:
             print_winner = "-1: Black " + str(Board.outcome(self.board))
             outcomes.append(Board.outcome(self.board)[1])
             print("The winner is {}!".format(print_winner))
-            print("Final board:",Board.pretty(self.board))
+            print("Round:",i)
+#            print("Final board:",Board.pretty(self.board))
         return outcomes
 #        return count
 
 highest = 0
-#for i in range(100000):
-#    try:
-g = Game()
-#g.train_model()
-outcomes = g.play(2000)
-print(outcomes)
-print(sum(outcomes))
-#count = g.play()
-    # highest = max(highest,count)
-    # except KeyboardInterrupt:
-    # break
-#print("\nHighest amount of turns is:",highest)
+
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e320595
--- /dev/null
+++ b/main.py
@@ -0,0 +1,83 @@
+import argparse
+import config
+
+def print_train_outcome(outcome, init_ep = 0):
+    format_vars = { 'init_ep': init_ep,
+                    'count': len(outcome),
+                    'sum': sum(outcome),
+                    'mean': sum(outcome) / len(outcome)}
+    print("train;{init_ep};{count};{sum};{mean}".format(**format_vars))
+
+def print_eval_outcomes(outcomes, init_ep = 0):
+    for outcome in outcomes:
+        scores = outcome[1]
+        format_vars = { 'init_ep': init_ep,
+                        'method': outcome[0],
+                        'count': len(scores),
+                        'sum': sum(scores),
+                        'mean': sum(scores) / len(scores)
+        }
+        print("eval;{method};{init_ep};{count};{sum};{mean}".format(**format_vars))
+
+parser = argparse.ArgumentParser(description="Backgammon games")
+parser.add_argument('--episodes', action='store', dest='episode_count',
+                    type=int, default=1000,
+                    help='number of episodes to train')
+parser.add_argument('--model-path', action='store', dest='model_path',
+                    default='./model',
+                    help='path to Tensorflow model')
+parser.add_argument('--eval-methods', action='store',
+                    default=['random'], nargs='*',
+                    help='specifies evaluation methods')
+parser.add_argument('--eval', action='store_true',
+                    help='whether to evaluate the neural network with a random choice bot')
+parser.add_argument('--train', action='store_true',
+                    help='whether to train the neural network')
+parser.add_argument('--play', action='store_true',
+                    help='whether to play with the neural network')
+
+args = parser.parse_args()
+
+config = {
+    'model_path': args.model_path,
+    'episode_count': args.episode_count,
+    'eval_methods': args.eval_methods,
+    'train': args.train,
+    'play': args.play,
+    'eval': args.eval
+}
+
+#print("-"*30)
+#print(type(args.eval_methods))
+#print(args.eval_methods)
+#print("-"*30)
+
+import game
+g = game.Game(config = config)
+g.set_up_bots()
+
+episode_count = args.episode_count
+
+if args.train:
+    eps = 0
+    while True:
+        train_outcome = g.train_model(episodes = episode_count, init_ep = eps)
+        print_train_outcome(train_outcome, init_ep = eps)
+        if args.eval:
+            eval_outcomes = g.eval(init_ep = eps)
+            print_eval_outcomes(eval_outcomes, init_ep = eps)
+        eps += episode_count
+elif args.eval:
+    outcomes = g.eval()
+    print_eval_outcomes(outcomes, init_ep = 0)
+#elif args.play:
+#    g.play(episodes = episode_count)
+
+#outcomes = g.play(2000)
+#print(outcomes)
+#print(sum(outcomes))
+#count = g.play()
+    # highest = max(highest,count)
+    # except KeyboardInterrupt:
+    # break
+#print("\nHighest amount of turns is:",highest)
diff --git a/network.py b/network.py
index a83fcaa..007b213 100644
--- a/network.py
+++ b/network.py
@@ -2,19 +2,15 @@ import tensorflow as tf
 from cup import Cup
 import numpy as np
 from board import Board
-#from game import Game
 import os
-
-class Config():
+import config
+
+class Network:
     hidden_size = 40
     input_size = 26
     output_size = 1
     # Can't remember the best learning_rate, look this up
     learning_rate = 0.1
-    checkpoint_path = "/tmp/"
-
-
-class Network:
 
     # TODO: Actually compile tensorflow properly
     #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
 
@@ -24,26 +20,22 @@ class Network:
 
         return tf.scalar_mul(a, tf.tanh(x, name))
 
-    def __init__(self, session):
+    def __init__(self, session, config = None):
+        self.config = config
         self.session = session
-        self.config = Config
-        input_size = self.config.input_size
-        hidden_size = self.config.hidden_size
-        output_size = self.config.output_size
-        learning_rate = self.config.learning_rate
-        self.checkpoint_path = self.config.checkpoint_path
-
+        self.checkpoint_path = config['model_path']
+
         # input = x
-        self.x = tf.placeholder('float', [1,input_size], name='x')
-        self.value_next = tf.placeholder('float', [1,output_size], name="value_next")
+        self.x = tf.placeholder('float', [1, Network.input_size], name='x')
+        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
 
         xavier_init = tf.contrib.layers.xavier_initializer()
 
-        W_1 = tf.Variable(xavier_init((input_size, hidden_size)))
-        W_2 = tf.Variable(xavier_init((hidden_size, output_size)))
+        W_1 = tf.Variable(xavier_init((Network.input_size, Network.hidden_size)))
+        W_2 = tf.Variable(xavier_init((Network.hidden_size, Network.output_size)))
 
-        b_1 = tf.zeros(hidden_size,)
-        b_2 = tf.zeros(output_size,)
+        b_1 = tf.zeros(Network.hidden_size,)
+        b_2 = tf.zeros(Network.output_size,)
 
         value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
 
@@ -61,7 +53,7 @@ class Network:
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
-                backprop_calc = learning_rate * difference_in_values * gradient
+                backprop_calc = Network.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
 
@@ -92,7 +84,7 @@ class Network:
         return val
 
     def save_model(self):
-        self.saver.save(self.session, self.checkpoint_path + 'model.ckpt')
+        self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
 
     def restore_model(self):
         if os.path.isfile(self.checkpoint_path):