woooow
This commit is contained in:
parent
a33826219d
commit
30183448ec
5
bot.py
5
bot.py
|
@ -7,13 +7,14 @@ import random
|
||||||
|
|
||||||
class Bot:
|
class Bot:
|
||||||
|
|
||||||
def __init__(self, sym):
|
def __init__(self, sym, config = None):
|
||||||
|
self.config = config
|
||||||
self.cup = Cup()
|
self.cup = Cup()
|
||||||
self.sym = sym
|
self.sym = sym
|
||||||
self.graph = tf.Graph()
|
self.graph = tf.Graph()
|
||||||
with self.graph.as_default():
|
with self.graph.as_default():
|
||||||
self.session = tf.Session()
|
self.session = tf.Session()
|
||||||
self.network = Network(self.session)
|
self.network = Network(self.session, config)
|
||||||
self.network.restore_model()
|
self.network.restore_model()
|
||||||
|
|
||||||
|
|
||||||
|
|
114
game.py
114
game.py
|
@ -1,19 +1,24 @@
|
||||||
from board import Board
|
from board import Board
|
||||||
from bot import Bot
|
from bot import Bot
|
||||||
from restore_bot import RestoreBot
|
from restore_bot import RestoreBot
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from cup import Cup
|
from cup import Cup
|
||||||
|
import numpy as np
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
class Game:
|
class Game:
|
||||||
def __init__(self):
|
def __init__(self, config = None):
|
||||||
|
self.config = config
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
|
|
||||||
self.p1 = Bot(1)
|
self.p1 = None
|
||||||
self.p2 = Bot(1)
|
self.p2 = None
|
||||||
self.cup = Cup()
|
self.cup = Cup()
|
||||||
|
|
||||||
|
def set_up_bots(self):
|
||||||
|
self.p1 = Bot(1, config = self.config)
|
||||||
|
self.p2 = Bot(1, config = self.config)
|
||||||
|
|
||||||
def roll(self):
|
def roll(self):
|
||||||
return self.cup.roll()
|
return self.cup.roll()
|
||||||
|
|
||||||
|
@ -32,39 +37,45 @@ class Game:
|
||||||
def board_state(self):
|
def board_state(self):
|
||||||
return self.board
|
return self.board
|
||||||
|
|
||||||
def train_model(self):
|
def train_model(self, episodes=1000, save_step_size = 100, init_ep = 0):
|
||||||
episodes = 100
|
sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
|
||||||
outcomes = []
|
outcomes = []
|
||||||
for episode in range(episodes):
|
for episode in range(episodes):
|
||||||
|
sys.stderr.write("[TRAIN] Episode {}".format(episode + init_ep))
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
# prev_board = self.board
|
|
||||||
prev_board, prev_board_value = self.roll_and_find_best_for_bot()
|
prev_board, prev_board_value = self.roll_and_find_best_for_bot()
|
||||||
# find the best move here, make this move, then change turn as the
|
# find the best move here, make this move, then change turn as the
|
||||||
# first thing inside of the while loop and then call
|
# first thing inside of the while loop and then call
|
||||||
# roll_and_find_best_for_bot to get V_t+1
|
# roll_and_find_best_for_bot to get V_t+1
|
||||||
# self.p1.make_move(prev_board, self.p1.get_sym(), self.roll())
|
|
||||||
while Board.outcome(self.board) is None:
|
while Board.outcome(self.board) is None:
|
||||||
self.next_round()
|
self.next_round()
|
||||||
cur_board, cur_board_value = self.roll_and_find_best_for_bot()
|
cur_board, cur_board_value = self.roll_and_find_best_for_bot()
|
||||||
self.p1.get_network().train(prev_board, cur_board_value)
|
self.p1.get_network().train(prev_board, cur_board_value)
|
||||||
prev_board = cur_board
|
prev_board = cur_board
|
||||||
# self.next_round()
|
|
||||||
# print("-"*30)
|
# print("-"*30)
|
||||||
# print(Board.pretty(self.board))
|
# print(Board.pretty(self.board))
|
||||||
# print("/"*30)
|
# print("/"*30)
|
||||||
print("Outcome:", Board.outcome(self.board)[1])
|
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
|
final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
|
||||||
self.p1.get_network().train(prev_board, final_score)
|
self.p1.get_network().train(prev_board, final_score)
|
||||||
print("trained episode {}".format(episode))
|
|
||||||
if episode % 10 == 0:
|
sys.stderr.write("\n")
|
||||||
print("Saving...")
|
|
||||||
|
if episode % min(save_step_size, episodes) == 0:
|
||||||
|
sys.stderr.write("[TRAIN] Saving model...\n")
|
||||||
self.p1.get_network().save_model()
|
self.p1.get_network().save_model()
|
||||||
self.p2.restore_model()
|
self.p2.restore_model()
|
||||||
print(sum(outcomes))
|
|
||||||
|
|
||||||
print(outcomes)
|
|
||||||
print(sum(outcomes))
|
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
||||||
|
self.p1.get_network().save_model()
|
||||||
|
self.p2.restore_model()
|
||||||
|
|
||||||
|
return outcomes
|
||||||
|
|
||||||
def next_round_test(self):
|
def next_round_test(self):
|
||||||
print(self.board)
|
print(self.board)
|
||||||
|
@ -74,31 +85,58 @@ class Game:
|
||||||
print(self.board)
|
print(self.board)
|
||||||
print("--------------------------------")
|
print("--------------------------------")
|
||||||
|
|
||||||
def play(self, amount_of_games):
|
def eval(self, init_ep = 0):
|
||||||
|
def do_eval(method, episodes = 1000, init_ep = 0):
|
||||||
|
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
||||||
|
if method == 'random':
|
||||||
|
outcomes = []
|
||||||
|
for i in range(episodes):
|
||||||
|
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||||
|
self.board = Board.initial_state
|
||||||
|
while Board.outcome(self.board) is None:
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
||||||
|
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
||||||
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
|
sys.stderr.write("\n")
|
||||||
|
return outcomes
|
||||||
|
else:
|
||||||
|
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
||||||
|
return [0]
|
||||||
|
|
||||||
|
return [ (method, do_eval(method,
|
||||||
|
self.config['episode_count'],
|
||||||
|
init_ep = init_ep))
|
||||||
|
for method
|
||||||
|
in self.config['eval_methods'] ]
|
||||||
|
|
||||||
|
def play(self, episodes = 1000):
|
||||||
outcomes = []
|
outcomes = []
|
||||||
for i in range(amount_of_games):
|
for i in range(episodes):
|
||||||
count = 0
|
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
while Board.outcome(self.board) is None:
|
while Board.outcome(self.board) is None:
|
||||||
count += 1
|
# count += 1
|
||||||
print("Turn:",count)
|
# print("Turn:",count)
|
||||||
|
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
|
|
||||||
print("type of board: ", type(self.board))
|
# print("type of board: ", type(self.board))
|
||||||
print("Board:",self.board)
|
# print("Board:",self.board)
|
||||||
print("{} rolled: {}".format(self.p1.get_sym(), roll))
|
# print("{} rolled: {}".format(self.p1.get_sym(), roll))
|
||||||
|
|
||||||
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
self.board = (self.p1.make_random_move(self.board, self.p1.get_sym(), roll))
|
||||||
|
|
||||||
print(self.board)
|
# print(self.board)
|
||||||
|
|
||||||
print()
|
# print()
|
||||||
|
|
||||||
count += 1
|
# count += 1
|
||||||
|
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
print("{} rolled: {}".format(self.p2.get_sym(), roll))
|
# print("{} rolled: {}".format(self.p2.get_sym(), roll))
|
||||||
self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,21 +146,11 @@ class Game:
|
||||||
print_winner = "-1: Black " + str(Board.outcome(self.board))
|
print_winner = "-1: Black " + str(Board.outcome(self.board))
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
print("The winner is {}!".format(print_winner))
|
print("The winner is {}!".format(print_winner))
|
||||||
print("Final board:",Board.pretty(self.board))
|
print("Round:",i)
|
||||||
|
# print("Final board:",Board.pretty(self.board))
|
||||||
return outcomes
|
return outcomes
|
||||||
# return count
|
# return count
|
||||||
|
|
||||||
highest = 0
|
highest = 0
|
||||||
|
|
||||||
#for i in range(100000):
|
|
||||||
# try:
|
|
||||||
g = Game()
|
|
||||||
#g.train_model()
|
|
||||||
outcomes = g.play(2000)
|
|
||||||
print(outcomes)
|
|
||||||
print(sum(outcomes))
|
|
||||||
#count = g.play()
|
|
||||||
# highest = max(highest,count)
|
|
||||||
# except KeyboardInterrupt:
|
|
||||||
# break
|
|
||||||
#print("\nHighest amount of turns is:",highest)
|
|
||||||
|
|
83
main.py
Normal file
83
main.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
import argparse
|
||||||
|
import config
|
||||||
|
|
||||||
|
def print_train_outcome(outcome, init_ep = 0):
    """Print a one-line, semicolon-separated summary of a training run.

    The line has the form ``train;<init_ep>;<count>;<sum>;<mean>``.

    Args:
        outcome: Sequence of per-episode numeric scores.
        init_ep: Episode offset reported in the summary line.
    """
    # Summarise the argument itself; the previous body read the module-level
    # name `train_outcome`, which only exists when the caller defines it.
    count = len(outcome)
    total = sum(outcome)
    format_vars = {
        'init_ep': init_ep,
        'count': count,
        'sum': total,
        # An empty run has no mean; report 0.0 rather than dividing by zero.
        'mean': total / count if count else 0.0,
    }
    print("train;{init_ep};{count};{sum};{mean}".format(**format_vars))
|
||||||
|
|
||||||
|
def print_eval_outcomes(outcomes, init_ep = 0):
    """Print one semicolon-separated summary line per evaluation method.

    Each line has the form
    ``eval;<method>;<init_ep>;<count>;<sum>;<mean>``.

    Args:
        outcomes: Iterable of ``(method, scores)`` pairs, where ``scores`` is
            a sequence of numeric per-episode results.
        init_ep: Episode offset reported in every summary line.
    """
    # Iterate the argument itself; the previous body looped over the
    # module-level name `eval_outcomes`, which is not defined on every path.
    for outcome in outcomes:
        method = outcome[0]
        scores = outcome[1]
        count = len(scores)
        total = sum(scores)
        format_vars = {
            'init_ep': init_ep,
            'method': method,
            'count': count,
            'sum': total,
            # An empty score list has no mean; report 0.0 instead of crashing.
            'mean': total / count if count else 0.0,
        }
        print("eval;{method};{init_ep};{count};{sum};{mean}".format(**format_vars))
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description="Backgammon games")
|
||||||
|
parser.add_argument('--episodes', action='store', dest='episode_count',
|
||||||
|
type=int, default=1000,
|
||||||
|
help='number of episodes to train')
|
||||||
|
parser.add_argument('--model-path', action='store', dest='model_path',
|
||||||
|
default='./model',
|
||||||
|
help='path to Tensorflow model')
|
||||||
|
parser.add_argument('--eval-methods', action='store',
|
||||||
|
default=['random'], nargs='*',
|
||||||
|
help='specifies evaluation methods')
|
||||||
|
parser.add_argument('--eval', action='store_true',
|
||||||
|
help='whether to evaluate the neural network with a random choice bot')
|
||||||
|
parser.add_argument('--train', action='store_true',
|
||||||
|
help='whether to train the neural network')
|
||||||
|
parser.add_argument('--play', action='store_true',
|
||||||
|
help='whether to play with the neural network')
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
config = {
|
||||||
|
'model_path': args.model_path,
|
||||||
|
'episode_count': args.episode_count,
|
||||||
|
'eval_methods': args.eval_methods,
|
||||||
|
'train': args.train,
|
||||||
|
'play': args.play,
|
||||||
|
'eval': args.eval
|
||||||
|
}
|
||||||
|
|
||||||
|
#print("-"*30)
|
||||||
|
#print(type(args.eval_methods))
|
||||||
|
#print(args.eval_methods)
|
||||||
|
#print("-"*30)
|
||||||
|
|
||||||
|
import game
|
||||||
|
g = game.Game(config = config)
|
||||||
|
g.set_up_bots()
|
||||||
|
|
||||||
|
episode_count = args.episode_count
|
||||||
|
|
||||||
|
if args.train:
|
||||||
|
eps = 0
|
||||||
|
while True:
|
||||||
|
train_outcome = g.train_model(episodes = episode_count, init_ep = eps)
|
||||||
|
print_train_outcome(train_outcome, init_ep = eps)
|
||||||
|
if args.eval:
|
||||||
|
eval_outcomes = g.eval(init_ep = eps)
|
||||||
|
print_eval_outcomes(eval_outcomes, init_ep = eps)
|
||||||
|
eps += episode_count
|
||||||
|
elif args.eval:
|
||||||
|
outcomes = g.eval()
|
||||||
|
print_eval_outcomes(outcomes, init_ep = 0)
|
||||||
|
#elif args.play:
|
||||||
|
# g.play(episodes = episode_count)
|
||||||
|
|
||||||
|
#outcomes = g.play(2000)
|
||||||
|
#print(outcomes)
|
||||||
|
#print(sum(outcomes))
|
||||||
|
#count = g.play()
|
||||||
|
# highest = max(highest,count)
|
||||||
|
# except KeyboardInterrupt:
|
||||||
|
# break
|
||||||
|
#print("\nHighest amount of turns is:",highest)
|
34
network.py
34
network.py
|
@ -2,19 +2,15 @@ import tensorflow as tf
|
||||||
from cup import Cup
|
from cup import Cup
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from board import Board
|
from board import Board
|
||||||
#from game import Game
|
|
||||||
import os
|
import os
|
||||||
|
import config
|
||||||
|
|
||||||
class Config():
|
class Network:
|
||||||
hidden_size = 40
|
hidden_size = 40
|
||||||
input_size = 26
|
input_size = 26
|
||||||
output_size = 1
|
output_size = 1
|
||||||
# Can't remember the best learning_rate, look this up
|
# Can't remember the best learning_rate, look this up
|
||||||
learning_rate = 0.1
|
learning_rate = 0.1
|
||||||
checkpoint_path = "/tmp/"
|
|
||||||
|
|
||||||
|
|
||||||
class Network:
|
|
||||||
|
|
||||||
# TODO: Actually compile tensorflow properly
|
# TODO: Actually compile tensorflow properly
|
||||||
#os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
|
#os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
|
||||||
|
@ -24,26 +20,22 @@ class Network:
|
||||||
return tf.scalar_mul(a, tf.tanh(x, name))
|
return tf.scalar_mul(a, tf.tanh(x, name))
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, session):
|
def __init__(self, session, config = None):
|
||||||
|
self.config = config
|
||||||
self.session = session
|
self.session = session
|
||||||
self.config = Config
|
self.checkpoint_path = config['model_path']
|
||||||
input_size = self.config.input_size
|
|
||||||
hidden_size = self.config.hidden_size
|
|
||||||
output_size = self.config.output_size
|
|
||||||
learning_rate = self.config.learning_rate
|
|
||||||
self.checkpoint_path = self.config.checkpoint_path
|
|
||||||
|
|
||||||
# input = x
|
# input = x
|
||||||
self.x = tf.placeholder('float', [1,input_size], name='x')
|
self.x = tf.placeholder('float', [1, Network.input_size], name='x')
|
||||||
self.value_next = tf.placeholder('float', [1,output_size], name="value_next")
|
self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
|
||||||
|
|
||||||
xavier_init = tf.contrib.layers.xavier_initializer()
|
xavier_init = tf.contrib.layers.xavier_initializer()
|
||||||
|
|
||||||
W_1 = tf.Variable(xavier_init((input_size, hidden_size)))
|
W_1 = tf.Variable(xavier_init((Network.input_size, Network.hidden_size)))
|
||||||
W_2 = tf.Variable(xavier_init((hidden_size, output_size)))
|
W_2 = tf.Variable(xavier_init((Network.hidden_size, Network.output_size)))
|
||||||
|
|
||||||
b_1 = tf.zeros(hidden_size,)
|
b_1 = tf.zeros(Network.hidden_size,)
|
||||||
b_2 = tf.zeros(output_size,)
|
b_2 = tf.zeros(Network.output_size,)
|
||||||
|
|
||||||
value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
|
value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
|
||||||
|
|
||||||
|
@ -61,7 +53,7 @@ class Network:
|
||||||
with tf.variable_scope('apply_gradients'):
|
with tf.variable_scope('apply_gradients'):
|
||||||
for gradient, trainable_var in zip(gradients, trainable_vars):
|
for gradient, trainable_var in zip(gradients, trainable_vars):
|
||||||
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
|
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
|
||||||
backprop_calc = learning_rate * difference_in_values * gradient
|
backprop_calc = Network.learning_rate * difference_in_values * gradient
|
||||||
grad_apply = trainable_var.assign_add(backprop_calc)
|
grad_apply = trainable_var.assign_add(backprop_calc)
|
||||||
apply_gradients.append(grad_apply)
|
apply_gradients.append(grad_apply)
|
||||||
|
|
||||||
|
@ -92,7 +84,7 @@ class Network:
|
||||||
return val
|
return val
|
||||||
|
|
||||||
def save_model(self):
|
def save_model(self):
|
||||||
self.saver.save(self.session, self.checkpoint_path + 'model.ckpt')
|
self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
|
||||||
|
|
||||||
def restore_model(self):
|
def restore_model(self):
|
||||||
if os.path.isfile(self.checkpoint_path):
|
if os.path.isfile(self.checkpoint_path):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user