Compare commits

..

No commits in common. "master" and "eager_eval" have entirely different histories.

9 changed files with 247 additions and 415 deletions

141
app.py
View File

@ -1,141 +0,0 @@
from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
# Flask application object; the routes defined further down are registered on it.
app = Flask(__name__)
# Flask-JSON settings (presumably: no extra "status" field in responses, and
# JSONP callbacks are mandatory for @as_json_p views -- confirm against Flask-JSON docs).
app.config['JSON_ADD_STATUS'] = False
app.config['JSON_JSONP_OPTIONAL'] = False
json = FlaskJSON(app)
CORS(app)
# Work on a copy of main's config so the overrides below stay local to this module.
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
# The network is built and its saved model restored once, at import time;
# bot_move() reuses this module-level instance for every request.
network = Network(config, config['model'])
network.restore_model()
def calc_move_sets(from_board, roll, player):
    """Collect the legal states reachable with each die alone and with the whole roll.

    :param from_board: Board to move from
    :param roll: The dice still available
    :param player: The player making the move
    :return: A list of ``[legal_states, pips]`` pairs: one per die in ``roll``
             (computed with the roll ``[die, 0]``), followed by one pair for the
             complete roll whose ``pips`` is the sum of all dice.
    """
    per_die = [
        [Board.calculate_legal_states(from_board, player, [die, 0]), die]
        for die in roll
    ]
    whole_roll = [Board.calculate_legal_states(from_board, player, roll), sum(roll)]
    return per_die + [whole_roll]
def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
    """Check whether ``to_board`` is reachable from ``from_board`` and consume the dice used.

    The candidate sets come from ``calc_move_sets``: one per die, then one for
    the complete roll. The first set containing ``to_board`` wins.

    :param from_board: Board before the move
    :param to_board: Board the caller wants to move to
    :param roll: Mutable list of dice; the used die is zeroed in place
    :param player: The player making the move
    :param total_moves: Pips left before this move
    :param is_quad: Whether the roll is treated as four dice instead of two
    :return: ``(total_moves, roll, board)``. On a match the pips of the matching
             set are subtracted and ``board`` is ``to_board``; otherwise the
             inputs are returned unchanged together with ``from_board``.
    """
    candidate_sets = calc_move_sets(from_board, roll, player)
    print("To board:\n",to_board)
    print("All sets:\n",candidate_sets)

    # Positions below this bound are single-die sets; anything after is the full roll.
    single_die_slots = 4 if is_quad else 2

    for position, entry in enumerate(candidate_sets):
        entry[0] = list(entry[0])
        reachable, pips = entry
        if to_board not in reachable:
            continue

        total_moves -= pips
        if position < single_die_slots:
            roll[position] = 0
        else:
            roll = [0, 0]
        return total_moves, roll, to_board

    return total_moves, roll, from_board
def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
    """Validate a move via ``tmp_name`` and return ``(board, remaining_pips, roll)``.

    ``roll`` is copied into a fresh list first, so the caller's sequence is
    never mutated.
    """
    remaining, updated_roll, resulting_board = tmp_name(
        from_board, to_board, list(roll), player, total_roll, is_quad
    )
    return resulting_board, remaining, updated_roll
@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
    """Return a fixed board, as a comma-separated string under the ``board`` key."""
    layout = '0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'
    return {'board': layout}
def check_move(prev, curr):
    """Return True when ``curr`` is one of the legal states reachable from ``prev``.

    The player (-1) and the roll ([1, 2]) are hardcoded for now.
    """
    # TODO: Decide on player system and implement roll properly
    candidates = Board.calculate_legal_states(tuple(prev), -1, [1,2])
    for state in candidates:
        if list(curr) == list(state):
            return True
    return False
@app.route('/bot_move', methods=['POST'])
def bot_move():
    """Let the bot (player 1) make a move with a freshly rolled pair of dice.

    Expects a JSON body with:
      * ``board``   -- comma-separated integers describing the current board
      * ``pubeval`` -- flag selecting ``Eval.make_pubeval_move`` instead of the
                       network; may be a JSON boolean/number or a string

    :return: The board after the bot's move, as a comma-separated string.
    """
    data = request.get_json(force=True)
    board = tuple(int(x) for x in data['board'].split(','))

    pubeval_flag = data['pubeval']
    if isinstance(pubeval_flag, str):
        # bool("false") is True, so string flags need explicit parsing.
        # post_board compares its 'quad' flag against the string "true", so
        # string-valued flags are presumably what the client sends.
        use_pubeval = pubeval_flag.strip().lower() not in ('', '0', 'false', 'no', 'off')
    else:
        use_pubeval = bool(pubeval_flag)

    roll = (random.randrange(1, 7), random.randrange(1, 7))

    if use_pubeval:
        board, _ = Eval.make_pubeval_move(board, 1, roll)
    else:
        board, _ = network.make_move(board, roll, 1)

    return ",".join(str(x) for x in board)
@app.route('/post_board', methods=['POST'])
def post_board():
    """Validate a move posted by the client and report what is left of the roll.

    Reads ``board``, ``prevBoard`` and ``roll`` (comma-separated integers),
    ``quad`` (the string "true" enables it) and ``totalRoll`` from the JSON body.

    :return: ``"<board>#<total moves left>#<roll>"`` where board and roll are
             comma-separated.
    """
    data = request.get_json(force=True)

    def int_list(field):
        return [int(x) for x in data[field].split(',')]

    # TODO: Fix hardcoded player
    player = -1

    board = int_list('board')
    prev_board = int_list('prevBoard')

    print(data['roll'])
    roll = int_list('roll')
    print(roll)

    quad = data['quad'] == "true"
    total_roll = int(data['totalRoll'])
    print("total roll is:", total_roll)

    return_board, total_moves, roll = calc_move_stuff(
        tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad
    )

    return_string = "#".join([
        ",".join(str(x) for x in return_board),
        str(total_moves),
        ",".join(str(x) for x in roll),
    ])
    print(return_string)
    return return_string
if __name__ == '__main__':
    # Only start the development server when executed as a script.
    bind_host, bind_port = '0.0.0.0', 35270
    app.run(host=bind_host, port=bind_port)

View File

@ -1,78 +0,0 @@
# Create a model with the given board representation, then alternate between
# evaluating it (dumbeval and pubeval, 250 episodes, repeated 3 times) and
# training it for 2000 episodes until 200000 episodes have been trained.
#
# board_rep  - board representation passed to main.py as --board-rep
# model_name - model name passed to main.py as --model
# ply        - look-ahead depth passed to main.py as --ply
def run_stuff(board_rep, model_name, ply)
  # The argument-vector form of `system` bypasses the shell, so names that
  # contain spaces or shell metacharacters are passed through verbatim.
  main_py    = ["python3", "main.py"]
  model_args = ["--model", model_name.to_s]
  ply_args   = ["--ply", ply.to_s]

  # A single training episode creates the model with the requested representation.
  system(*main_py, "--train", *model_args, "--board-rep", board_rep.to_s,
         "--episodes", "1", *ply_args)

  epi_count = 0
  while epi_count < 200000
    %w[dumbeval pubeval].each do |method|
      system(*main_py, "--eval", *model_args, "--eval-methods", method,
             "--episodes", "250", *ply_args, "--repeat-eval", "3")
    end
    system(*main_py, "--train", *model_args, "--episodes", "2000", *ply_args)
    epi_count += 2000
  end
end
### ///////////////////////////////////////////////////////////////
# 0-PLY TESTING: QUACK, QUACK-FAT, QUACK-NORM, TESAURO
### ///////////////////////////////////////////////////////////////
# Each representation is run in turn with a model named
# "<board_rep>_test_0_ply". The matching 1-ply runs
# ("<board_rep>_test_1_ply" with ply = 1) are currently disabled.
ply = 0
["quack", "quack-fat", "quack-norm", "tesauro"].each do |board_rep|
  model_name = "#{board_rep}_test_0_ply"
  run_stuff(board_rep, model_name, ply)
end

View File

@ -1,30 +1,30 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name) def save(model_name)
require 'date' require 'date'
model_path = File.join(MODELS_DIR, model_name) models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!" puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz" file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(MODELS_DIR, 'saves', file_name) save_path = File.join(models_dir, 'saves', file_name)
puts "Saving to #{save_path}" puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name) system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
end end
def train(model, episodes) def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s) system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method) def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method) system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end end
@ -33,37 +33,15 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end if model.nil? then raise "no model specified" end
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
3.times do
evaluate model, 250, "pubeval"
end
3.times do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do while true do
save model save model
train model, 500 train model, 1000
5.times do save model
train model, 1000
3.times do
evaluate model, 250, "pubeval" evaluate model, 250, "pubeval"
end end
5.times do 3.times do
evaluate model, 250, "dumbeval" evaluate model, 250, "dumbeval"
end end
end end

View File

@ -16,6 +16,8 @@ class Board:
return quack.idxs_with_checkers_of_player(board, player) return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home # index 26 is player 1 home, index 27 is player -1 home
@staticmethod @staticmethod
def board_features_to_pubeval(board, player): def board_features_to_pubeval(board, player):
@ -49,6 +51,7 @@ class Board:
# board += ([1, 0] if np.sign(player) > 0 else [0, 1]) # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
# return np.array(board).reshape(1,30) # return np.array(board).reshape(1,30)
# quack-fatter # quack-fatter
@staticmethod @staticmethod
def board_features_quack_norm(board, player): def board_features_quack_norm(board, player):
@ -92,64 +95,11 @@ class Board:
board_rep += bar_trans(board, player) board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),) board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1, 0] if cur_player == 1 else [0, 1]) board_rep += ([1,0] if cur_player == 1 else [1,0])
return np.array(board_rep).reshape(1,198) return np.array(board_rep).reshape(1,198)
@staticmethod
def board_features_tesauro_fat(board, cur_player):
    """Encode ``board`` with a 15-unit unary ("thermometer") code per point.

    For each player (1 first, then -1) the encoding holds, in order:
      * 15 units for each of the points ``board[1:25]``; the first ``k`` units
        are 1 when the player has ``k`` checkers on the point
      * one unit with half the absolute value of the bar entry
      * one unit with ``15 - Board.num_of_checkers_for_player(board, player)``
    Two final units tell whose turn it is: ``[1, 0]`` for player 1, else ``[0, 1]``.

    :param board: The board to encode
    :param cur_player: The player whose turn it is
    :return: A numpy array of shape ``(1, n_features)``
    """
    units_per_point = 15

    def ordinary_trans(val, player):
        # Checkers owned by `player` count positive; the opponent's count as zero.
        # The previous elif ladder returned None above 15, so clamp instead.
        count = max(0, min(units_per_point, val * player))
        return tuple(1 if unit < count else 0 for unit in range(units_per_point))

    def bar_trans(board, player):
        # NOTE(review): index 0 is read for player 1 and index 25 for player -1;
        # confirm this matches the bar layout used elsewhere in Board.
        if player == 1:
            return (abs(board[0]/2),)
        elif player == -1:
            return (abs(board[25]/2),)

    board_rep = []
    for player in (1, -1):
        for point in board[1:25]:
            board_rep.extend(ordinary_trans(point, player))
        board_rep.extend(bar_trans(board, player))
        board_rep.append(15 - Board.num_of_checkers_for_player(board, player))

    board_rep.extend([1, 0] if cur_player == 1 else [0, 1])

    return np.array(board_rep).reshape(1, len(board_rep))
@staticmethod @staticmethod
def board_features_tesauro_wrong(board, cur_player): def board_features_tesauro_wrong(board, cur_player):
features = [] features = []
@ -247,6 +197,9 @@ class Board:
# Find all points with checkers on them belonging to the player # Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll # Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value): def calc_moves(board, face_value):
if face_value == 0: if face_value == 0:
return [board] return [board]
@ -268,13 +221,23 @@ class Board:
# print("Dice permuts:",dice_permutations) # print("Dice permuts:",dice_permutations)
for roll in dice_permutations: for roll in dice_permutations:
# Calculate boards resulting from first move # Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0]) boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]: for die in roll[1:]:
# Calculate boards resulting from second move # Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards] nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards] boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll # Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards) #print("printing boards from calculate_legal_states: ", boards)

30
main.py
View File

@ -2,7 +2,6 @@ import argparse
import sys import sys
import os import os
import time import time
import subprocess
# Parse command line arguments # Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games") parser = argparse.ArgumentParser(description="Backgammon games")
@ -78,20 +77,27 @@ if not os.path.isdir(model_path()):
if not os.path.isdir(log_path): if not os.path.isdir(log_path):
os.mkdir(log_path) os.mkdir(log_path)
def save_config():
import yaml
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
# config_path = os.path.join(checkpoint_path, 'config')
# with open(config_path, 'a+') as f:
# print("lol")
print(yaml.dump(config))
# Define helper functions # Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")): def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
format_vars = { 'trained_eps': trained_eps, format_vars = { 'trained_eps': trained_eps,
'count': len(outcome), 'count': len(outcome),
'sum': sum(outcome), 'sum': sum(outcome),
'mean': sum(outcome) / len(outcome), 'mean': sum(outcome) / len(outcome),
'time': int(time.time()), 'time': int(time.time()),
'average_diff_in_vals': diff_in_values, 'average_diff_in_vals': diff_in_values/len(outcome)
'commit': commit
} }
with open(log_path, 'a+') as f: with open(log_path, 'a+') as f:
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n") f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")): def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@ -102,12 +108,9 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
:param log_path: :param log_path:
:return: :return:
""" """
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes: for outcome in outcomes:
scores = outcome[1] scores = outcome[1]
format_vars = { 'commit': commit, format_vars = { 'trained_eps': trained_eps,
'trained_eps': trained_eps,
'method': outcome[0], 'method': outcome[0],
'count': len(scores), 'count': len(scores),
'sum': sum(scores), 'sum': sum(scores),
@ -115,10 +118,9 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
'time': int(time.time()) 'time': int(time.time())
} }
with open(log_path, 'a+') as f: with open(log_path, 'a+') as f:
f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n") f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0): def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes: for outcome in outcomes:
scores = outcome[1] scores = outcome[1]
format_vars = { 'trained_eps': trained_eps, format_vars = { 'trained_eps': trained_eps,
@ -128,10 +130,9 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
'mean': sum(scores) / len(scores), 'mean': sum(scores) / len(scores),
'time': time, 'time': time,
'index': index, 'index': index,
'commit': commit
} }
with open(log_path, 'a+') as f: with open(log_path, 'a+') as f:
f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n") f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
def find_board_rep(): def find_board_rep():
checkpoint_path = os.path.join(config['model_storage_path'], config['model']) checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
@ -171,6 +172,7 @@ if __name__ == "__main__":
# Set up network # Set up network
from network import Network from network import Network
save_config()
# Set up variables # Set up variables
episode_count = config['episode_count'] episode_count = config['episode_count']
@ -209,8 +211,6 @@ if __name__ == "__main__":
elif args.eval: elif args.eval:
network = Network(config, config['model']) network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])): for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained start_episode = network.episodes_trained
# Evaluation measures are described in `config` # Evaluation measures are described in `config`

View File

@ -21,10 +21,10 @@ class Network:
'quack' : (28, Board.board_features_quack), 'quack' : (28, Board.board_features_quack),
'tesauro' : (198, Board.board_features_tesauro), 'tesauro' : (198, Board.board_features_tesauro),
'quack-norm' : (30, Board.board_features_quack_norm), 'quack-norm' : (30, Board.board_features_quack_norm),
'tesauro-fat' : (726, Board.board_features_tesauro_fat),
'tesauro-poop': (198, Board.board_features_tesauro_wrong) 'tesauro-poop': (198, Board.board_features_tesauro_wrong)
} }
def custom_tanh(self, x, name=None): def custom_tanh(self, x, name=None):
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@ -39,11 +39,6 @@ class Network:
'0': self.make_move_0_ply '0': self.make_move_0_ply
} }
self.max_or_min = {
1: np.argmax,
-1: np.argmin
}
tf.enable_eager_execution() tf.enable_eager_execution()
xavier_init = tf.contrib.layers.xavier_initializer() xavier_init = tf.contrib.layers.xavier_initializer()
@ -114,11 +109,12 @@ class Network:
with tf.GradientTape() as tape: with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1)) value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables) grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables): for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc) train_var.assign_add(backprop_calc)
@ -148,9 +144,8 @@ class Network:
:param episode_count: :param episode_count:
:return: :return:
""" """
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt')) tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
#self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
print("[NETWK] ({name}) Saving model to:".format(name=self.name), print("[NETWK] ({name}) Saving model to:".format(name=self.name),
os.path.join(self.checkpoint_path, 'model.ckpt')) os.path.join(self.checkpoint_path, 'model.ckpt'))
@ -170,7 +165,8 @@ class Network:
:param states: A number of states. The states have to be transformed before being given to this function. :param states: A number of states. The states have to be transformed before being given to this function.
:return: :return:
""" """
return self.model.predict_on_batch(states) values = self.model.predict_on_batch(states)
return values
def restore_model(self): def restore_model(self):
@ -178,6 +174,7 @@ class Network:
Restore a model for a session, such that a trained model and either be further trained or Restore a model for a session, such that a trained model and either be further trained or
used for evaluation used for evaluation
:param sess: Current session
:return: Nothing. It's a side-effect that a model gets restored for the network. :return: Nothing. It's a side-effect that a model gets restored for the network.
""" """
@ -189,6 +186,9 @@ class Network:
str(latest_checkpoint)) str(latest_checkpoint))
tfe.Saver(self.model.variables).restore(latest_checkpoint) tfe.Saver(self.model.variables).restore(latest_checkpoint)
# variables_names = [v.name for v in self.model.variables]
# Restore trained episode count for model # Restore trained episode count for model
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
if os.path.isfile(episode_count_path): if os.path.isfile(episode_count_path):
@ -211,6 +211,7 @@ class Network:
and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead. and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player. The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
:param sess:
:param board: Current board :param board: Current board
:param roll: Current roll :param roll: Current roll
:param player: Current player :param player: Current player
@ -220,12 +221,13 @@ class Network:
legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves]) legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
scores = self.model.predict_on_batch(legal_states) scores = self.model.predict_on_batch(legal_states)
transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
best_score_idx = self.max_or_min[player](scores) best_score_idx = np.argmax(np.array(transformed_scores))
best_move = legal_moves[best_score_idx]
best_score = scores[best_score_idx]
best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx] return [best_move, best_score]
return (best_move, best_score)
def make_move_1_ply(self, board, roll, player): def make_move_1_ply(self, board, roll, player):
""" """
@ -235,7 +237,7 @@ class Network:
:param player: :param player:
:return: :return:
""" """
start = time.time() # start = time.time()
best_pair = self.calculate_1_ply(board, roll, player) best_pair = self.calculate_1_ply(board, roll, player)
# print(time.time() - start) # print(time.time() - start)
return best_pair return best_pair
@ -246,31 +248,35 @@ class Network:
Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
all moves and scores are found for them. The expected score is then calculated for each of the boards from the all moves and scores are found for them. The expected score is then calculated for each of the boards from the
0-ply. 0-ply.
:param sess:
:param board: :param board:
:param roll: The original roll :param roll: The original roll
:param player: The current player :param player: The current player
:return: Best possible move based on 1-ply look-ahead :return: Best possible move based on 1-ply look-ahead
""" """
# find all legal states from the given board and the given roll # find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll) init_legal_states = Board.calculate_legal_states(board, player, roll)
legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states]) legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
scores = [ score.numpy() scores = self.calc_vals(legal_states)
for score scores = [score.numpy() for score in scores]
in self.calc_vals(legal_states) ]
moves_and_scores = list(zip(init_legal_states, scores)) moves_and_scores = list(zip(init_legal_states, scores))
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
best_boards = [x[0] for x in sorted_moves_and_scores[:10]] best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
scores = self.do_ply(best_boards, player)
best_score_idx = self.max_or_min[player](scores)
# best_score_idx = np.array(trans_scores).argmax()
return (best_boards[best_score_idx], scores[best_score_idx]) scores, trans_scores = self.do_ply(best_boards, player)
best_score_idx = np.array(trans_scores).argmax()
return [best_boards[best_score_idx], scores[best_score_idx]]
def do_ply(self, boards, player): def do_ply(self, boards, player):
""" """
@ -279,6 +285,7 @@ class Network:
allowing the function to search deeper, which could result in an even larger search space. If we wish allowing the function to search deeper, which could result in an even larger search space. If we wish
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply. to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
:param sess:
:param boards: The boards to try all rolls on :param boards: The boards to try all rolls on
:param player: The player of the previous ply :param player: The player of the previous ply
:return: An array of scores where each index describes one of the boards which was given as param :return: An array of scores where each index describes one of the boards which was given as param
@ -298,11 +305,11 @@ class Network:
length_list = [] length_list = []
test_list = [] test_list = []
# Prepping of data # Prepping of data
# start = time.time() start= time.time()
for board in boards: for board in boards:
length = 0 length = 0
for roll in all_rolls: for roll in all_rolls:
all_states = Board.calculate_legal_states(board, player*-1, roll) all_states = list(Board.calculate_legal_states(board, player*-1, roll))
for state in all_states: for state in all_states:
state = np.array(self.board_trans_func(state, player*-1)[0]) state = np.array(self.board_trans_func(state, player*-1)[0])
test_list.append(state) test_list.append(state)
@ -313,19 +320,146 @@ class Network:
start = time.time() start = time.time()
all_scores = self.model.predict_on_batch(np.array(test_list)) all_scores_legit = self.model.predict_on_batch(np.array(test_list))
split_scores = [] split_scores = []
from_idx = 0 from_idx = 0
for length in length_list: for length in length_list:
split_scores.append(all_scores[from_idx:from_idx+length]) split_scores.append(all_scores_legit[from_idx:from_idx+length])
from_idx += length from_idx += length
means_splits = [tf.reduce_mean(scores) for scores in split_scores] means_splits = [tf.reduce_mean(scores) for scores in split_scores]
transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]
# print(time.time() - start) # print(time.time() - start)
# print("/"*50)
return means_splits return ([means_splits, transformed_means_splits])
def calc_n_ply(self, n_init, sess, board, player, roll):
    """
    Find the best (board, score) pair for a roll using an n-ply look-ahead.

    Every legal state for ``roll`` is scored with ``eval_state``; the first ten
    after sorting by score (descending for player 1, ascending otherwise) are
    handed to ``n_ply``.

    :param n_init: Look-ahead depth forwarded to ``n_ply``
    :param sess: Forwarded to ``eval_state`` and ``n_ply``
    :param board: Current board
    :param player: Current player
    :param roll: Current roll
    :return: Whatever ``n_ply`` returns for the selected boards
    """
    scored_moves = []
    for move in Board.calculate_legal_states(board, player, roll):
        features = self.board_trans_func(move, player)
        scored_moves.append((move, self.eval_state(sess, features)))

    # Sort in place, then keep only the ten leading candidates.
    scored_moves.sort(key=itemgetter(1), reverse=(player == 1))
    top_boards = [move for move, _ in scored_moves[:10]]

    return self.n_ply(n_init, sess, top_boards, player)
def n_ply(self, n_init, sess, boards_init, player_init):
"""
Pick the best (board, score) pair among ``boards_init`` using an n-ply look-ahead.

:param n_init: Depth of the look-ahead; values below 1 print a message and exit
:param sess: Passed through unchanged to ``self.eval_state``
:param boards_init: The candidate boards to choose between
:param player_init: The player choosing; the look-ahead itself is started for ``-1 * player_init``
:return: The (board, score) pair whose score -- taken as-is for player 1 and as ``1 - score`` otherwise -- is largest
"""
# Recursive worker: returns one (board, average score) pair per entry of `boards`.
def ply(n, boards, player):
# One collection of legal states per possible roll (21 distinct rolls, doubles included once).
def calculate_possible_states(board):
possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
(1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
(2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
(4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
(6, 6) ]
# for roll in possible_rolls:
# print(len(Board.calculate_legal_states(board, player, roll)))
return [ Board.calculate_legal_states(board, player, roll)
for roll
in possible_rolls ]
# Score every board with eval_state and return the (board, score) pair with the highest score.
def find_best_state_score(boards):
score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
for board
in boards ]
scores = [ pair[1]
for pair
in score_pairs ]
best_score_pair = score_pairs[np.array(scores).argmax()]
return best_score_pair
# Plain arithmetic mean; despite the parameter name it is called with a list of scores.
def average_score(boards):
return sum(boards)/len(boards)
# Mean, over all possible rolls, of the best score reachable with that roll.
def average_ply_score(board):
states_for_rolls = calculate_possible_states(board)
best_state_score_for_each_roll = [
find_best_state_score(states)
for states
in states_for_rolls ]
best_score_for_each_roll = [ x[1]
for x
in best_state_score_for_each_roll ]
average_score_var = average_score(best_score_for_each_roll)
return average_score_var
# Base case: score each board directly.
if n == 1:
average_score_pairs = [ (board, average_ply_score(board))
for board
in boards ]
return average_score_pairs
# Recursive case: expand every board into its per-roll states and recurse on each collection.
elif n > 1: # n != 1
# Mean of the scores in a list of (board, score) pairs.
def average_for_score_pairs(score_pairs):
scores = [ pair[1]
for pair
in score_pairs ]
return sum(scores)/len(scores)
# Mean of a plain list of scores.
def average_plain(scores):
return sum(scores)/len(scores)
# Debug output of the current depth and the boards being expanded.
print("+"*20)
print(n)
print(type(boards))
print(boards)
possible_states_for_boards = [
(board, calculate_possible_states(board))
for board
in boards ]
# NOTE(review): inside this branch n > 1, so `n == 1` is never true and `player` is always
# passed on unchanged -- confirm whether the player was meant to alternate between plies.
average_score_pairs = [
(inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
for inner_board
in inner_boards[1] ]))
for inner_boards
in possible_states_for_boards ]
return average_score_pairs
else:
assert False
# Reject depths below 1 before starting the recursion.
if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
# The look-ahead is evaluated for the other player.
boards_with_scores = ply(n_init, boards_init, -1 * player_init)
#print("Boards with scores:",boards_with_scores)
# Flip the scores for player -1 so that argmax picks the best pair for either player.
scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
for pair
in boards_with_scores ]
#print("All the scores:",scores)
best_score_pair = boards_with_scores[np.array(scores).argmax()]
return best_score_pair
def eval(self, episode_count, trained_eps = 0): def eval(self, episode_count, trained_eps = 0):
@ -343,6 +477,7 @@ class Network:
""" """
Do the actual evaluation Do the actual evaluation
:param sess:
:param method: Either pubeval or dumbeval :param method: Either pubeval or dumbeval
:param episodes: Amount of episodes to use in the evaluation :param episodes: Amount of episodes to use in the evaluation
:param trained_eps: :param trained_eps:
@ -366,6 +501,7 @@ class Network:
sys.stderr.write( sys.stderr.write(
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'pubeval': if method == 'pubeval':
outcomes = [] outcomes = []
for i in range(1, episodes + 1): for i in range(1, episodes + 1):
@ -373,9 +509,11 @@ class Network:
board = Board.initial_state board = Board.initial_state
while Board.outcome(board) is None: while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7)) roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0] board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7)) roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -394,9 +532,11 @@ class Network:
board = Board.initial_state board = Board.initial_state
while Board.outcome(board) is None: while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7)) roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0] board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7)) roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26] board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -456,8 +596,10 @@ class Network:
:return: :return:
""" """
difference_in_vals = 0
self.restore_model() self.restore_model()
average_diffs = 0
start_time = time.time() start_time = time.time()
def print_time_estimate(eps_completed): def print_time_estimate(eps_completed):
@ -477,27 +619,28 @@ class Network:
for episode in range(1, episodes + 1): for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1 player = 1
player = random.choice([-1,1])
prev_board = Board.initial_state prev_board = Board.initial_state
i = 0 i = 0
difference_in_values = 0
while Board.outcome(prev_board) is None: while Board.outcome(prev_board) is None:
i += 1 i += 1
self.global_step += 1 self.global_step += 1
cur_board, cur_board_value = self.make_move(prev_board, cur_board, cur_board_value = self.make_move(prev_board,
(random.randrange(1, 7), random.randrange(1, 7)), (random.randrange(1, 7), random.randrange(1, 7)),
player) player)
difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
if self.config['verbose']: if self.config['verbose']:
print("Difference in values:", difference_in_vals) print("Difference in values:", difference_in_vals)
print("Current board value :", cur_board_value) print("Current board value :", cur_board_value)
print("Current board is :\n",cur_board) print("Current board is :\n",cur_board)
# adjust weights # adjust weights
if Board.outcome(cur_board) is None: if Board.outcome(cur_board) is None:
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
@ -511,10 +654,6 @@ class Network:
final_score = np.array([Board.outcome(final_board)[1]]) final_score = np.array([Board.outcome(final_board)[1]])
scaled_final_score = ((final_score + 2) / 4) scaled_final_score = ((final_score + 2) / 4)
difference_in_values += abs(scaled_final_score-cur_board_value)
average_diffs += (difference_in_values[0][0] / (i+1))
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
sys.stderr.write("\n") sys.stderr.write("\n")
@ -527,9 +666,8 @@ class Network:
print_time_estimate(episode) print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n") sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(episode+trained_eps) self.save_model(episode+trained_eps)
return outcomes, average_diffs/len(outcomes) return outcomes, difference_in_vals[0][0]

View File

@ -57,11 +57,4 @@ boards = {initial_state,
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1)) # print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
network.play_against_network()
diff = [0, 0]
val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
print(val)
diff[0] += abs(-1-val)
diff[1] += 1
print(diff[1])

View File

@ -20,22 +20,21 @@ class Player:
sets.append([Board.calculate_legal_states(board, player, [r,0]), r]) sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
total += r total += r
sets.append([Board.calculate_legal_states(board, player, [total,0]), total]) sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
print(sets)
return sets return sets
def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False): def tmp_name(self, from_board, to_board, roll, player, total_moves):
sets = self.calc_move_sets(from_board, roll, player) sets = self.calc_move_sets(from_board, roll, player)
return_board = from_board return_board = from_board
for idx, board_set in enumerate(sets): for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0]) board_set[0] = list(board_set[0])
# print(to_board) print(to_board)
# print(board_set) print(board_set)
if to_board in board_set[0]: if to_board in board_set[0]:
total_moves -= board_set[1] total_moves -= board_set[1]
# if it's not the sum of the moves # if it's not the sum of the moves
if idx < (4 if is_quad else 2): if idx < 2:
roll[idx] = 0 roll[idx] = 0
else: else:
roll = [0,0] roll = [0,0]
@ -44,11 +43,8 @@ class Player:
return total_moves, roll, return_board return total_moves, roll, return_board
def make_human_move(self, board, roll): def make_human_move(self, board, roll):
is_quad = roll[0] == roll[1] total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4 move = ""
if is_quad:
roll = [roll[0]]*4
while total_moves != 0: while total_moves != 0:
while True: while True:
print("You have {roll} left!".format(roll=total_moves)) print("You have {roll} left!".format(roll=total_moves))
@ -64,6 +60,6 @@ class Player:
print("The correct syntax is: 2/5 for a move from index 2 to 5.") print("The correct syntax is: 2/5 for a move from index 2 to 5.")
to_board = Board.apply_moves_to_board(board, self.get_sym(), move) to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad) total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
print(Board.pretty(board)) print(Board.pretty(board))
return board return board

17
test.py
View File

@ -737,23 +737,6 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) == self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all()) np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
    """
    The pubeval features of the initial board equal the expected 28 values
    for player 1 and for player -1.
    """
    import numpy as np

    expected = np.array((0,
                         2, 0, 0, 0, 0, -5,
                         0, -3, 0, 0, 0, 5,
                         -5, 0, 0, 0, 3, 0,
                         5, 0, 0, 0, 0, -2,
                         0,
                         0, 0)).reshape(1, 28)
    board = Board.initial_state

    # Same two checks as before, in the same order: player 1, then player -1.
    for player in (1, -1):
        self.assertTrue(
            (Board.board_features_to_pubeval(board, player) == expected).all())
def test_tesauro_bars(self): def test_tesauro_bars(self):
board = list(Board.initial_state) board = list(Board.initial_state)
board[1] = 0 board[1] = 0