Compare commits

..

No commits in common. "master" and "eager_eval" have entirely different histories.

9 changed files with 247 additions and 415 deletions

141
app.py
View File

@ -1,141 +0,0 @@
from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
app = Flask(__name__)
app.config['JSON_ADD_STATUS'] = False
app.config['JSON_JSONP_OPTIONAL'] = False
json = FlaskJSON(app)
CORS(app)
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
network = Network(config, config['model'])
network.restore_model()
def calc_move_sets(from_board, roll, player):
board = from_board
sets = []
total = 0
for r in roll:
# print("Value of r:", r)
sets.append([Board.calculate_legal_states(board, player, [r, 0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, roll), total])
return sets
def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
sets = calc_move_sets(from_board, roll, player)
return_board = from_board
print("To board:\n",to_board)
print("All sets:\n",sets)
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
# print(to_board)
# print(board_set)
if to_board in board_set[0]:
# print("To board:", to_board)
# print(board_set[0])
# print(board_set[1])
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < (4 if is_quad else 2):
roll[idx] = 0
else:
roll = [0, 0]
return_board = to_board
break
# print("Return board!:\n",return_board)
return total_moves, roll, return_board
def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
total_moves, roll, board = tmp_name(from_board, to_board, list(roll), player, total_roll, is_quad)
return board, total_moves, roll
@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
return {'board':'0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}
def check_move(prev, curr):
# TODO: Decide on player system and implement roll properly
legal_states = Board.calculate_legal_states(tuple(prev), -1, [1,2])
truth_list = [list(curr) == list(ele) for ele in legal_states]
return any(truth_list)
@app.route('/bot_move', methods=['POST'])
def bot_move():
data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1, 7), random.randrange(1, 7))
if use_pubeval:
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
return ",".join([str(x) for x in list(board)])
@app.route('/post_board', methods=['POST'])
def post_board():
data = request.get_json(force=True)
# TODO: Fix hardcoded player
player = -1
board = [int(x) for x in data['board'].split(',')]
prev_board = [int(x) for x in data['prevBoard'].split(',')]
print(data['roll'])
roll = [int(x) for x in data['roll'].split(',')]
print(roll)
quad = data['quad'] == "true"
# print(board)
total_roll = int(data['totalRoll'])
print("total roll is:", total_roll)
return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
str_board = ",".join([str(x) for x in return_board])
str_roll = ",".join([str(x) for x in roll])
return_string = str_board + "#" + str(total_moves) + "#" + str_roll
print(return_string)
return return_string
if __name__ == '__main__':
app.run(host = '0.0.0.0', port=35270)

View File

@ -1,78 +0,0 @@
def run_stuff(board_rep, model_name, ply)
epi_count = 0
system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
while epi_count < 200000 do
system("python3 main.py --eval --model #{model_name} --eval-methods dumbeval --episodes 250 --ply #{ply} --repeat-eval 3")
system("python3 main.py --eval --model #{model_name} --eval-methods pubeval --episodes 250 --ply #{ply} --repeat-eval 3")
system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
epi_count += 2000
end
end
### ///////////////////////////////////////////////////////////////
# QUACK TESTINGS
### ///////////////////////////////////////////////////////////////
board_rep = "quack"
model_name = "quack_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack"
# model_name = "quack_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# QUACK-FAT TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "quack-fat"
model_name = "quack-fat_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack-fat"
# model_name = "quack-fat_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# QUACK-NORM TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack-norm"
# model_name = "quack-norm_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# TESAURO TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "tesauro"
# model_name = "tesauro_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)

View File

@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name)
require 'date'
model_path = File.join(MODELS_DIR, model_name)
models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(MODELS_DIR, 'saves', file_name)
save_path = File.join(models_dir, 'saves', file_name)
puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
end
def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end
@ -33,37 +33,15 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
3.times do
evaluate model, 250, "pubeval"
end
3.times do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do
save model
train model, 500
5.times do
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
5.times do
3.times do
evaluate model, 250, "dumbeval"
end
end

View File

@ -15,7 +15,9 @@ class Board:
def idxs_with_checkers_of_player(board, player):
return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home
@staticmethod
def board_features_to_pubeval(board, player):
@ -49,6 +51,7 @@ class Board:
# board += ([1, 0] if np.sign(player) > 0 else [0, 1])
# return np.array(board).reshape(1,30)
# quack-fatter
@staticmethod
def board_features_quack_norm(board, player):
@ -63,7 +66,7 @@ class Board:
board.append(15 - sum(positives))
board.append(-15 - sum(negatives))
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
return np.array(board).reshape(1, 30)
return np.array(board).reshape(1,30)
# tesauro
@staticmethod
@ -92,62 +95,9 @@ class Board:
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
board_rep += ([1,0] if cur_player == 1 else [1,0])
return np.array(board_rep).reshape(1, 198)
@staticmethod
def board_features_tesauro_fat(board, cur_player):
def ordinary_trans(val, player):
abs_val = val*player
if abs_val <= 0:
return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 1:
return (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 2:
return (1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 3:
return (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 4:
return (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 5:
return (1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 6:
return (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 7:
return (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 8:
return (1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 9:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0)
elif abs_val == 10:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
elif abs_val == 11:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
elif abs_val == 12:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
elif abs_val == 13:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0)
elif abs_val == 14:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
elif abs_val == 15:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
def bar_trans(board, player):
if player == 1: return (abs(board[0]/2),)
elif player == -1: return (abs(board[25]/2),)
board_rep = []
for player in [1, -1]:
for x in board[1:25]:
board_rep += ordinary_trans(x, player)
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
return np.array(board_rep).reshape(1, len(board_rep))
return np.array(board_rep).reshape(1,198)
@staticmethod
@ -247,6 +197,9 @@ class Board:
# Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value):
if face_value == 0:
return [board]
@ -268,13 +221,23 @@ class Board:
# print("Dice permuts:",dice_permutations)
for roll in dice_permutations:
# Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]:
# Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards)

30
main.py
View File

@ -2,7 +2,6 @@ import argparse
import sys
import os
import time
import subprocess
# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
@ -78,20 +77,27 @@ if not os.path.isdir(model_path()):
if not os.path.isdir(log_path):
os.mkdir(log_path)
def save_config():
import yaml
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
# config_path = os.path.join(checkpoint_path, 'config')
# with open(config_path, 'a+') as f:
# print("lol")
print(yaml.dump(config))
# Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
format_vars = { 'trained_eps': trained_eps,
'count': len(outcome),
'sum': sum(outcome),
'mean': sum(outcome) / len(outcome),
'time': int(time.time()),
'average_diff_in_vals': diff_in_values,
'commit': commit
'average_diff_in_vals': diff_in_values/len(outcome)
}
with open(log_path, 'a+') as f:
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@ -102,12 +108,9 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
:param log_path:
:return:
"""
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'commit': commit,
'trained_eps': trained_eps,
format_vars = { 'trained_eps': trained_eps,
'method': outcome[0],
'count': len(scores),
'sum': sum(scores),
@ -115,10 +118,9 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
'time': int(time.time())
}
with open(log_path, 'a+') as f:
f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")
f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
@ -128,10 +130,9 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
'mean': sum(scores) / len(scores),
'time': time,
'index': index,
'commit': commit
}
with open(log_path, 'a+') as f:
f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
def find_board_rep():
checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
@ -171,6 +172,7 @@ if __name__ == "__main__":
# Set up network
from network import Network
save_config()
# Set up variables
episode_count = config['episode_count']
@ -209,8 +211,6 @@ if __name__ == "__main__":
elif args.eval:
network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained
# Evaluation measures are described in `config`

View File

@ -21,10 +21,10 @@ class Network:
'quack' : (28, Board.board_features_quack),
'tesauro' : (198, Board.board_features_tesauro),
'quack-norm' : (30, Board.board_features_quack_norm),
'tesauro-fat' : (726, Board.board_features_tesauro_fat),
'tesauro-poop': (198, Board.board_features_tesauro_wrong)
}
def custom_tanh(self, x, name=None):
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@ -39,11 +39,6 @@ class Network:
'0': self.make_move_0_ply
}
self.max_or_min = {
1: np.argmax,
-1: np.argmin
}
tf.enable_eager_execution()
xavier_init = tf.contrib.layers.xavier_initializer()
@ -98,7 +93,7 @@ class Network:
:param decay_steps: The amount of steps between each decay
:return: The result of the exponential decay performed on the learning rate
"""
res = max_lr * decay_rate ** (global_step // decay_steps)
res = max_lr * decay_rate**(global_step // decay_steps)
return res
def do_backprop(self, prev_state, value_next):
@ -109,19 +104,20 @@ class Network:
:return: Nothing, the calculation is performed on the model of the network
"""
self.learning_rate = tf.maximum(self.min_learning_rate,
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
name="learning_rate")
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
name="learning_rate")
with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
@ -148,9 +144,8 @@ class Network:
:param episode_count:
:return:
"""
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
#self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
print("[NETWK] ({name}) Saving model to:".format(name=self.name),
os.path.join(self.checkpoint_path, 'model.ckpt'))
@ -170,14 +165,16 @@ class Network:
:param states: A number of states. The states have to be transformed before being given to this function.
:return:
"""
return self.model.predict_on_batch(states)
values = self.model.predict_on_batch(states)
return values
def restore_model(self):
"""
Restore a model for a session, such that a trained model and either be further trained or
used for evaluation
:param sess: Current session
:return: Nothing. It's a side-effect that a model gets restored for the network.
"""
@ -189,6 +186,9 @@ class Network:
str(latest_checkpoint))
tfe.Saver(self.model.variables).restore(latest_checkpoint)
# variables_names = [v.name for v in self.model.variables]
# Restore trained episode count for model
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
if os.path.isfile(episode_count_path):
@ -211,6 +211,7 @@ class Network:
and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
:param sess:
:param board: Current board
:param roll: Current roll
:param player: Current player
@ -220,12 +221,13 @@ class Network:
legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
scores = self.model.predict_on_batch(legal_states)
transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
best_score_idx = self.max_or_min[player](scores)
best_score_idx = np.argmax(np.array(transformed_scores))
best_move = legal_moves[best_score_idx]
best_score = scores[best_score_idx]
best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
return (best_move, best_score)
return [best_move, best_score]
def make_move_1_ply(self, board, roll, player):
"""
@ -235,9 +237,9 @@ class Network:
:param player:
:return:
"""
start = time.time()
# start = time.time()
best_pair = self.calculate_1_ply(board, roll, player)
#print(time.time() - start)
# print(time.time() - start)
return best_pair
@ -246,31 +248,35 @@ class Network:
Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
all moves and scores are found for them. The expected score is then calculated for each of the boards from the
0-ply.
:param sess:
:param board:
:param roll: The original roll
:param player: The current player
:return: Best possible move based on 1-ply look-ahead
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
scores = [ score.numpy()
for score
in self.calc_vals(legal_states) ]
scores = self.calc_vals(legal_states)
scores = [score.numpy() for score in scores]
moves_and_scores = list(zip(init_legal_states, scores))
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
scores = self.do_ply(best_boards, player)
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
best_score_idx = self.max_or_min[player](scores)
# best_score_idx = np.array(trans_scores).argmax()
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
return (best_boards[best_score_idx], scores[best_score_idx])
scores, trans_scores = self.do_ply(best_boards, player)
best_score_idx = np.array(trans_scores).argmax()
return [best_boards[best_score_idx], scores[best_score_idx]]
def do_ply(self, boards, player):
"""
@ -279,6 +285,7 @@ class Network:
allowing the function to search deeper, which could result in an even larger search space. If we wish
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
:param sess:
:param boards: The boards to try all rolls on
:param player: The player of the previous ply
:return: An array of scores where each index describes one of the boards which was given as param
@ -298,11 +305,11 @@ class Network:
length_list = []
test_list = []
# Prepping of data
# start = time.time()
start= time.time()
for board in boards:
length = 0
for roll in all_rolls:
all_states = Board.calculate_legal_states(board, player*-1, roll)
all_states = list(Board.calculate_legal_states(board, player*-1, roll))
for state in all_states:
state = np.array(self.board_trans_func(state, player*-1)[0])
test_list.append(state)
@ -313,19 +320,146 @@ class Network:
start = time.time()
all_scores = self.model.predict_on_batch(np.array(test_list))
all_scores_legit = self.model.predict_on_batch(np.array(test_list))
split_scores = []
from_idx = 0
for length in length_list:
split_scores.append(all_scores[from_idx:from_idx+length])
split_scores.append(all_scores_legit[from_idx:from_idx+length])
from_idx += length
means_splits = [tf.reduce_mean(scores) for scores in split_scores]
transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]
# print(time.time() - start)
# print("/"*50)
return means_splits
return ([means_splits, transformed_means_splits])
def calc_n_ply(self, n_init, sess, board, player, roll):
"""
:param n_init:
:param sess:
:param board:
:param player:
:param roll:
:return:
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
# find all values for the above boards
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
return best_move_score_pair
def n_ply(self, n_init, sess, boards_init, player_init):
"""
:param n_init:
:param sess:
:param boards_init:
:param player_init:
:return:
"""
def ply(n, boards, player):
def calculate_possible_states(board):
possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
(1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
(2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
(4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
(6, 6) ]
# for roll in possible_rolls:
# print(len(Board.calculate_legal_states(board, player, roll)))
return [ Board.calculate_legal_states(board, player, roll)
for roll
in possible_rolls ]
def find_best_state_score(boards):
score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
for board
in boards ]
scores = [ pair[1]
for pair
in score_pairs ]
best_score_pair = score_pairs[np.array(scores).argmax()]
return best_score_pair
def average_score(boards):
return sum(boards)/len(boards)
def average_ply_score(board):
states_for_rolls = calculate_possible_states(board)
best_state_score_for_each_roll = [
find_best_state_score(states)
for states
in states_for_rolls ]
best_score_for_each_roll = [ x[1]
for x
in best_state_score_for_each_roll ]
average_score_var = average_score(best_score_for_each_roll)
return average_score_var
if n == 1:
average_score_pairs = [ (board, average_ply_score(board))
for board
in boards ]
return average_score_pairs
elif n > 1: # n != 1
def average_for_score_pairs(score_pairs):
scores = [ pair[1]
for pair
in score_pairs ]
return sum(scores)/len(scores)
def average_plain(scores):
return sum(scores)/len(scores)
print("+"*20)
print(n)
print(type(boards))
print(boards)
possible_states_for_boards = [
(board, calculate_possible_states(board))
for board
in boards ]
average_score_pairs = [
(inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
for inner_board
in inner_boards[1] ]))
for inner_boards
in possible_states_for_boards ]
return average_score_pairs
else:
assert False
if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
boards_with_scores = ply(n_init, boards_init, -1 * player_init)
#print("Boards with scores:",boards_with_scores)
scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
for pair
in boards_with_scores ]
#print("All the scores:",scores)
best_score_pair = boards_with_scores[np.array(scores).argmax()]
return best_score_pair
def eval(self, episode_count, trained_eps = 0):
@ -343,6 +477,7 @@ class Network:
"""
Do the actual evaluation
:param sess:
:param method: Either pubeval or dumbeval
:param episodes: Amount of episodes to use in the evaluation
:param trained_eps:
@ -366,6 +501,7 @@ class Network:
sys.stderr.write(
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'pubeval':
outcomes = []
for i in range(1, episodes + 1):
@ -373,9 +509,11 @@ class Network:
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -394,9 +532,11 @@ class Network:
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -456,8 +596,10 @@ class Network:
:return:
"""
difference_in_vals = 0
self.restore_model()
average_diffs = 0
start_time = time.time()
def print_time_estimate(eps_completed):
@ -477,27 +619,28 @@ class Network:
for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1
player = random.choice([-1,1])
player = 1
prev_board = Board.initial_state
i = 0
difference_in_values = 0
while Board.outcome(prev_board) is None:
i += 1
self.global_step += 1
cur_board, cur_board_value = self.make_move(prev_board,
(random.randrange(1, 7), random.randrange(1, 7)),
player)
difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
if self.config['verbose']:
print("Difference in values:", difference_in_vals)
print("Current board value :", cur_board_value)
print("Current board is :\n",cur_board)
# adjust weights
if Board.outcome(cur_board) is None:
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
@ -511,10 +654,6 @@ class Network:
final_score = np.array([Board.outcome(final_board)[1]])
scaled_final_score = ((final_score + 2) / 4)
difference_in_values += abs(scaled_final_score-cur_board_value)
average_diffs += (difference_in_values[0][0] / (i+1))
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
sys.stderr.write("\n")
@ -527,9 +666,8 @@ class Network:
print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(episode+trained_eps)
return outcomes, average_diffs/len(outcomes)
return outcomes, difference_in_vals[0][0]

View File

@ -57,11 +57,4 @@ boards = {initial_state,
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
diff = [0, 0]
val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
print(val)
diff[0] += abs(-1-val)
diff[1] += 1
print(diff[1])
network.play_against_network()

View File

@ -20,22 +20,21 @@ class Player:
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
print(sets)
return sets
def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
def tmp_name(self, from_board, to_board, roll, player, total_moves):
sets = self.calc_move_sets(from_board, roll, player)
return_board = from_board
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
# print(to_board)
# print(board_set)
print(to_board)
print(board_set)
if to_board in board_set[0]:
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < (4 if is_quad else 2):
if idx < 2:
roll[idx] = 0
else:
roll = [0,0]
@ -44,11 +43,8 @@ class Player:
return total_moves, roll, return_board
def make_human_move(self, board, roll):
is_quad = roll[0] == roll[1]
total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
if is_quad:
roll = [roll[0]]*4
total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
move = ""
while total_moves != 0:
while True:
print("You have {roll} left!".format(roll=total_moves))
@ -64,6 +60,6 @@ class Player:
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
print(Board.pretty(board))
return board

17
test.py
View File

@ -737,23 +737,6 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
board = Board.initial_state
expected = (0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0,
0, 0)
import numpy as np
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
np.array(expected).reshape(1, 28)).all())
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
np.array(expected).reshape(1, 28)).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0