Compare commits

..

27 Commits

Author SHA1 Message Date
ea4efc5a2b Updated server code. 2018-06-07 21:36:06 +02:00
26c0b469eb restore restore_model 2018-05-22 20:49:10 +02:00
f170bad9b1 tesauro fat and diffs in values 2018-05-22 15:39:14 +02:00
6e061171da rm TODO 2018-05-22 15:38:04 +02:00
40c228ef01 pubeval tests 2018-05-22 15:36:23 +02:00
c2c6c89e9f Merge branch 'experimentation' into 'master'
Experimentation

See merge request Pownie/backgammon!8
2018-05-22 13:16:10 +00:00
b7708b3675 train-evaluate-save 2018-05-22 15:15:36 +02:00
bad870c27a update 0-ply-tests 2018-05-22 15:15:15 +02:00
653d6e30a8 add missing comma 2018-05-22 15:12:47 +02:00
7e51b44e33 Merge branch 'experimentation' into 'master'
tesauro fat and diffs in values

See merge request Pownie/backgammon!7
2018-05-22 13:12:10 +00:00
1fd6c35baa Merge branch 'master' into 'experimentation'
# Conflicts:
#   main.py
2018-05-22 13:11:43 +00:00
d426c1c3b5 tesauro fat and diffs in values 2018-05-22 15:10:41 +02:00
5ab144cffc add git commit status to all logs 2018-05-22 14:44:13 +02:00
cef8e54709 Merge branch 'master' of gitfub.space:Pownie/backgammon 2018-05-22 14:37:46 +02:00
2efbc446f2 log git commit status in evaluation logs 2018-05-22 14:37:27 +02:00
c54f7aca24 Merge branch 'experimentation' into 'master'
Experimentation

See merge request Pownie/backgammon!6
2018-05-22 12:36:37 +00:00
c31bc39780 More server 2018-05-22 00:26:32 +02:00
6133cb439f Merge remote-tracking branch 'origin/experimentation' into experimentation 2018-05-20 20:15:57 +02:00
5acd79b6da Slight modification to move calculation 2018-05-20 19:43:28 +02:00
=
b11e783b30 add 0-ply-tests 2018-05-20 18:50:28 +02:00
f834b10e02 remove unnecessary print 2018-05-20 16:52:05 +02:00
72f01a2a2d remove dependency on yaml 2018-05-20 16:03:58 +02:00
d14e6c5994 Everything might work, except for quad, that might be bugged. 2018-05-20 00:38:13 +02:00
a266293ecd Stuff is happening, moving is better! 2018-05-19 22:01:55 +02:00
e9a46c79df server and stuff 2018-05-19 14:12:13 +02:00
816cdfae00 fix and clean 2018-05-18 14:55:10 +02:00
ff9664eb38 Merge branch 'eager_eval' into 'master'
Eager eval

See merge request Pownie/backgammon!5
2018-05-18 12:06:12 +00:00
9 changed files with 414 additions and 246 deletions

141
app.py Normal file
View File

@ -0,0 +1,141 @@
from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
app = Flask(__name__)
app.config['JSON_ADD_STATUS'] = False
app.config['JSON_JSONP_OPTIONAL'] = False
json = FlaskJSON(app)
CORS(app)
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
network = Network(config, config['model'])
network.restore_model()
def calc_move_sets(from_board, roll, player):
board = from_board
sets = []
total = 0
for r in roll:
# print("Value of r:", r)
sets.append([Board.calculate_legal_states(board, player, [r, 0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, roll), total])
return sets
def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
sets = calc_move_sets(from_board, roll, player)
return_board = from_board
print("To board:\n",to_board)
print("All sets:\n",sets)
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
# print(to_board)
# print(board_set)
if to_board in board_set[0]:
# print("To board:", to_board)
# print(board_set[0])
# print(board_set[1])
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < (4 if is_quad else 2):
roll[idx] = 0
else:
roll = [0, 0]
return_board = to_board
break
# print("Return board!:\n",return_board)
return total_moves, roll, return_board
def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
total_moves, roll, board = tmp_name(from_board, to_board, list(roll), player, total_roll, is_quad)
return board, total_moves, roll
@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
return {'board':'0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}
def check_move(prev, curr):
# TODO: Decide on player system and implement roll properly
legal_states = Board.calculate_legal_states(tuple(prev), -1, [1,2])
truth_list = [list(curr) == list(ele) for ele in legal_states]
return any(truth_list)
@app.route('/bot_move', methods=['POST'])
def bot_move():
data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1, 7), random.randrange(1, 7))
if use_pubeval:
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
return ",".join([str(x) for x in list(board)])
@app.route('/post_board', methods=['POST'])
def post_board():
data = request.get_json(force=True)
# TODO: Fix hardcoded player
player = -1
board = [int(x) for x in data['board'].split(',')]
prev_board = [int(x) for x in data['prevBoard'].split(',')]
print(data['roll'])
roll = [int(x) for x in data['roll'].split(',')]
print(roll)
quad = data['quad'] == "true"
# print(board)
total_roll = int(data['totalRoll'])
print("total roll is:", total_roll)
return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
str_board = ",".join([str(x) for x in return_board])
str_roll = ",".join([str(x) for x in roll])
return_string = str_board + "#" + str(total_moves) + "#" + str_roll
print(return_string)
return return_string
if __name__ == '__main__':
app.run(host = '0.0.0.0', port=35270)

78
bin/0-ply-tests.rb Normal file
View File

@ -0,0 +1,78 @@
def run_stuff(board_rep, model_name, ply)
epi_count = 0
system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
while epi_count < 200000 do
system("python3 main.py --eval --model #{model_name} --eval-methods dumbeval --episodes 250 --ply #{ply} --repeat-eval 3")
system("python3 main.py --eval --model #{model_name} --eval-methods pubeval --episodes 250 --ply #{ply} --repeat-eval 3")
system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
epi_count += 2000
end
end
### ///////////////////////////////////////////////////////////////
# QUACK TESTINGS
### ///////////////////////////////////////////////////////////////
board_rep = "quack"
model_name = "quack_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack"
# model_name = "quack_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# QUACK-FAT TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "quack-fat"
model_name = "quack-fat_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack-fat"
# model_name = "quack-fat_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# QUACK-NORM TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack-norm"
# model_name = "quack-norm_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# TESAURO TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "tesauro"
# model_name = "tesauro_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)

View File

@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name)
require 'date'
models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
model_path = File.join(MODELS_DIR, model_name)
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(models_dir, 'saves', file_name)
save_path = File.join(MODELS_DIR, 'saves', file_name)
puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
end
def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end
@ -33,11 +33,9 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end
while true do
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
@ -45,3 +43,27 @@ while true do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do
save model
train model, 500
5.times do
evaluate model, 250, "pubeval"
end
5.times do
evaluate model, 250, "dumbeval"
end
end

View File

@ -15,9 +15,7 @@ class Board:
def idxs_with_checkers_of_player(board, player):
return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home
@staticmethod
def board_features_to_pubeval(board, player):
@ -51,7 +49,6 @@ class Board:
# board += ([1, 0] if np.sign(player) > 0 else [0, 1])
# return np.array(board).reshape(1,30)
# quack-fatter
@staticmethod
def board_features_quack_norm(board, player):
@ -66,7 +63,7 @@ class Board:
board.append(15 - sum(positives))
board.append(-15 - sum(negatives))
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
return np.array(board).reshape(1,30)
return np.array(board).reshape(1, 30)
# tesauro
@staticmethod
@ -95,9 +92,62 @@ class Board:
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1,0] if cur_player == 1 else [1,0])
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
return np.array(board_rep).reshape(1,198)
return np.array(board_rep).reshape(1, 198)
@staticmethod
def board_features_tesauro_fat(board, cur_player):
def ordinary_trans(val, player):
abs_val = val*player
if abs_val <= 0:
return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 1:
return (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 2:
return (1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 3:
return (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 4:
return (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 5:
return (1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 6:
return (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 7:
return (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 8:
return (1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 9:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0)
elif abs_val == 10:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
elif abs_val == 11:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
elif abs_val == 12:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
elif abs_val == 13:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0)
elif abs_val == 14:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
elif abs_val == 15:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
def bar_trans(board, player):
if player == 1: return (abs(board[0]/2),)
elif player == -1: return (abs(board[25]/2),)
board_rep = []
for player in [1, -1]:
for x in board[1:25]:
board_rep += ordinary_trans(x, player)
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
return np.array(board_rep).reshape(1, len(board_rep))
@staticmethod
@ -197,9 +247,6 @@ class Board:
# Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value):
if face_value == 0:
return [board]
@ -221,23 +268,13 @@ class Board:
# print("Dice permuts:",dice_permutations)
for roll in dice_permutations:
# Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]:
# Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards)

30
main.py
View File

@ -2,6 +2,7 @@ import argparse
import sys
import os
import time
import subprocess
# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
@ -77,27 +78,20 @@ if not os.path.isdir(model_path()):
if not os.path.isdir(log_path):
os.mkdir(log_path)
def save_config():
import yaml
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
# config_path = os.path.join(checkpoint_path, 'config')
# with open(config_path, 'a+') as f:
# print("lol")
print(yaml.dump(config))
# Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
format_vars = { 'trained_eps': trained_eps,
'count': len(outcome),
'sum': sum(outcome),
'mean': sum(outcome) / len(outcome),
'time': int(time.time()),
'average_diff_in_vals': diff_in_values/len(outcome)
'average_diff_in_vals': diff_in_values,
'commit': commit
}
with open(log_path, 'a+') as f:
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@ -108,9 +102,12 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
:param log_path:
:return:
"""
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
format_vars = { 'commit': commit,
'trained_eps': trained_eps,
'method': outcome[0],
'count': len(scores),
'sum': sum(scores),
@ -118,9 +115,10 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
'time': int(time.time())
}
with open(log_path, 'a+') as f:
f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
@ -130,9 +128,10 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
'mean': sum(scores) / len(scores),
'time': time,
'index': index,
'commit': commit
}
with open(log_path, 'a+') as f:
f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
def find_board_rep():
checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
@ -172,7 +171,6 @@ if __name__ == "__main__":
# Set up network
from network import Network
save_config()
# Set up variables
episode_count = config['episode_count']
@ -211,6 +209,8 @@ if __name__ == "__main__":
elif args.eval:
network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained
# Evaluation measures are described in `config`

View File

@ -21,10 +21,10 @@ class Network:
'quack' : (28, Board.board_features_quack),
'tesauro' : (198, Board.board_features_tesauro),
'quack-norm' : (30, Board.board_features_quack_norm),
'tesauro-fat' : (726, Board.board_features_tesauro_fat),
'tesauro-poop': (198, Board.board_features_tesauro_wrong)
}
def custom_tanh(self, x, name=None):
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@ -39,6 +39,11 @@ class Network:
'0': self.make_move_0_ply
}
self.max_or_min = {
1: np.argmax,
-1: np.argmin
}
tf.enable_eager_execution()
xavier_init = tf.contrib.layers.xavier_initializer()
@ -93,7 +98,7 @@ class Network:
:param decay_steps: The amount of steps between each decay
:return: The result of the exponential decay performed on the learning rate
"""
res = max_lr * decay_rate**(global_step // decay_steps)
res = max_lr * decay_rate ** (global_step // decay_steps)
return res
def do_backprop(self, prev_state, value_next):
@ -104,20 +109,19 @@ class Network:
:return: Nothing, the calculation is performed on the model of the network
"""
self.learning_rate = tf.maximum(self.min_learning_rate,
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
name="learning_rate")
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
name="learning_rate")
with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
@ -144,8 +148,9 @@ class Network:
:param episode_count:
:return:
"""
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
#self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
print("[NETWK] ({name}) Saving model to:".format(name=self.name),
os.path.join(self.checkpoint_path, 'model.ckpt'))
@ -165,16 +170,14 @@ class Network:
:param states: A number of states. The states have to be transformed before being given to this function.
:return:
"""
values = self.model.predict_on_batch(states)
return values
return self.model.predict_on_batch(states)
def restore_model(self):
"""
Restore a model for a session, such that a trained model and either be further trained or
used for evaluation
:param sess: Current session
:return: Nothing. It's a side-effect that a model gets restored for the network.
"""
@ -186,9 +189,6 @@ class Network:
str(latest_checkpoint))
tfe.Saver(self.model.variables).restore(latest_checkpoint)
# variables_names = [v.name for v in self.model.variables]
# Restore trained episode count for model
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
if os.path.isfile(episode_count_path):
@ -211,7 +211,6 @@ class Network:
and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
:param sess:
:param board: Current board
:param roll: Current roll
:param player: Current player
@ -221,13 +220,12 @@ class Network:
legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
scores = self.model.predict_on_batch(legal_states)
transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
best_score_idx = np.argmax(np.array(transformed_scores))
best_move = legal_moves[best_score_idx]
best_score = scores[best_score_idx]
best_score_idx = self.max_or_min[player](scores)
return [best_move, best_score]
best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
return (best_move, best_score)
def make_move_1_ply(self, board, roll, player):
"""
@ -237,9 +235,9 @@ class Network:
:param player:
:return:
"""
# start = time.time()
start = time.time()
best_pair = self.calculate_1_ply(board, roll, player)
# print(time.time() - start)
#print(time.time() - start)
return best_pair
@ -248,35 +246,31 @@ class Network:
Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
all moves and scores are found for them. The expected score is then calculated for each of the boards from the
0-ply.
:param sess:
:param board:
:param roll: The original roll
:param player: The current player
:return: Best possible move based on 1-ply look-ahead
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
scores = self.calc_vals(legal_states)
scores = [score.numpy() for score in scores]
scores = [ score.numpy()
for score
in self.calc_vals(legal_states) ]
moves_and_scores = list(zip(init_legal_states, scores))
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
scores = self.do_ply(best_boards, player)
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
best_score_idx = self.max_or_min[player](scores)
# best_score_idx = np.array(trans_scores).argmax()
scores, trans_scores = self.do_ply(best_boards, player)
best_score_idx = np.array(trans_scores).argmax()
return [best_boards[best_score_idx], scores[best_score_idx]]
return (best_boards[best_score_idx], scores[best_score_idx])
def do_ply(self, boards, player):
"""
@ -285,7 +279,6 @@ class Network:
allowing the function to search deeper, which could result in an even larger search space. If we wish
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
:param sess:
:param boards: The boards to try all rolls on
:param player: The player of the previous ply
:return: An array of scores where each index describes one of the boards which was given as param
@ -305,11 +298,11 @@ class Network:
length_list = []
test_list = []
# Prepping of data
start= time.time()
# start = time.time()
for board in boards:
length = 0
for roll in all_rolls:
all_states = list(Board.calculate_legal_states(board, player*-1, roll))
all_states = Board.calculate_legal_states(board, player*-1, roll)
for state in all_states:
state = np.array(self.board_trans_func(state, player*-1)[0])
test_list.append(state)
@ -320,146 +313,19 @@ class Network:
start = time.time()
all_scores_legit = self.model.predict_on_batch(np.array(test_list))
all_scores = self.model.predict_on_batch(np.array(test_list))
split_scores = []
from_idx = 0
for length in length_list:
split_scores.append(all_scores_legit[from_idx:from_idx+length])
split_scores.append(all_scores[from_idx:from_idx+length])
from_idx += length
means_splits = [tf.reduce_mean(scores) for scores in split_scores]
transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]
# print(time.time() - start)
return ([means_splits, transformed_means_splits])
def calc_n_ply(self, n_init, sess, board, player, roll):
"""
:param n_init:
:param sess:
:param board:
:param player:
:param roll:
:return:
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
# find all values for the above boards
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
return best_move_score_pair
def n_ply(self, n_init, sess, boards_init, player_init):
"""
:param n_init:
:param sess:
:param boards_init:
:param player_init:
:return:
"""
def ply(n, boards, player):
def calculate_possible_states(board):
possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
(1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
(2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
(4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
(6, 6) ]
# for roll in possible_rolls:
# print(len(Board.calculate_legal_states(board, player, roll)))
return [ Board.calculate_legal_states(board, player, roll)
for roll
in possible_rolls ]
def find_best_state_score(boards):
score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
for board
in boards ]
scores = [ pair[1]
for pair
in score_pairs ]
best_score_pair = score_pairs[np.array(scores).argmax()]
return best_score_pair
def average_score(boards):
return sum(boards)/len(boards)
def average_ply_score(board):
states_for_rolls = calculate_possible_states(board)
best_state_score_for_each_roll = [
find_best_state_score(states)
for states
in states_for_rolls ]
best_score_for_each_roll = [ x[1]
for x
in best_state_score_for_each_roll ]
average_score_var = average_score(best_score_for_each_roll)
return average_score_var
if n == 1:
average_score_pairs = [ (board, average_ply_score(board))
for board
in boards ]
return average_score_pairs
elif n > 1: # n != 1
def average_for_score_pairs(score_pairs):
scores = [ pair[1]
for pair
in score_pairs ]
return sum(scores)/len(scores)
def average_plain(scores):
return sum(scores)/len(scores)
print("+"*20)
print(n)
print(type(boards))
print(boards)
possible_states_for_boards = [
(board, calculate_possible_states(board))
for board
in boards ]
average_score_pairs = [
(inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
for inner_board
in inner_boards[1] ]))
for inner_boards
in possible_states_for_boards ]
return average_score_pairs
else:
assert False
if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
boards_with_scores = ply(n_init, boards_init, -1 * player_init)
#print("Boards with scores:",boards_with_scores)
scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
for pair
in boards_with_scores ]
#print("All the scores:",scores)
best_score_pair = boards_with_scores[np.array(scores).argmax()]
return best_score_pair
# print("/"*50)
return means_splits
def eval(self, episode_count, trained_eps = 0):
@ -477,7 +343,6 @@ class Network:
"""
Do the actual evaluation
:param sess:
:param method: Either pubeval or dumbeval
:param episodes: Amount of episodes to use in the evaluation
:param trained_eps:
@ -501,7 +366,6 @@ class Network:
sys.stderr.write(
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'pubeval':
outcomes = []
for i in range(1, episodes + 1):
@ -509,11 +373,9 @@ class Network:
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -532,11 +394,9 @@ class Network:
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -596,10 +456,8 @@ class Network:
:return:
"""
difference_in_vals = 0
self.restore_model()
average_diffs = 0
start_time = time.time()
def print_time_estimate(eps_completed):
@ -619,28 +477,27 @@ class Network:
for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
player = 1
# player = 1
player = random.choice([-1,1])
prev_board = Board.initial_state
i = 0
difference_in_values = 0
while Board.outcome(prev_board) is None:
i += 1
self.global_step += 1
cur_board, cur_board_value = self.make_move(prev_board,
(random.randrange(1, 7), random.randrange(1, 7)),
player)
difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
if self.config['verbose']:
print("Difference in values:", difference_in_vals)
print("Current board value :", cur_board_value)
print("Current board is :\n",cur_board)
# adjust weights
if Board.outcome(cur_board) is None:
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
@ -654,6 +511,10 @@ class Network:
final_score = np.array([Board.outcome(final_board)[1]])
scaled_final_score = ((final_score + 2) / 4)
difference_in_values += abs(scaled_final_score-cur_board_value)
average_diffs += (difference_in_values[0][0] / (i+1))
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
sys.stderr.write("\n")
@ -666,8 +527,9 @@ class Network:
print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(episode+trained_eps)
return outcomes, difference_in_vals[0][0]
return outcomes, average_diffs/len(outcomes)

View File

@ -57,4 +57,11 @@ boards = {initial_state,
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
network.play_against_network()
diff = [0, 0]
val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
print(val)
diff[0] += abs(-1-val)
diff[1] += 1
print(diff[1])

View File

@ -20,21 +20,22 @@ class Player:
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
print(sets)
return sets
def tmp_name(self, from_board, to_board, roll, player, total_moves):
def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
sets = self.calc_move_sets(from_board, roll, player)
return_board = from_board
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
print(to_board)
print(board_set)
# print(to_board)
# print(board_set)
if to_board in board_set[0]:
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < 2:
if idx < (4 if is_quad else 2):
roll[idx] = 0
else:
roll = [0,0]
@ -43,8 +44,11 @@ class Player:
return total_moves, roll, return_board
def make_human_move(self, board, roll):
total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
move = ""
is_quad = roll[0] == roll[1]
total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
if is_quad:
roll = [roll[0]]*4
while total_moves != 0:
while True:
print("You have {roll} left!".format(roll=total_moves))
@ -60,6 +64,6 @@ class Player:
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
print(Board.pretty(board))
return board

17
test.py
View File

@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
board = Board.initial_state
expected = (0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0,
0, 0)
import numpy as np
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
np.array(expected).reshape(1, 28)).all())
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
np.array(expected).reshape(1, 28)).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0