Compare commits
27 Commits
eager_eval
...
master
Author | SHA1 | Date | |
---|---|---|---|
ea4efc5a2b | |||
26c0b469eb | |||
f170bad9b1 | |||
6e061171da | |||
40c228ef01 | |||
c2c6c89e9f | |||
b7708b3675 | |||
bad870c27a | |||
653d6e30a8 | |||
7e51b44e33 | |||
1fd6c35baa | |||
d426c1c3b5 | |||
5ab144cffc | |||
cef8e54709 | |||
2efbc446f2 | |||
c54f7aca24 | |||
c31bc39780 | |||
6133cb439f | |||
5acd79b6da | |||
|
b11e783b30 | ||
f834b10e02 | |||
72f01a2a2d | |||
d14e6c5994 | |||
a266293ecd | |||
e9a46c79df | |||
816cdfae00 | |||
ff9664eb38 |
141
app.py
Normal file
141
app.py
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from flask_json import FlaskJSON, as_json_p
|
||||||
|
from flask_cors import CORS
|
||||||
|
from board import Board
|
||||||
|
from eval import Eval
|
||||||
|
import main
|
||||||
|
import random
|
||||||
|
from network import Network
|
||||||
|
|
||||||
|
# Flask application object; the route decorators further down attach to it.
app = Flask(__name__)
app.config.update(
    JSON_ADD_STATUS=False,
    JSON_JSONP_OPTIONAL=False,
)

json = FlaskJSON(app)
CORS(app)

# Work on a private copy of main.config so the overrides below stay local
# to this server process.
config = main.config.copy()
for _key, _value in (
    ('model', "player_testings"),
    ('ply', "0"),
    ('board_representation', 'tesauro'),
):
    config[_key] = _value

# Build the network for the configured model and load its saved state.
network = Network(config, config['model'])
network.restore_model()
|
||||||
|
|
||||||
|
|
||||||
|
def calc_move_sets(from_board, roll, player):
    """Collect the legal states for each single die and for the whole roll.

    :param from_board: Board the move starts from.
    :param roll: Iterable of die values.
    :param player: Player the legal states are calculated for.
    :return: A list of ``[states, pips]`` pairs. There is one pair per die in
        ``roll`` (calculated with the roll ``[die, 0]``), followed by one pair
        for the full ``roll`` whose ``pips`` is the sum of all dice.
    """
    move_sets = [
        [Board.calculate_legal_states(from_board, player, [die, 0]), die]
        for die in roll
    ]
    move_sets.append(
        [Board.calculate_legal_states(from_board, player, roll), sum(roll)]
    )
    return move_sets
|
||||||
|
|
||||||
|
|
||||||
|
def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
    """Match ``to_board`` against the move sets of ``roll`` and consume the dice used.

    The move sets come from ``calc_move_sets``: one per single die, then one
    for the complete roll. The first set containing ``to_board`` wins.

    :param from_board: Board before the move.
    :param to_board: Board the caller claims to have reached.
    :param roll: Mutable list of die values; the matching entry is zeroed in place.
    :param player: Player making the move.
    :param total_moves: Pips still available before this move.
    :param is_quad: When true, the first four sets are treated as single dice
        instead of the first two.
    :return: ``(total_moves, roll, board)``. On a match ``total_moves`` is
        reduced by the pips of the matching set and ``board`` is ``to_board``;
        otherwise the inputs are returned unchanged with ``from_board``.
    """
    candidates = calc_move_sets(from_board, roll, player)
    print("To board:\n", to_board)
    print("All sets:\n", candidates)

    # Number of leading sets that correspond to a single die.
    single_die_sets = 4 if is_quad else 2

    for position, (states, pips) in enumerate(candidates):
        if to_board not in list(states):
            continue

        if position >= single_die_sets:
            # The move used the complete roll.
            # NOTE(review): this yields a two-element roll even when is_quad
            # is set -- confirm that the client expects that.
            roll = [0, 0]
        else:
            roll[position] = 0
        return total_moves - pips, roll, to_board

    return total_moves, roll, from_board
|
||||||
|
|
||||||
|
def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
    """Run ``tmp_name`` on a list copy of ``roll`` and reorder its result.

    :return: ``(board, total_moves, roll)`` -- the same three values
        ``tmp_name`` returns, with the board first.
    """
    remaining_moves, remaining_roll, resulting_board = tmp_name(
        from_board, to_board, list(roll), player, total_roll, is_quad
    )
    return resulting_board, remaining_moves, remaining_roll
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
    """Return a fixed board as a comma-separated string under the key ``board``."""
    # Hard-coded 26-entry board; presumably the initial position -- confirm.
    fixed_board = '0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'
    return {'board': fixed_board}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def check_move(prev, curr):
    """Tell whether ``curr`` is among the legal states reachable from ``prev``.

    The player (-1) and the roll ([1, 2]) are hard-coded for now.

    :param prev: Board before the move.
    :param curr: Board after the move.
    :return: True when ``curr`` equals one of the legal states, else False.
    """
    # TODO: Decide on player system and implement roll properly
    reachable = Board.calculate_legal_states(tuple(prev), -1, [1, 2])
    return any(list(curr) == list(state) for state in reachable)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/bot_move', methods=['POST'])
def bot_move():
    """Roll two dice and let the bot (player 1) move on the posted board.

    The JSON body must contain:

    * ``board``: comma-separated integers describing the current board.
    * ``pubeval``: flag selecting ``Eval.make_pubeval_move`` instead of the
      network. Real booleans and numbers use normal truthiness; strings are
      read textually, so ``"false"``, ``"0"`` and ``""`` mean "off".

    :return: The board after the bot's move as a comma-separated string.
    """
    data = request.get_json(force=True)

    board = [int(x) for x in data['board'].split(',')]

    # bool("false") is True, so a flag that arrives as a string has to be
    # interpreted textually (post_board parses its 'quad' flag the same way).
    pubeval_flag = data['pubeval']
    if isinstance(pubeval_flag, str):
        use_pubeval = pubeval_flag.strip().lower() not in ("", "false", "0")
    else:
        use_pubeval = bool(pubeval_flag)

    roll = (random.randrange(1, 7), random.randrange(1, 7))

    # Both calls return a pair whose first element is the new board; the
    # second element is not needed here.
    if use_pubeval:
        board, _ = Eval.make_pubeval_move(tuple(board), 1, roll)
    else:
        board, _ = network.make_move(tuple(board), roll, 1)

    return ",".join([str(x) for x in list(board)])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/post_board', methods=['POST'])
def post_board():
    """Check a posted player move and report the resulting board and remaining dice.

    The JSON body must contain ``board``, ``prevBoard`` and ``roll`` as
    comma-separated integers, ``quad`` as the string ``"true"`` for a quad
    roll, and ``totalRoll`` as the pips still available.

    :return: ``"<board>#<total_moves>#<roll>"``, where board and roll are
        comma-separated and come from ``calc_move_stuff``.
    """
    payload = request.get_json(force=True)

    def to_int_tuple(csv_text):
        return tuple(int(part) for part in csv_text.split(','))

    def to_csv(values):
        return ",".join(str(value) for value in values)

    # TODO: Fix hardcoded player
    player = -1

    target_board = to_int_tuple(payload['board'])
    previous_board = to_int_tuple(payload['prevBoard'])

    print(payload['roll'])
    dice = [int(part) for part in payload['roll'].split(',')]
    print(dice)

    is_quad = payload['quad'] == "true"

    total_roll = int(payload['totalRoll'])
    print("total roll is:", total_roll)

    result_board, moves_left, dice_left = calc_move_stuff(
        previous_board, target_board, tuple(dice), player, total_roll, is_quad
    )

    response = "#".join((to_csv(result_board), str(moves_left), to_csv(dice_left)))
    print(response)
    return response
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # 0.0.0.0 makes the server listen on every network interface.
    listen_host, listen_port = '0.0.0.0', 35270
    app.run(host=listen_host, port=listen_port)
|
78
bin/0-ply-tests.rb
Normal file
78
bin/0-ply-tests.rb
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
# Trains one model for 200000 episodes in rounds of 2000, evaluating against
# dumbeval and pubeval before every round.
#
# board_rep  - value passed as --board-rep to the initial training run
# model_name - value passed as --model to every run
# ply        - value passed as --ply to every run
def run_stuff(board_rep, model_name, ply)
  total_episodes = 200000
  episodes_per_round = 2000

  # Initial one-episode run; the only call that passes --board-rep.
  system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")

  (total_episodes / episodes_per_round).times do
    %w[dumbeval pubeval].each do |eval_method|
      system("python3 main.py --eval --model #{model_name} --eval-methods #{eval_method} --episodes 250 --ply #{ply} --repeat-eval 3")
    end
    system("python3 main.py --train --model #{model_name} --episodes #{episodes_per_round} --ply #{ply}")
  end

  nil
end
|
||||||
|
|
||||||
|
|
||||||
|
### ///////////////////////////////////////////////////////////////
# 0-PLY TESTINGS: QUACK, QUACK-FAT, QUACK-NORM, TESAURO
### ///////////////////////////////////////////////////////////////

# Every board representation is trained as "<board_rep>_test_0_ply" with ply 0.
# The matching 1-ply runs ("<board_rep>_test_1_ply", ply = 1) are disabled;
# add a second run_stuff call with ply 1 to enable them again.
%w[quack quack-fat quack-norm tesauro].each do |board_rep|
  run_stuff(board_rep, "#{board_rep}_test_0_ply", 0)
end
|
|
@ -1,30 +1,30 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
MODELS_DIR = 'models'
|
||||||
|
|
||||||
def save(model_name)
|
def save(model_name)
|
||||||
require 'date'
|
require 'date'
|
||||||
|
|
||||||
models_dir = 'models'
|
model_path = File.join(MODELS_DIR, model_name)
|
||||||
model_path = File.join(models_dir, model_name)
|
|
||||||
if not File.exists? model_path then
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
|
|
||||||
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
|
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
|
||||||
|
|
||||||
puts "Found model #{model_name} with episodes #{episode_count} trained!"
|
puts "Found model #{model_name} with episodes #{episode_count} trained!"
|
||||||
|
|
||||||
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
|
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
|
||||||
save_path = File.join(models_dir, 'saves', file_name)
|
save_path = File.join(MODELS_DIR, 'saves', file_name)
|
||||||
puts "Saving to #{save_path}"
|
puts "Saving to #{save_path}"
|
||||||
|
|
||||||
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
|
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
|
||||||
|
|
||||||
return true
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def train(model, episodes)
|
def train(model, episodes)
|
||||||
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
|
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def force_train(model, episodes)
|
||||||
|
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
|
||||||
|
end
|
||||||
|
|
||||||
def evaluate(model, episodes, method)
|
def evaluate(model, episodes, method)
|
||||||
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
|
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
|
||||||
end
|
end
|
||||||
|
@ -33,11 +33,9 @@ model = ARGV[0]
|
||||||
|
|
||||||
if model.nil? then raise "no model specified" end
|
if model.nil? then raise "no model specified" end
|
||||||
|
|
||||||
while true do
|
if not File.exists? File.join(MODELS_DIR, model) then
|
||||||
|
force_train model, 10
|
||||||
save model
|
save model
|
||||||
train model, 1000
|
|
||||||
save model
|
|
||||||
train model, 1000
|
|
||||||
3.times do
|
3.times do
|
||||||
evaluate model, 250, "pubeval"
|
evaluate model, 250, "pubeval"
|
||||||
end
|
end
|
||||||
|
@ -45,3 +43,27 @@ while true do
|
||||||
evaluate model, 250, "dumbeval"
|
evaluate model, 250, "dumbeval"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# while true do
|
||||||
|
# save model
|
||||||
|
# train model, 1000
|
||||||
|
# save model
|
||||||
|
# train model, 1000
|
||||||
|
# 3.times do
|
||||||
|
# evaluate model, 250, "pubeval"
|
||||||
|
# end
|
||||||
|
# 3.times do
|
||||||
|
# evaluate model, 250, "dumbeval"
|
||||||
|
# end
|
||||||
|
# end
|
||||||
|
|
||||||
|
while true do
|
||||||
|
save model
|
||||||
|
train model, 500
|
||||||
|
5.times do
|
||||||
|
evaluate model, 250, "pubeval"
|
||||||
|
end
|
||||||
|
5.times do
|
||||||
|
evaluate model, 250, "dumbeval"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
77
board.py
77
board.py
|
@ -16,8 +16,6 @@ class Board:
|
||||||
return quack.idxs_with_checkers_of_player(board, player)
|
return quack.idxs_with_checkers_of_player(board, player)
|
||||||
|
|
||||||
|
|
||||||
# TODO: Write a test for this
|
|
||||||
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
|
|
||||||
# index 26 is player 1 home, index 27 is player -1 home
|
# index 26 is player 1 home, index 27 is player -1 home
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_to_pubeval(board, player):
|
def board_features_to_pubeval(board, player):
|
||||||
|
@ -51,7 +49,6 @@ class Board:
|
||||||
# board += ([1, 0] if np.sign(player) > 0 else [0, 1])
|
# board += ([1, 0] if np.sign(player) > 0 else [0, 1])
|
||||||
# return np.array(board).reshape(1,30)
|
# return np.array(board).reshape(1,30)
|
||||||
|
|
||||||
|
|
||||||
# quack-fatter
|
# quack-fatter
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_quack_norm(board, player):
|
def board_features_quack_norm(board, player):
|
||||||
|
@ -66,7 +63,7 @@ class Board:
|
||||||
board.append(15 - sum(positives))
|
board.append(15 - sum(positives))
|
||||||
board.append(-15 - sum(negatives))
|
board.append(-15 - sum(negatives))
|
||||||
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
|
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
|
||||||
return np.array(board).reshape(1,30)
|
return np.array(board).reshape(1, 30)
|
||||||
|
|
||||||
# tesauro
|
# tesauro
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -95,9 +92,62 @@ class Board:
|
||||||
board_rep += bar_trans(board, player)
|
board_rep += bar_trans(board, player)
|
||||||
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
|
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
|
||||||
|
|
||||||
board_rep += ([1,0] if cur_player == 1 else [1,0])
|
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
|
||||||
|
|
||||||
return np.array(board_rep).reshape(1,198)
|
return np.array(board_rep).reshape(1, 198)
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
def board_features_tesauro_fat(board, cur_player):
    """Encode a board as a (1, N) array with 15 unary units per point and player.

    For each player in the order 1, -1 the representation contains:

    * 15 values for each of ``board[1:25]``: the first ``k`` are 1 and the
      rest 0, where ``k`` is the number of that player's checkers on the point
      (0 when the point is empty or held by the other player);
    * one bar value: ``abs(board[0] / 2)`` for player 1 and
      ``abs(board[25] / 2)`` for player -1;
    * one value ``15 - Board.num_of_checkers_for_player(board, player)``.

    Two final values mark the player to move: ``[1, 0]`` when ``cur_player``
    is 1, otherwise ``[0, 1]``. With 24 points this gives
    2 * (24 * 15 + 2) + 2 = 726 values.

    :param board: Indexable board; indices 0 to 25 are read.
    :param cur_player: Player to move, 1 or -1.
    :return: ``np.ndarray`` of shape ``(1, len(features))``.
    :raises ValueError: If a point holds more than 15 checkers of one player.
    """
    max_checkers = 15

    def ordinary_trans(val, player):
        # val * player is positive only when the point belongs to `player`.
        count = val * player
        if count > max_checkers:
            # The former if/elif table returned None here, which only showed
            # up later as a confusing TypeError on `+=`.
            raise ValueError(
                "point holds {} checkers, at most {} are possible".format(
                    count, max_checkers))
        return tuple(1 if slot < count else 0 for slot in range(max_checkers))

    def bar_trans(board, player):
        if player == 1:
            return (abs(board[0] / 2),)
        elif player == -1:
            return (abs(board[25] / 2),)

    board_rep = []
    for player in [1, -1]:
        for x in board[1:25]:
            board_rep += ordinary_trans(x, player)
        board_rep += bar_trans(board, player)
        board_rep += (15 - Board.num_of_checkers_for_player(board, player),)

    board_rep += ([1, 0] if cur_player == 1 else [0, 1])

    return np.array(board_rep).reshape(1, len(board_rep))
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -197,9 +247,6 @@ class Board:
|
||||||
# Find all points with checkers on them belonging to the player
|
# Find all points with checkers on them belonging to the player
|
||||||
# Iterate through each index and check if it's a possible move given the roll
|
# Iterate through each index and check if it's a possible move given the roll
|
||||||
|
|
||||||
# TODO: make sure that it is not possible to do nothing on first part of
|
|
||||||
# turn and then do something with the second die
|
|
||||||
|
|
||||||
def calc_moves(board, face_value):
|
def calc_moves(board, face_value):
|
||||||
if face_value == 0:
|
if face_value == 0:
|
||||||
return [board]
|
return [board]
|
||||||
|
@ -221,23 +268,13 @@ class Board:
|
||||||
# print("Dice permuts:",dice_permutations)
|
# print("Dice permuts:",dice_permutations)
|
||||||
for roll in dice_permutations:
|
for roll in dice_permutations:
|
||||||
# Calculate boards resulting from first move
|
# Calculate boards resulting from first move
|
||||||
#print("initial board: ", board)
|
|
||||||
#print("roll:", roll)
|
|
||||||
#print("Rest of roll:",roll[1:])
|
|
||||||
boards = calc_moves(board, roll[0])
|
boards = calc_moves(board, roll[0])
|
||||||
#print("Boards:",boards)
|
|
||||||
#print("Roll:",roll[0])
|
|
||||||
#print("boards after first die: ", boards)
|
|
||||||
|
|
||||||
for die in roll[1:]:
|
for die in roll[1:]:
|
||||||
# Calculate boards resulting from second move
|
# Calculate boards resulting from second move
|
||||||
nested_boards = [calc_moves(board, die) for board in boards]
|
nested_boards = [calc_moves(board, die) for board in boards]
|
||||||
#print("nested boards: ", nested_boards)
|
|
||||||
boards = [board for boards in nested_boards for board in boards]
|
boards = [board for boards in nested_boards for board in boards]
|
||||||
# What the fuck
|
|
||||||
#for board in boards:
|
|
||||||
# print(board)
|
|
||||||
# print("type__:",type(board))
|
|
||||||
# Add resulting unique boards to set of legal boards resulting from roll
|
# Add resulting unique boards to set of legal boards resulting from roll
|
||||||
|
|
||||||
#print("printing boards from calculate_legal_states: ", boards)
|
#print("printing boards from calculate_legal_states: ", boards)
|
||||||
|
|
30
main.py
30
main.py
|
@ -2,6 +2,7 @@ import argparse
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import subprocess
|
||||||
|
|
||||||
# Parse command line arguments
|
# Parse command line arguments
|
||||||
parser = argparse.ArgumentParser(description="Backgammon games")
|
parser = argparse.ArgumentParser(description="Backgammon games")
|
||||||
|
@ -77,27 +78,20 @@ if not os.path.isdir(model_path()):
|
||||||
if not os.path.isdir(log_path):
|
if not os.path.isdir(log_path):
|
||||||
os.mkdir(log_path)
|
os.mkdir(log_path)
|
||||||
|
|
||||||
|
|
||||||
def save_config():
|
|
||||||
import yaml
|
|
||||||
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
|
|
||||||
# config_path = os.path.join(checkpoint_path, 'config')
|
|
||||||
# with open(config_path, 'a+') as f:
|
|
||||||
# print("lol")
|
|
||||||
print(yaml.dump(config))
|
|
||||||
|
|
||||||
# Define helper functions
|
# Define helper functions
|
||||||
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
|
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
|
||||||
|
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
|
||||||
format_vars = { 'trained_eps': trained_eps,
|
format_vars = { 'trained_eps': trained_eps,
|
||||||
'count': len(outcome),
|
'count': len(outcome),
|
||||||
'sum': sum(outcome),
|
'sum': sum(outcome),
|
||||||
'mean': sum(outcome) / len(outcome),
|
'mean': sum(outcome) / len(outcome),
|
||||||
'time': int(time.time()),
|
'time': int(time.time()),
|
||||||
'average_diff_in_vals': diff_in_values/len(outcome)
|
'average_diff_in_vals': diff_in_values,
|
||||||
|
'commit': commit
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(log_path, 'a+') as f:
|
with open(log_path, 'a+') as f:
|
||||||
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
|
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
|
||||||
|
|
||||||
|
|
||||||
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
|
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
|
||||||
|
@ -108,9 +102,12 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
|
||||||
:param log_path:
|
:param log_path:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
|
||||||
|
|
||||||
for outcome in outcomes:
|
for outcome in outcomes:
|
||||||
scores = outcome[1]
|
scores = outcome[1]
|
||||||
format_vars = { 'trained_eps': trained_eps,
|
format_vars = { 'commit': commit,
|
||||||
|
'trained_eps': trained_eps,
|
||||||
'method': outcome[0],
|
'method': outcome[0],
|
||||||
'count': len(scores),
|
'count': len(scores),
|
||||||
'sum': sum(scores),
|
'sum': sum(scores),
|
||||||
|
@ -118,9 +115,10 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
|
||||||
'time': int(time.time())
|
'time': int(time.time())
|
||||||
}
|
}
|
||||||
with open(log_path, 'a+') as f:
|
with open(log_path, 'a+') as f:
|
||||||
f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
|
f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")
|
||||||
|
|
||||||
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
|
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
|
||||||
|
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
|
||||||
for outcome in outcomes:
|
for outcome in outcomes:
|
||||||
scores = outcome[1]
|
scores = outcome[1]
|
||||||
format_vars = { 'trained_eps': trained_eps,
|
format_vars = { 'trained_eps': trained_eps,
|
||||||
|
@ -130,9 +128,10 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
|
||||||
'mean': sum(scores) / len(scores),
|
'mean': sum(scores) / len(scores),
|
||||||
'time': time,
|
'time': time,
|
||||||
'index': index,
|
'index': index,
|
||||||
|
'commit': commit
|
||||||
}
|
}
|
||||||
with open(log_path, 'a+') as f:
|
with open(log_path, 'a+') as f:
|
||||||
f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
|
f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
|
||||||
|
|
||||||
def find_board_rep():
|
def find_board_rep():
|
||||||
checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
|
checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
|
||||||
|
@ -172,7 +171,6 @@ if __name__ == "__main__":
|
||||||
# Set up network
|
# Set up network
|
||||||
from network import Network
|
from network import Network
|
||||||
|
|
||||||
save_config()
|
|
||||||
# Set up variables
|
# Set up variables
|
||||||
episode_count = config['episode_count']
|
episode_count = config['episode_count']
|
||||||
|
|
||||||
|
@ -211,6 +209,8 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
elif args.eval:
|
elif args.eval:
|
||||||
network = Network(config, config['model'])
|
network = Network(config, config['model'])
|
||||||
|
network.restore_model()
|
||||||
|
|
||||||
for i in range(int(config['repeat_eval'])):
|
for i in range(int(config['repeat_eval'])):
|
||||||
start_episode = network.episodes_trained
|
start_episode = network.episodes_trained
|
||||||
# Evaluation measures are described in `config`
|
# Evaluation measures are described in `config`
|
||||||
|
|
238
network.py
238
network.py
|
@ -21,10 +21,10 @@ class Network:
|
||||||
'quack' : (28, Board.board_features_quack),
|
'quack' : (28, Board.board_features_quack),
|
||||||
'tesauro' : (198, Board.board_features_tesauro),
|
'tesauro' : (198, Board.board_features_tesauro),
|
||||||
'quack-norm' : (30, Board.board_features_quack_norm),
|
'quack-norm' : (30, Board.board_features_quack_norm),
|
||||||
|
'tesauro-fat' : (726, Board.board_features_tesauro_fat),
|
||||||
'tesauro-poop': (198, Board.board_features_tesauro_wrong)
|
'tesauro-poop': (198, Board.board_features_tesauro_wrong)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def custom_tanh(self, x, name=None):
|
def custom_tanh(self, x, name=None):
|
||||||
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
|
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
|
||||||
|
|
||||||
|
@ -39,6 +39,11 @@ class Network:
|
||||||
'0': self.make_move_0_ply
|
'0': self.make_move_0_ply
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.max_or_min = {
|
||||||
|
1: np.argmax,
|
||||||
|
-1: np.argmin
|
||||||
|
}
|
||||||
|
|
||||||
tf.enable_eager_execution()
|
tf.enable_eager_execution()
|
||||||
|
|
||||||
xavier_init = tf.contrib.layers.xavier_initializer()
|
xavier_init = tf.contrib.layers.xavier_initializer()
|
||||||
|
@ -93,7 +98,7 @@ class Network:
|
||||||
:param decay_steps: The amount of steps between each decay
|
:param decay_steps: The amount of steps between each decay
|
||||||
:return: The result of the exponential decay performed on the learning rate
|
:return: The result of the exponential decay performed on the learning rate
|
||||||
"""
|
"""
|
||||||
res = max_lr * decay_rate**(global_step // decay_steps)
|
res = max_lr * decay_rate ** (global_step // decay_steps)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
def do_backprop(self, prev_state, value_next):
|
def do_backprop(self, prev_state, value_next):
|
||||||
|
@ -104,20 +109,19 @@ class Network:
|
||||||
:return: Nothing, the calculation is performed on the model of the network
|
:return: Nothing, the calculation is performed on the model of the network
|
||||||
"""
|
"""
|
||||||
self.learning_rate = tf.maximum(self.min_learning_rate,
|
self.learning_rate = tf.maximum(self.min_learning_rate,
|
||||||
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
|
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
|
||||||
name="learning_rate")
|
name="learning_rate")
|
||||||
|
|
||||||
with tf.GradientTape() as tape:
|
with tf.GradientTape() as tape:
|
||||||
value = self.model(prev_state.reshape(1,-1))
|
value = self.model(prev_state.reshape(1,-1))
|
||||||
|
|
||||||
grads = tape.gradient(value, self.model.variables)
|
grads = tape.gradient(value, self.model.variables)
|
||||||
|
|
||||||
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
|
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
|
||||||
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
|
|
||||||
|
|
||||||
with tf.variable_scope('apply_gradients'):
|
for grad, train_var in zip(grads, self.model.variables):
|
||||||
for grad, train_var in zip(grads, self.model.variables):
|
backprop_calc = self.learning_rate * difference_in_values * grad
|
||||||
backprop_calc = self.learning_rate * difference_in_values * grad
|
train_var.assign_add(backprop_calc)
|
||||||
train_var.assign_add(backprop_calc)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -144,8 +148,9 @@ class Network:
|
||||||
:param episode_count:
|
:param episode_count:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
|
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
|
||||||
#self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
|
|
||||||
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
|
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
|
||||||
print("[NETWK] ({name}) Saving model to:".format(name=self.name),
|
print("[NETWK] ({name}) Saving model to:".format(name=self.name),
|
||||||
os.path.join(self.checkpoint_path, 'model.ckpt'))
|
os.path.join(self.checkpoint_path, 'model.ckpt'))
|
||||||
|
@ -165,8 +170,7 @@ class Network:
|
||||||
:param states: A number of states. The states have to be transformed before being given to this function.
|
:param states: A number of states. The states have to be transformed before being given to this function.
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
values = self.model.predict_on_batch(states)
|
return self.model.predict_on_batch(states)
|
||||||
return values
|
|
||||||
|
|
||||||
|
|
||||||
def restore_model(self):
|
def restore_model(self):
|
||||||
|
@ -174,7 +178,6 @@ class Network:
|
||||||
Restore a model for a session, such that a trained model can either be further trained or
|
Restore a model for a session, such that a trained model can either be further trained or
|
||||||
used for evaluation
|
used for evaluation
|
||||||
|
|
||||||
:param sess: Current session
|
|
||||||
:return: Nothing. It's a side-effect that a model gets restored for the network.
|
:return: Nothing. It's a side-effect that a model gets restored for the network.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -186,9 +189,6 @@ class Network:
|
||||||
str(latest_checkpoint))
|
str(latest_checkpoint))
|
||||||
tfe.Saver(self.model.variables).restore(latest_checkpoint)
|
tfe.Saver(self.model.variables).restore(latest_checkpoint)
|
||||||
|
|
||||||
# variables_names = [v.name for v in self.model.variables]
|
|
||||||
|
|
||||||
|
|
||||||
# Restore trained episode count for model
|
# Restore trained episode count for model
|
||||||
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
|
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
|
||||||
if os.path.isfile(episode_count_path):
|
if os.path.isfile(episode_count_path):
|
||||||
|
@ -211,7 +211,6 @@ class Network:
|
||||||
and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
|
and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
|
||||||
The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
|
The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
|
||||||
|
|
||||||
:param sess:
|
|
||||||
:param board: Current board
|
:param board: Current board
|
||||||
:param roll: Current roll
|
:param roll: Current roll
|
||||||
:param player: Current player
|
:param player: Current player
|
||||||
|
@ -221,13 +220,12 @@ class Network:
|
||||||
legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
|
legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
|
||||||
|
|
||||||
scores = self.model.predict_on_batch(legal_states)
|
scores = self.model.predict_on_batch(legal_states)
|
||||||
transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
|
|
||||||
|
|
||||||
best_score_idx = np.argmax(np.array(transformed_scores))
|
best_score_idx = self.max_or_min[player](scores)
|
||||||
best_move = legal_moves[best_score_idx]
|
|
||||||
best_score = scores[best_score_idx]
|
|
||||||
|
|
||||||
return [best_move, best_score]
|
best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
|
||||||
|
|
||||||
|
return (best_move, best_score)
|
||||||
|
|
||||||
def make_move_1_ply(self, board, roll, player):
|
def make_move_1_ply(self, board, roll, player):
|
||||||
"""
|
"""
|
||||||
|
@ -237,9 +235,9 @@ class Network:
|
||||||
:param player:
|
:param player:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
# start = time.time()
|
start = time.time()
|
||||||
best_pair = self.calculate_1_ply(board, roll, player)
|
best_pair = self.calculate_1_ply(board, roll, player)
|
||||||
# print(time.time() - start)
|
#print(time.time() - start)
|
||||||
return best_pair
|
return best_pair
|
||||||
|
|
||||||
|
|
||||||
|
@ -248,35 +246,31 @@ class Network:
|
||||||
Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
|
Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
|
||||||
all moves and scores are found for them. The expected score is then calculated for each of the boards from the
|
all moves and scores are found for them. The expected score is then calculated for each of the boards from the
|
||||||
0-ply.
|
0-ply.
|
||||||
:param sess:
|
|
||||||
:param board:
|
:param board:
|
||||||
:param roll: The original roll
|
:param roll: The original roll
|
||||||
:param player: The current player
|
:param player: The current player
|
||||||
:return: Best possible move based on 1-ply look-ahead
|
:return: Best possible move based on 1-ply look-ahead
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# find all legal states from the given board and the given roll
|
# find all legal states from the given board and the given roll
|
||||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||||
|
|
||||||
legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
|
legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
|
||||||
|
|
||||||
scores = self.calc_vals(legal_states)
|
scores = [ score.numpy()
|
||||||
scores = [score.numpy() for score in scores]
|
for score
|
||||||
|
in self.calc_vals(legal_states) ]
|
||||||
|
|
||||||
moves_and_scores = list(zip(init_legal_states, scores))
|
moves_and_scores = list(zip(init_legal_states, scores))
|
||||||
|
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
|
||||||
|
best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
|
||||||
|
|
||||||
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
|
scores = self.do_ply(best_boards, player)
|
||||||
|
|
||||||
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
|
best_score_idx = self.max_or_min[player](scores)
|
||||||
|
# best_score_idx = np.array(trans_scores).argmax()
|
||||||
|
|
||||||
|
return (best_boards[best_score_idx], scores[best_score_idx])
|
||||||
|
|
||||||
scores, trans_scores = self.do_ply(best_boards, player)
|
|
||||||
|
|
||||||
best_score_idx = np.array(trans_scores).argmax()
|
|
||||||
|
|
||||||
return [best_boards[best_score_idx], scores[best_score_idx]]
|
|
||||||
|
|
||||||
def do_ply(self, boards, player):
|
def do_ply(self, boards, player):
|
||||||
"""
|
"""
|
||||||
|
@ -285,7 +279,6 @@ class Network:
|
||||||
allowing the function to search deeper, which could result in an even larger search space. If we wish
|
allowing the function to search deeper, which could result in an even larger search space. If we wish
|
||||||
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
|
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
|
||||||
|
|
||||||
:param sess:
|
|
||||||
:param boards: The boards to try all rolls on
|
:param boards: The boards to try all rolls on
|
||||||
:param player: The player of the previous ply
|
:param player: The player of the previous ply
|
||||||
:return: An array of scores where each index describes one of the boards which was given as param
|
:return: An array of scores where each index describes one of the boards which was given as param
|
||||||
|
@ -305,11 +298,11 @@ class Network:
|
||||||
length_list = []
|
length_list = []
|
||||||
test_list = []
|
test_list = []
|
||||||
# Prepping of data
|
# Prepping of data
|
||||||
start= time.time()
|
# start = time.time()
|
||||||
for board in boards:
|
for board in boards:
|
||||||
length = 0
|
length = 0
|
||||||
for roll in all_rolls:
|
for roll in all_rolls:
|
||||||
all_states = list(Board.calculate_legal_states(board, player*-1, roll))
|
all_states = Board.calculate_legal_states(board, player*-1, roll)
|
||||||
for state in all_states:
|
for state in all_states:
|
||||||
state = np.array(self.board_trans_func(state, player*-1)[0])
|
state = np.array(self.board_trans_func(state, player*-1)[0])
|
||||||
test_list.append(state)
|
test_list.append(state)
|
||||||
|
@ -320,146 +313,19 @@ class Network:
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
all_scores_legit = self.model.predict_on_batch(np.array(test_list))
|
all_scores = self.model.predict_on_batch(np.array(test_list))
|
||||||
|
|
||||||
split_scores = []
|
split_scores = []
|
||||||
from_idx = 0
|
from_idx = 0
|
||||||
for length in length_list:
|
for length in length_list:
|
||||||
split_scores.append(all_scores_legit[from_idx:from_idx+length])
|
split_scores.append(all_scores[from_idx:from_idx+length])
|
||||||
from_idx += length
|
from_idx += length
|
||||||
|
|
||||||
means_splits = [tf.reduce_mean(scores) for scores in split_scores]
|
means_splits = [tf.reduce_mean(scores) for scores in split_scores]
|
||||||
transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]
|
|
||||||
# print(time.time() - start)
|
# print(time.time() - start)
|
||||||
|
# print("/"*50)
|
||||||
return ([means_splits, transformed_means_splits])
|
return means_splits
|
||||||
|
|
||||||
|
|
||||||
def calc_n_ply(self, n_init, sess, board, player, roll):
|
|
||||||
"""
|
|
||||||
:param n_init:
|
|
||||||
:param sess:
|
|
||||||
:param board:
|
|
||||||
:param player:
|
|
||||||
:param roll:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
|
|
||||||
# find all legal states from the given board and the given roll
|
|
||||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
|
||||||
|
|
||||||
# find all values for the above boards
|
|
||||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
|
||||||
|
|
||||||
# pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
|
|
||||||
sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
|
|
||||||
|
|
||||||
|
|
||||||
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
|
|
||||||
|
|
||||||
best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
|
|
||||||
|
|
||||||
return best_move_score_pair
|
|
||||||
|
|
||||||
|
|
||||||
def n_ply(self, n_init, sess, boards_init, player_init):
|
|
||||||
"""
|
|
||||||
:param n_init:
|
|
||||||
:param sess:
|
|
||||||
:param boards_init:
|
|
||||||
:param player_init:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
def ply(n, boards, player):
|
|
||||||
def calculate_possible_states(board):
|
|
||||||
possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
|
|
||||||
(1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
|
|
||||||
(2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
|
|
||||||
(4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
|
|
||||||
(6, 6) ]
|
|
||||||
|
|
||||||
# for roll in possible_rolls:
|
|
||||||
# print(len(Board.calculate_legal_states(board, player, roll)))
|
|
||||||
|
|
||||||
return [ Board.calculate_legal_states(board, player, roll)
|
|
||||||
for roll
|
|
||||||
in possible_rolls ]
|
|
||||||
|
|
||||||
def find_best_state_score(boards):
|
|
||||||
score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
|
|
||||||
for board
|
|
||||||
in boards ]
|
|
||||||
scores = [ pair[1]
|
|
||||||
for pair
|
|
||||||
in score_pairs ]
|
|
||||||
best_score_pair = score_pairs[np.array(scores).argmax()]
|
|
||||||
|
|
||||||
return best_score_pair
|
|
||||||
|
|
||||||
def average_score(boards):
|
|
||||||
return sum(boards)/len(boards)
|
|
||||||
|
|
||||||
def average_ply_score(board):
|
|
||||||
states_for_rolls = calculate_possible_states(board)
|
|
||||||
|
|
||||||
best_state_score_for_each_roll = [
|
|
||||||
find_best_state_score(states)
|
|
||||||
for states
|
|
||||||
in states_for_rolls ]
|
|
||||||
best_score_for_each_roll = [ x[1]
|
|
||||||
for x
|
|
||||||
in best_state_score_for_each_roll ]
|
|
||||||
|
|
||||||
average_score_var = average_score(best_score_for_each_roll)
|
|
||||||
return average_score_var
|
|
||||||
|
|
||||||
|
|
||||||
if n == 1:
|
|
||||||
average_score_pairs = [ (board, average_ply_score(board))
|
|
||||||
for board
|
|
||||||
in boards ]
|
|
||||||
return average_score_pairs
|
|
||||||
elif n > 1: # n != 1
|
|
||||||
def average_for_score_pairs(score_pairs):
|
|
||||||
scores = [ pair[1]
|
|
||||||
for pair
|
|
||||||
in score_pairs ]
|
|
||||||
return sum(scores)/len(scores)
|
|
||||||
|
|
||||||
def average_plain(scores):
|
|
||||||
return sum(scores)/len(scores)
|
|
||||||
|
|
||||||
print("+"*20)
|
|
||||||
print(n)
|
|
||||||
print(type(boards))
|
|
||||||
print(boards)
|
|
||||||
possible_states_for_boards = [
|
|
||||||
(board, calculate_possible_states(board))
|
|
||||||
for board
|
|
||||||
in boards ]
|
|
||||||
|
|
||||||
average_score_pairs = [
|
|
||||||
(inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
|
|
||||||
for inner_board
|
|
||||||
in inner_boards[1] ]))
|
|
||||||
for inner_boards
|
|
||||||
in possible_states_for_boards ]
|
|
||||||
|
|
||||||
return average_score_pairs
|
|
||||||
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
|
|
||||||
if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
|
|
||||||
|
|
||||||
boards_with_scores = ply(n_init, boards_init, -1 * player_init)
|
|
||||||
#print("Boards with scores:",boards_with_scores)
|
|
||||||
scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
|
|
||||||
for pair
|
|
||||||
in boards_with_scores ]
|
|
||||||
#print("All the scores:",scores)
|
|
||||||
best_score_pair = boards_with_scores[np.array(scores).argmax()]
|
|
||||||
return best_score_pair
|
|
||||||
|
|
||||||
|
|
||||||
def eval(self, episode_count, trained_eps = 0):
|
def eval(self, episode_count, trained_eps = 0):
|
||||||
|
@ -477,7 +343,6 @@ class Network:
|
||||||
"""
|
"""
|
||||||
Do the actual evaluation
|
Do the actual evaluation
|
||||||
|
|
||||||
:param sess:
|
|
||||||
:param method: Either pubeval or dumbeval
|
:param method: Either pubeval or dumbeval
|
||||||
:param episodes: Amount of episodes to use in the evaluation
|
:param episodes: Amount of episodes to use in the evaluation
|
||||||
:param trained_eps:
|
:param trained_eps:
|
||||||
|
@ -501,7 +366,6 @@ class Network:
|
||||||
sys.stderr.write(
|
sys.stderr.write(
|
||||||
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
||||||
|
|
||||||
|
|
||||||
if method == 'pubeval':
|
if method == 'pubeval':
|
||||||
outcomes = []
|
outcomes = []
|
||||||
for i in range(1, episodes + 1):
|
for i in range(1, episodes + 1):
|
||||||
|
@ -509,11 +373,9 @@ class Network:
|
||||||
board = Board.initial_state
|
board = Board.initial_state
|
||||||
while Board.outcome(board) is None:
|
while Board.outcome(board) is None:
|
||||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
|
|
||||||
board = (self.make_move(board, roll, 1))[0]
|
board = (self.make_move(board, roll, 1))[0]
|
||||||
|
|
||||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
|
|
||||||
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
||||||
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||||
|
@ -532,11 +394,9 @@ class Network:
|
||||||
board = Board.initial_state
|
board = Board.initial_state
|
||||||
while Board.outcome(board) is None:
|
while Board.outcome(board) is None:
|
||||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
|
|
||||||
board = (self.make_move(board, roll, 1))[0]
|
board = (self.make_move(board, roll, 1))[0]
|
||||||
|
|
||||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
|
|
||||||
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
|
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
|
||||||
|
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||||
|
@ -596,10 +456,8 @@ class Network:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
difference_in_vals = 0
|
|
||||||
|
|
||||||
self.restore_model()
|
self.restore_model()
|
||||||
|
average_diffs = 0
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
def print_time_estimate(eps_completed):
|
def print_time_estimate(eps_completed):
|
||||||
|
@ -619,28 +477,27 @@ class Network:
|
||||||
for episode in range(1, episodes + 1):
|
for episode in range(1, episodes + 1):
|
||||||
|
|
||||||
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
||||||
# TODO decide which player should be here
|
|
||||||
|
|
||||||
player = 1
|
# player = 1
|
||||||
|
player = random.choice([-1,1])
|
||||||
prev_board = Board.initial_state
|
prev_board = Board.initial_state
|
||||||
i = 0
|
i = 0
|
||||||
|
difference_in_values = 0
|
||||||
while Board.outcome(prev_board) is None:
|
while Board.outcome(prev_board) is None:
|
||||||
i += 1
|
i += 1
|
||||||
self.global_step += 1
|
self.global_step += 1
|
||||||
|
|
||||||
|
|
||||||
cur_board, cur_board_value = self.make_move(prev_board,
|
cur_board, cur_board_value = self.make_move(prev_board,
|
||||||
(random.randrange(1, 7), random.randrange(1, 7)),
|
(random.randrange(1, 7), random.randrange(1, 7)),
|
||||||
player)
|
player)
|
||||||
|
|
||||||
difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
|
difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
|
||||||
|
|
||||||
if self.config['verbose']:
|
if self.config['verbose']:
|
||||||
print("Difference in values:", difference_in_vals)
|
print("Difference in values:", difference_in_vals)
|
||||||
print("Current board value :", cur_board_value)
|
print("Current board value :", cur_board_value)
|
||||||
print("Current board is :\n",cur_board)
|
print("Current board is :\n",cur_board)
|
||||||
|
|
||||||
|
|
||||||
# adjust weights
|
# adjust weights
|
||||||
if Board.outcome(cur_board) is None:
|
if Board.outcome(cur_board) is None:
|
||||||
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
|
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
|
||||||
|
@ -654,6 +511,10 @@ class Network:
|
||||||
final_score = np.array([Board.outcome(final_board)[1]])
|
final_score = np.array([Board.outcome(final_board)[1]])
|
||||||
scaled_final_score = ((final_score + 2) / 4)
|
scaled_final_score = ((final_score + 2) / 4)
|
||||||
|
|
||||||
|
difference_in_values += abs(scaled_final_score-cur_board_value)
|
||||||
|
|
||||||
|
average_diffs += (difference_in_values[0][0] / (i+1))
|
||||||
|
|
||||||
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
|
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
|
||||||
|
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
|
@ -666,8 +527,9 @@ class Network:
|
||||||
print_time_estimate(episode)
|
print_time_estimate(episode)
|
||||||
|
|
||||||
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
||||||
|
|
||||||
self.save_model(episode+trained_eps)
|
self.save_model(episode+trained_eps)
|
||||||
|
|
||||||
return outcomes, difference_in_vals[0][0]
|
return outcomes, average_diffs/len(outcomes)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -57,4 +57,11 @@ boards = {initial_state,
|
||||||
|
|
||||||
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
|
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
|
||||||
|
|
||||||
network.play_against_network()
|
|
||||||
|
diff = [0, 0]
|
||||||
|
val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
|
||||||
|
print(val)
|
||||||
|
diff[0] += abs(-1-val)
|
||||||
|
diff[1] += 1
|
||||||
|
|
||||||
|
print(diff[1])
|
18
player.py
18
player.py
|
@ -20,21 +20,22 @@ class Player:
|
||||||
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
|
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
|
||||||
total += r
|
total += r
|
||||||
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
|
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
|
||||||
|
print(sets)
|
||||||
return sets
|
return sets
|
||||||
|
|
||||||
|
|
||||||
def tmp_name(self, from_board, to_board, roll, player, total_moves):
|
def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
|
||||||
sets = self.calc_move_sets(from_board, roll, player)
|
sets = self.calc_move_sets(from_board, roll, player)
|
||||||
return_board = from_board
|
return_board = from_board
|
||||||
for idx, board_set in enumerate(sets):
|
for idx, board_set in enumerate(sets):
|
||||||
|
|
||||||
board_set[0] = list(board_set[0])
|
board_set[0] = list(board_set[0])
|
||||||
print(to_board)
|
# print(to_board)
|
||||||
print(board_set)
|
# print(board_set)
|
||||||
if to_board in board_set[0]:
|
if to_board in board_set[0]:
|
||||||
total_moves -= board_set[1]
|
total_moves -= board_set[1]
|
||||||
# if it's not the sum of the moves
|
# if it's not the sum of the moves
|
||||||
if idx < 2:
|
if idx < (4 if is_quad else 2):
|
||||||
roll[idx] = 0
|
roll[idx] = 0
|
||||||
else:
|
else:
|
||||||
roll = [0,0]
|
roll = [0,0]
|
||||||
|
@ -43,8 +44,11 @@ class Player:
|
||||||
return total_moves, roll, return_board
|
return total_moves, roll, return_board
|
||||||
|
|
||||||
def make_human_move(self, board, roll):
|
def make_human_move(self, board, roll):
|
||||||
total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
|
is_quad = roll[0] == roll[1]
|
||||||
move = ""
|
total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
|
||||||
|
if is_quad:
|
||||||
|
roll = [roll[0]]*4
|
||||||
|
|
||||||
while total_moves != 0:
|
while total_moves != 0:
|
||||||
while True:
|
while True:
|
||||||
print("You have {roll} left!".format(roll=total_moves))
|
print("You have {roll} left!".format(roll=total_moves))
|
||||||
|
@ -60,6 +64,6 @@ class Player:
|
||||||
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
|
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
|
||||||
|
|
||||||
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
|
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
|
||||||
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
|
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
|
||||||
print(Board.pretty(board))
|
print(Board.pretty(board))
|
||||||
return board
|
return board
|
17
test.py
17
test.py
|
@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
|
||||||
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
||||||
np.array(expected).reshape(1, 198)).all())
|
np.array(expected).reshape(1, 198)).all())
|
||||||
|
|
||||||
|
def test_pubeval_features(self):
|
||||||
|
board = Board.initial_state
|
||||||
|
|
||||||
|
expected = (0,
|
||||||
|
2, 0, 0, 0, 0, -5,
|
||||||
|
0, -3, 0, 0, 0, 5,
|
||||||
|
-5, 0, 0, 0, 3, 0,
|
||||||
|
5, 0, 0, 0, 0, -2,
|
||||||
|
0,
|
||||||
|
0, 0)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
|
||||||
|
np.array(expected).reshape(1, 28)).all())
|
||||||
|
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
|
||||||
|
np.array(expected).reshape(1, 28)).all())
|
||||||
|
|
||||||
def test_tesauro_bars(self):
|
def test_tesauro_bars(self):
|
||||||
board = list(Board.initial_state)
|
board = list(Board.initial_state)
|
||||||
board[1] = 0
|
board[1] = 0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user