Compare commits

...

8 Commits

7 changed files with 75 additions and 45 deletions

18
app.py
View File

@ -2,6 +2,7 @@ from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS from flask_cors import CORS
from board import Board from board import Board
from eval import Eval
import main import main
import random import random
from network import Network from network import Network
@ -17,8 +18,8 @@ CORS(app)
config = main.config.copy() config = main.config.copy()
config['model'] = "player_testings" config['model'] = "player_testings"
config['ply'] = "1" config['ply'] = "0"
config['board_representation'] = 'quack-fat' config['board_representation'] = 'tesauro'
network = Network(config, config['model']) network = Network(config, config['model'])
network.restore_model() network.restore_model()
@ -90,11 +91,16 @@ def bot_move():
data = request.get_json(force=True) data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')] board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1,7), random.randrange(1,7)) roll = (random.randrange(1, 7), random.randrange(1, 7))
# print(roll)
board, _ = network.make_move(tuple(board), roll, 1) if use_pubeval:
# print("Boards!:",board) board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
return ",".join([str(x) for x in list(board)]) return ",".join([str(x) for x in list(board)])

View File

@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "quack-norm" board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply" model_name = "quack-norm_test_0_ply"
ply = 1 ply = 0
run_stuff(board_rep, model_name, ply) run_stuff(board_rep, model_name, ply)
@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "tesauro" board_rep = "tesauro"
model_name = "tesauro_test_0_ply" model_name = "tesauro_test_0_ply"
ply = 1 ply = 0
run_stuff(board_rep, model_name, ply) run_stuff(board_rep, model_name, ply)

View File

@ -1,30 +1,30 @@
#!/usr/bin/env ruby #!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name) def save(model_name)
require 'date' require 'date'
models_dir = 'models' model_path = File.join(MODELS_DIR, model_name)
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!" puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz" file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(models_dir, 'saves', file_name) save_path = File.join(MODELS_DIR, 'saves', file_name)
puts "Saving to #{save_path}" puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", models_dir, model_name) system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
return true
end end
def train(model, episodes) def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s) system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method) def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method) system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end end
@ -33,11 +33,9 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end if model.nil? then raise "no model specified" end
while true do if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model save model
train model, 1000
save model
train model, 1000
3.times do 3.times do
evaluate model, 250, "pubeval" evaluate model, 250, "pubeval"
end end
@ -45,3 +43,27 @@ while true do
evaluate model, 250, "dumbeval" evaluate model, 250, "dumbeval"
end end
end end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do
save model
train model, 500
5.times do
evaluate model, 250, "pubeval"
end
5.times do
evaluate model, 250, "dumbeval"
end
end

View File

@ -16,8 +16,6 @@ class Board:
return quack.idxs_with_checkers_of_player(board, player) return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home # index 26 is player 1 home, index 27 is player -1 home
@staticmethod @staticmethod
def board_features_to_pubeval(board, player): def board_features_to_pubeval(board, player):
@ -249,9 +247,6 @@ class Board:
# Find all points with checkers on them belonging to the player # Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll # Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value): def calc_moves(board, face_value):
if face_value == 0: if face_value == 0:
return [board] return [board]
@ -273,23 +268,13 @@ class Board:
# print("Dice permuts:",dice_permutations) # print("Dice permuts:",dice_permutations)
for roll in dice_permutations: for roll in dice_permutations:
# Calculate boards resulting from first move # Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0]) boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]: for die in roll[1:]:
# Calculate boards resulting from second move # Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards] nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards] boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll # Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards) #print("printing boards from calculate_legal_states: ", boards)

View File

@ -209,6 +209,8 @@ if __name__ == "__main__":
elif args.eval: elif args.eval:
network = Network(config, config['model']) network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])): for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained start_episode = network.episodes_trained
# Evaluation measures are described in `config` # Evaluation measures are described in `config`

View File

@ -114,15 +114,14 @@ class Network:
with tf.GradientTape() as tape: with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1)) value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables) grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
with tf.variable_scope('apply_gradients'): for grad, train_var in zip(grads, self.model.variables):
for grad, train_var in zip(grads, self.model.variables): backprop_calc = self.learning_rate * difference_in_values * grad
backprop_calc = self.learning_rate * difference_in_values * grad train_var.assign_add(backprop_calc)
train_var.assign_add(backprop_calc)
@ -299,7 +298,7 @@ class Network:
length_list = [] length_list = []
test_list = [] test_list = []
# Prepping of data # Prepping of data
start = time.time() # start = time.time()
for board in boards: for board in boards:
length = 0 length = 0
for roll in all_rolls: for roll in all_rolls:
@ -478,7 +477,6 @@ class Network:
for episode in range(1, episodes + 1): for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1 # player = 1
player = random.choice([-1,1]) player = random.choice([-1,1])

17
test.py
View File

@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) == self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all()) np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
board = Board.initial_state
expected = (0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0,
0, 0)
import numpy as np
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
np.array(expected).reshape(1, 28)).all())
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
np.array(expected).reshape(1, 28)).all())
def test_tesauro_bars(self): def test_tesauro_bars(self):
board = list(Board.initial_state) board = list(Board.initial_state)
board[1] = 0 board[1] = 0