Compare commits
8 Commits
Author | SHA1 | Date | |
---|---|---|---|
ea4efc5a2b | |||
26c0b469eb | |||
f170bad9b1 | |||
6e061171da | |||
40c228ef01 | |||
c2c6c89e9f | |||
b7708b3675 | |||
bad870c27a |
14
app.py
14
app.py
|
@ -2,6 +2,7 @@ from flask import Flask, request, jsonify
|
||||||
from flask_json import FlaskJSON, as_json_p
|
from flask_json import FlaskJSON, as_json_p
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
from board import Board
|
from board import Board
|
||||||
|
from eval import Eval
|
||||||
import main
|
import main
|
||||||
import random
|
import random
|
||||||
from network import Network
|
from network import Network
|
||||||
|
@ -17,8 +18,8 @@ CORS(app)
|
||||||
|
|
||||||
config = main.config.copy()
|
config = main.config.copy()
|
||||||
config['model'] = "player_testings"
|
config['model'] = "player_testings"
|
||||||
config['ply'] = "1"
|
config['ply'] = "0"
|
||||||
config['board_representation'] = 'quack-fat'
|
config['board_representation'] = 'tesauro'
|
||||||
network = Network(config, config['model'])
|
network = Network(config, config['model'])
|
||||||
|
|
||||||
network.restore_model()
|
network.restore_model()
|
||||||
|
@ -90,11 +91,16 @@ def bot_move():
|
||||||
data = request.get_json(force=True)
|
data = request.get_json(force=True)
|
||||||
|
|
||||||
board = [int(x) for x in data['board'].split(',')]
|
board = [int(x) for x in data['board'].split(',')]
|
||||||
|
use_pubeval = bool(data['pubeval'])
|
||||||
|
|
||||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
# print(roll)
|
|
||||||
|
if use_pubeval:
|
||||||
|
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
|
||||||
|
else:
|
||||||
board, _ = network.make_move(tuple(board), roll, 1)
|
board, _ = network.make_move(tuple(board), roll, 1)
|
||||||
# print("Boards!:",board)
|
|
||||||
|
# print("Board!:",board)
|
||||||
|
|
||||||
return ",".join([str(x) for x in list(board)])
|
return ",".join([str(x) for x in list(board)])
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)
|
||||||
|
|
||||||
board_rep = "quack-norm"
|
board_rep = "quack-norm"
|
||||||
model_name = "quack-norm_test_0_ply"
|
model_name = "quack-norm_test_0_ply"
|
||||||
ply = 1
|
ply = 0
|
||||||
|
|
||||||
run_stuff(board_rep, model_name, ply)
|
run_stuff(board_rep, model_name, ply)
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)
|
||||||
|
|
||||||
board_rep = "tesauro"
|
board_rep = "tesauro"
|
||||||
model_name = "tesauro_test_0_ply"
|
model_name = "tesauro_test_0_ply"
|
||||||
ply = 1
|
ply = 0
|
||||||
|
|
||||||
run_stuff(board_rep, model_name, ply)
|
run_stuff(board_rep, model_name, ply)
|
||||||
|
|
||||||
|
|
|
@ -1,30 +1,30 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
MODELS_DIR = 'models'
|
||||||
|
|
||||||
def save(model_name)
|
def save(model_name)
|
||||||
require 'date'
|
require 'date'
|
||||||
|
|
||||||
models_dir = 'models'
|
model_path = File.join(MODELS_DIR, model_name)
|
||||||
model_path = File.join(models_dir, model_name)
|
|
||||||
if not File.exists? model_path then
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
|
|
||||||
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
|
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
|
||||||
|
|
||||||
puts "Found model #{model_name} with episodes #{episode_count} trained!"
|
puts "Found model #{model_name} with episodes #{episode_count} trained!"
|
||||||
|
|
||||||
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
|
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
|
||||||
save_path = File.join(models_dir, 'saves', file_name)
|
save_path = File.join(MODELS_DIR, 'saves', file_name)
|
||||||
puts "Saving to #{save_path}"
|
puts "Saving to #{save_path}"
|
||||||
|
|
||||||
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
|
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
|
||||||
|
|
||||||
return true
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def train(model, episodes)
|
def train(model, episodes)
|
||||||
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
|
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def force_train(model, episodes)
|
||||||
|
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
|
||||||
|
end
|
||||||
|
|
||||||
def evaluate(model, episodes, method)
|
def evaluate(model, episodes, method)
|
||||||
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
|
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
|
||||||
end
|
end
|
||||||
|
@ -33,11 +33,9 @@ model = ARGV[0]
|
||||||
|
|
||||||
if model.nil? then raise "no model specified" end
|
if model.nil? then raise "no model specified" end
|
||||||
|
|
||||||
while true do
|
if not File.exists? File.join(MODELS_DIR, model) then
|
||||||
|
force_train model, 10
|
||||||
save model
|
save model
|
||||||
train model, 1000
|
|
||||||
save model
|
|
||||||
train model, 1000
|
|
||||||
3.times do
|
3.times do
|
||||||
evaluate model, 250, "pubeval"
|
evaluate model, 250, "pubeval"
|
||||||
end
|
end
|
||||||
|
@ -45,3 +43,27 @@ while true do
|
||||||
evaluate model, 250, "dumbeval"
|
evaluate model, 250, "dumbeval"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# while true do
|
||||||
|
# save model
|
||||||
|
# train model, 1000
|
||||||
|
# save model
|
||||||
|
# train model, 1000
|
||||||
|
# 3.times do
|
||||||
|
# evaluate model, 250, "pubeval"
|
||||||
|
# end
|
||||||
|
# 3.times do
|
||||||
|
# evaluate model, 250, "dumbeval"
|
||||||
|
# end
|
||||||
|
# end
|
||||||
|
|
||||||
|
while true do
|
||||||
|
save model
|
||||||
|
train model, 500
|
||||||
|
5.times do
|
||||||
|
evaluate model, 250, "pubeval"
|
||||||
|
end
|
||||||
|
5.times do
|
||||||
|
evaluate model, 250, "dumbeval"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
17
board.py
17
board.py
|
@ -16,8 +16,6 @@ class Board:
|
||||||
return quack.idxs_with_checkers_of_player(board, player)
|
return quack.idxs_with_checkers_of_player(board, player)
|
||||||
|
|
||||||
|
|
||||||
# TODO: Write a test for this
|
|
||||||
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
|
|
||||||
# index 26 is player 1 home, index 27 is player -1 home
|
# index 26 is player 1 home, index 27 is player -1 home
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_to_pubeval(board, player):
|
def board_features_to_pubeval(board, player):
|
||||||
|
@ -249,9 +247,6 @@ class Board:
|
||||||
# Find all points with checkers on them belonging to the player
|
# Find all points with checkers on them belonging to the player
|
||||||
# Iterate through each index and check if it's a possible move given the roll
|
# Iterate through each index and check if it's a possible move given the roll
|
||||||
|
|
||||||
# TODO: make sure that it is not possible to do nothing on first part of
|
|
||||||
# turn and then do something with the second die
|
|
||||||
|
|
||||||
def calc_moves(board, face_value):
|
def calc_moves(board, face_value):
|
||||||
if face_value == 0:
|
if face_value == 0:
|
||||||
return [board]
|
return [board]
|
||||||
|
@ -273,23 +268,13 @@ class Board:
|
||||||
# print("Dice permuts:",dice_permutations)
|
# print("Dice permuts:",dice_permutations)
|
||||||
for roll in dice_permutations:
|
for roll in dice_permutations:
|
||||||
# Calculate boards resulting from first move
|
# Calculate boards resulting from first move
|
||||||
#print("initial board: ", board)
|
|
||||||
#print("roll:", roll)
|
|
||||||
#print("Rest of roll:",roll[1:])
|
|
||||||
boards = calc_moves(board, roll[0])
|
boards = calc_moves(board, roll[0])
|
||||||
#print("Boards:",boards)
|
|
||||||
#print("Roll:",roll[0])
|
|
||||||
#print("boards after first die: ", boards)
|
|
||||||
|
|
||||||
for die in roll[1:]:
|
for die in roll[1:]:
|
||||||
# Calculate boards resulting from second move
|
# Calculate boards resulting from second move
|
||||||
nested_boards = [calc_moves(board, die) for board in boards]
|
nested_boards = [calc_moves(board, die) for board in boards]
|
||||||
#print("nested boards: ", nested_boards)
|
|
||||||
boards = [board for boards in nested_boards for board in boards]
|
boards = [board for boards in nested_boards for board in boards]
|
||||||
# What the fuck
|
|
||||||
#for board in boards:
|
|
||||||
# print(board)
|
|
||||||
# print("type__:",type(board))
|
|
||||||
# Add resulting unique boards to set of legal boards resulting from roll
|
# Add resulting unique boards to set of legal boards resulting from roll
|
||||||
|
|
||||||
#print("printing boards from calculate_legal_states: ", boards)
|
#print("printing boards from calculate_legal_states: ", boards)
|
||||||
|
|
2
main.py
2
main.py
|
@ -209,6 +209,8 @@ if __name__ == "__main__":
|
||||||
|
|
||||||
elif args.eval:
|
elif args.eval:
|
||||||
network = Network(config, config['model'])
|
network = Network(config, config['model'])
|
||||||
|
network.restore_model()
|
||||||
|
|
||||||
for i in range(int(config['repeat_eval'])):
|
for i in range(int(config['repeat_eval'])):
|
||||||
start_episode = network.episodes_trained
|
start_episode = network.episodes_trained
|
||||||
# Evaluation measures are described in `config`
|
# Evaluation measures are described in `config`
|
||||||
|
|
|
@ -114,12 +114,11 @@ class Network:
|
||||||
|
|
||||||
with tf.GradientTape() as tape:
|
with tf.GradientTape() as tape:
|
||||||
value = self.model(prev_state.reshape(1,-1))
|
value = self.model(prev_state.reshape(1,-1))
|
||||||
|
|
||||||
grads = tape.gradient(value, self.model.variables)
|
grads = tape.gradient(value, self.model.variables)
|
||||||
|
|
||||||
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
|
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
|
||||||
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
|
|
||||||
|
|
||||||
with tf.variable_scope('apply_gradients'):
|
|
||||||
for grad, train_var in zip(grads, self.model.variables):
|
for grad, train_var in zip(grads, self.model.variables):
|
||||||
backprop_calc = self.learning_rate * difference_in_values * grad
|
backprop_calc = self.learning_rate * difference_in_values * grad
|
||||||
train_var.assign_add(backprop_calc)
|
train_var.assign_add(backprop_calc)
|
||||||
|
@ -299,7 +298,7 @@ class Network:
|
||||||
length_list = []
|
length_list = []
|
||||||
test_list = []
|
test_list = []
|
||||||
# Prepping of data
|
# Prepping of data
|
||||||
start = time.time()
|
# start = time.time()
|
||||||
for board in boards:
|
for board in boards:
|
||||||
length = 0
|
length = 0
|
||||||
for roll in all_rolls:
|
for roll in all_rolls:
|
||||||
|
@ -478,7 +477,6 @@ class Network:
|
||||||
for episode in range(1, episodes + 1):
|
for episode in range(1, episodes + 1):
|
||||||
|
|
||||||
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
||||||
# TODO decide which player should be here
|
|
||||||
|
|
||||||
# player = 1
|
# player = 1
|
||||||
player = random.choice([-1,1])
|
player = random.choice([-1,1])
|
||||||
|
|
17
test.py
17
test.py
|
@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
|
||||||
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
self.assertTrue((Board.board_features_tesauro(board, 1) ==
|
||||||
np.array(expected).reshape(1, 198)).all())
|
np.array(expected).reshape(1, 198)).all())
|
||||||
|
|
||||||
|
def test_pubeval_features(self):
|
||||||
|
board = Board.initial_state
|
||||||
|
|
||||||
|
expected = (0,
|
||||||
|
2, 0, 0, 0, 0, -5,
|
||||||
|
0, -3, 0, 0, 0, 5,
|
||||||
|
-5, 0, 0, 0, 3, 0,
|
||||||
|
5, 0, 0, 0, 0, -2,
|
||||||
|
0,
|
||||||
|
0, 0)
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
|
||||||
|
np.array(expected).reshape(1, 28)).all())
|
||||||
|
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
|
||||||
|
np.array(expected).reshape(1, 28)).all())
|
||||||
|
|
||||||
def test_tesauro_bars(self):
|
def test_tesauro_bars(self):
|
||||||
board = list(Board.initial_state)
|
board = list(Board.initial_state)
|
||||||
board[1] = 0
|
board[1] = 0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user