Compare commits

...

8 Commits

7 changed files with 75 additions and 45 deletions

18
app.py
View File

@ -2,6 +2,7 @@ from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
@ -17,8 +18,8 @@ CORS(app)
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "1"
config['board_representation'] = 'quack-fat'
config['ply'] = "0"
config['board_representation'] = 'tesauro'
network = Network(config, config['model'])
network.restore_model()
@ -90,11 +91,16 @@ def bot_move():
data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1,7), random.randrange(1,7))
# print(roll)
board, _ = network.make_move(tuple(board), roll, 1)
# print("Boards!:",board)
roll = (random.randrange(1, 7), random.randrange(1, 7))
if use_pubeval:
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
return ",".join([str(x) for x in list(board)])

View File

@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 1
ply = 0
run_stuff(board_rep, model_name, ply)
@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 1
ply = 0
run_stuff(board_rep, model_name, ply)

View File

@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'
# Archive the directory of `model_name` (under MODELS_DIR) into a timestamped
# tarball inside the `saves` subdirectory of MODELS_DIR.
#
# The trained-episode count is read from the model's `episodes_trained` file
# and embedded in the archive name together with the current time.
#
# Returns false when the model directory does not exist, true otherwise.
# The exit status of `tar` is not inspected.
def save(model_name)
  require 'date'

  model_path = File.join(MODELS_DIR, model_name)
  # File.exists? is deprecated and was removed in Ruby 3.2; use File.exist?.
  return false unless File.exist? model_path

  episode_count = File.read(File.join(model_path, 'episodes_trained')).to_i
  puts "Found model #{model_name} with episodes #{episode_count} trained!"

  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
  save_path = File.join(MODELS_DIR, 'saves', file_name)
  puts "Saving to #{save_path}"

  # Multi-argument form of system: the command is run directly, without a
  # shell, so the model name needs no quoting.
  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
  true
end
# Run `main.py --train` on `model` for `episodes` episodes.
# Returns whatever Kernel#system reports for the python3 invocation.
def train(model, episodes)
  argv = ["python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s]
  system(*argv)
end
# Same as a regular training run, but additionally passes `--force-creation`
# to main.py. Returns whatever Kernel#system reports for the invocation.
def force_train(model, episodes)
  argv = ["python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s]
  system(*argv)
end
# Run `main.py --eval` on `model` for `episodes` episodes, forwarding `method`
# as the value of `--eval-methods`.
# Returns whatever Kernel#system reports for the invocation.
def evaluate(model, episodes, method)
  argv = ["python3", "main.py", "--eval", "--model", model, "--episodes", episodes.to_s, "--eval-methods", method]
  system(*argv)
end
@ -33,11 +33,9 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end
while true do
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
@ -45,3 +43,27 @@ while true do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
# Endless cycle: archive the model, train it for 500 episodes, then run five
# 250-episode evaluations with each evaluation method in turn.
while true do
  save model
  train model, 500
  ["pubeval", "dumbeval"].each do |eval_method|
    5.times { evaluate model, 250, eval_method }
  end
end

View File

@ -15,9 +15,7 @@ class Board:
def idxs_with_checkers_of_player(board, player):
return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home
@staticmethod
def board_features_to_pubeval(board, player):
@ -249,9 +247,6 @@ class Board:
# Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value):
if face_value == 0:
return [board]
@ -273,23 +268,13 @@ class Board:
# print("Dice permuts:",dice_permutations)
for roll in dice_permutations:
# Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]:
# Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards)

View File

@ -209,6 +209,8 @@ if __name__ == "__main__":
elif args.eval:
network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained
# Evaluation measures are described in `config`

View File

@ -114,15 +114,14 @@ class Network:
with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
@ -299,7 +298,7 @@ class Network:
length_list = []
test_list = []
# Prepping of data
start = time.time()
# start = time.time()
for board in boards:
length = 0
for roll in all_rolls:
@ -478,7 +477,6 @@ class Network:
for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1
player = random.choice([-1,1])

17
test.py
View File

@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
    # The pubeval feature vector of the initial board must equal the same
    # 28 expected values for both player 1 and player -1.
    import numpy as np

    expected_features = np.array((
        0,
        2, 0, 0, 0, 0, -5,
        0, -3, 0, 0, 0, 5,
        -5, 0, 0, 0, 3, 0,
        5, 0, 0, 0, 0, -2,
        0,
        0, 0,
    )).reshape(1, 28)

    start_board = Board.initial_state
    for player in (1, -1):
        features = Board.board_features_to_pubeval(start_board, player)
        self.assertTrue((features == expected_features).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0