Compare commits

master..v1.0-rc1

No commits in common. "master" and "v1.0-rc1" have entirely different histories.

7 changed files with 46 additions and 76 deletions

app.py

@@ -2,7 +2,6 @@ from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
@@ -18,8 +17,8 @@ CORS(app)
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
config['ply'] = "1"
config['board_representation'] = 'quack-fat'
network = Network(config, config['model'])
network.restore_model()
@@ -91,16 +90,11 @@ def bot_move():
data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1, 7), random.randrange(1, 7))
if use_pubeval:
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
roll = (random.randrange(1,7), random.randrange(1,7))
# print(roll)
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
# print("Boards!:",board)
return ",".join([str(x) for x in list(board)])


@@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0
ply = 1
run_stuff(board_rep, model_name, ply)
@@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0
ply = 1
run_stuff(board_rep, model_name, ply)


@@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name)
require 'date'
model_path = File.join(MODELS_DIR, model_name)
models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(MODELS_DIR, 'saves', file_name)
save_path = File.join(models_dir, 'saves', file_name)
puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
end
def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end
@@ -33,37 +33,15 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
3.times do
evaluate model, 250, "pubeval"
end
3.times do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do
save model
train model, 500
5.times do
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
5.times do
3.times do
evaluate model, 250, "dumbeval"
end
end

board.py

@@ -16,6 +16,8 @@ class Board:
return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home
@staticmethod
def board_features_to_pubeval(board, player):
@@ -247,6 +249,9 @@ class Board:
# Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value):
if face_value == 0:
return [board]
@@ -268,13 +273,23 @@ class Board:
# print("Dice permuts:",dice_permutations)
for roll in dice_permutations:
# Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]:
# Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards)

main.py

@@ -209,8 +209,6 @@ if __name__ == "__main__":
elif args.eval:
network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained
# Evaluation measures are described in `config`

network.py

@@ -114,11 +114,12 @@ class Network:
with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
@@ -298,7 +299,7 @@ class Network:
length_list = []
test_list = []
# Prepping of data
# start = time.time()
start = time.time()
for board in boards:
length = 0
for roll in all_rolls:
@@ -477,6 +478,7 @@ class Network:
for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1
player = random.choice([-1,1])

test.py

@@ -737,23 +737,6 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
board = Board.initial_state
expected = (0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0,
0, 0)
import numpy as np
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
np.array(expected).reshape(1, 28)).all())
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
np.array(expected).reshape(1, 28)).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0