Compare commits

master..v1.0-rc1

No commits in common. "master" and "v1.0-rc1" have entirely different histories.

7 changed files with 46 additions and 76 deletions

app.py

@@ -2,7 +2,6 @@ from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
@@ -18,8 +17,8 @@ CORS(app)
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
config['ply'] = "1"
config['board_representation'] = 'quack-fat'
network = Network(config, config['model'])
network.restore_model()
@@ -91,16 +90,11 @@ def bot_move():
data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1, 7), random.randrange(1, 7))
if use_pubeval:
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
roll = (random.randrange(1,7), random.randrange(1,7))
# print(roll)
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
# print("Boards!:",board)
return ",".join([str(x) for x in list(board)])


@@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0
ply = 1
run_stuff(board_rep, model_name, ply)
@@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)
board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0
ply = 1
run_stuff(board_rep, model_name, ply)


@@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name)
require 'date'
model_path = File.join(MODELS_DIR, model_name)
models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(MODELS_DIR, 'saves', file_name)
save_path = File.join(models_dir, 'saves', file_name)
puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
end
def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end
@@ -33,37 +33,15 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
3.times do
evaluate model, 250, "pubeval"
end
3.times do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do
save model
train model, 500
5.times do
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
5.times do
3.times do
evaluate model, 250, "dumbeval"
end
end

board.py

@@ -16,6 +16,8 @@ class Board:
return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home
@staticmethod
def board_features_to_pubeval(board, player):
@@ -247,6 +249,9 @@ class Board:
# Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value):
if face_value == 0:
return [board]
@@ -268,13 +273,23 @@ class Board:
# print("Dice permuts:",dice_permutations)
for roll in dice_permutations:
# Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]:
# Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards)

main.py

@@ -209,8 +209,6 @@ if __name__ == "__main__":
elif args.eval:
network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained
# Evaluation measures are described in `config`

network.py

@@ -114,11 +114,12 @@ class Network:
with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
@@ -298,7 +299,7 @@ class Network:
length_list = []
test_list = []
# Prepping of data
# start = time.time()
start = time.time()
for board in boards:
length = 0
for roll in all_rolls:
@@ -477,6 +478,7 @@ class Network:
for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1
player = random.choice([-1,1])

test.py

@@ -737,23 +737,6 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
board = Board.initial_state
expected = (0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0,
0, 0)
import numpy as np
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
np.array(expected).reshape(1, 28)).all())
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
np.array(expected).reshape(1, 28)).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0