Compare commits


No commits in common. "master" and "eager_eval" have entirely different histories.

9 changed files with 247 additions and 415 deletions

141
app.py
View File

@@ -1,141 +0,0 @@
from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network
app = Flask(__name__)
app.config['JSON_ADD_STATUS'] = False
app.config['JSON_JSONP_OPTIONAL'] = False
json = FlaskJSON(app)
CORS(app)
config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
network = Network(config, config['model'])
network.restore_model()
def calc_move_sets(from_board, roll, player):
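# Build one [legal_states, pip_value] entry per single die in the roll, plus a final entry for the combined roll.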
board = from_board
sets = []
total = 0
for r in roll:
# print("Value of r:", r)
sets.append([Board.calculate_legal_states(board, player, [r, 0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, roll), total])
return sets
def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
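# Match the submitted to_board against the legal-state sets; on a hit, consume the used die
# (or the whole roll if the combined set matched) and accept the new board.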
sets = calc_move_sets(from_board, roll, player)
return_board = from_board
print("To board:\n",to_board)
print("All sets:\n",sets)
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
# print(to_board)
# print(board_set)
if to_board in board_set[0]:
# print("To board:", to_board)
# print(board_set[0])
# print(board_set[1])
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < (4 if is_quad else 2):
roll[idx] = 0
else:
roll = [0, 0]
return_board = to_board
break
# print("Return board!:\n",return_board)
return total_moves, roll, return_board
def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
total_moves, roll, board = tmp_name(from_board, to_board, list(roll), player, total_roll, is_quad)
return board, total_moves, roll
@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
return {'board':'0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}
def check_move(prev, curr):
# TODO: Decide on player system and implement roll properly
legal_states = Board.calculate_legal_states(tuple(prev), -1, [1,2])
truth_list = [list(curr) == list(ele) for ele in legal_states]
return any(truth_list)
@app.route('/bot_move', methods=['POST'])
def bot_move():
data = request.get_json(force=True)
board = [int(x) for x in data['board'].split(',')]
use_pubeval = bool(data['pubeval'])
roll = (random.randrange(1, 7), random.randrange(1, 7))
if use_pubeval:
board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
else:
board, _ = network.make_move(tuple(board), roll, 1)
# print("Board!:",board)
return ",".join([str(x) for x in list(board)])
@app.route('/post_board', methods=['POST'])
def post_board():
data = request.get_json(force=True)
# TODO: Fix hardcoded player
player = -1
board = [int(x) for x in data['board'].split(',')]
prev_board = [int(x) for x in data['prevBoard'].split(',')]
print(data['roll'])
roll = [int(x) for x in data['roll'].split(',')]
print(roll)
quad = data['quad'] == "true"
# print(board)
total_roll = int(data['totalRoll'])
print("total roll is:", total_roll)
return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
str_board = ",".join([str(x) for x in return_board])
str_roll = ",".join([str(x) for x in roll])
return_string = str_board + "#" + str(total_moves) + "#" + str_roll
print(return_string)
return return_string
if __name__ == '__main__':
app.run(host = '0.0.0.0', port=35270)

View File

@@ -1,78 +0,0 @@
def run_stuff(board_rep, model_name, ply)
epi_count = 0
system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
while epi_count < 200000 do
system("python3 main.py --eval --model #{model_name} --eval-methods dumbeval --episodes 250 --ply #{ply} --repeat-eval 3")
system("python3 main.py --eval --model #{model_name} --eval-methods pubeval --episodes 250 --ply #{ply} --repeat-eval 3")
system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
epi_count += 2000
end
end
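# run_stuff: one warm-up training episode, then alternate evaluation (dumbeval and pubeval,
# 250 episodes each, repeated 3 times) with 2000 more training episodes, up to 200000 in total.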
### ///////////////////////////////////////////////////////////////
# QUACK TESTINGS
### ///////////////////////////////////////////////////////////////
board_rep = "quack"
model_name = "quack_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack"
# model_name = "quack_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# QUACK-FAT TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "quack-fat"
model_name = "quack-fat_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack-fat"
# model_name = "quack-fat_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# QUACK-NORM TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "quack-norm"
# model_name = "quack-norm_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)
### ///////////////////////////////////////////////////////////////
# TESAURO TESTING
### ///////////////////////////////////////////////////////////////
board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0
run_stuff(board_rep, model_name, ply)
# board_rep = "tesauro"
# model_name = "tesauro_test_1_ply"
# ply = 1
# run_stuff(board_rep, model_name, ply)

View File

@@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'
def save(model_name)
require 'date'
model_path = File.join(MODELS_DIR, model_name)
models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(MODELS_DIR, 'saves', file_name)
save_path = File.join(models_dir, 'saves', file_name)
puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
end
def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end
def force_train(model, episodes)
system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end
@@ -33,37 +33,15 @@ model = ARGV[0]
if model.nil? then raise "no model specified" end
if not File.exists? File.join(MODELS_DIR, model) then
force_train model, 10
save model
3.times do
evaluate model, 250, "pubeval"
end
3.times do
evaluate model, 250, "dumbeval"
end
end
# while true do
# save model
# train model, 1000
# save model
# train model, 1000
# 3.times do
# evaluate model, 250, "pubeval"
# end
# 3.times do
# evaluate model, 250, "dumbeval"
# end
# end
while true do
save model
train model, 500
5.times do
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
5.times do
3.times do
evaluate model, 250, "dumbeval"
end
end

board.py
View File

@@ -16,6 +16,8 @@ class Board:
return quack.idxs_with_checkers_of_player(board, player)
# TODO: Write a test for this
# TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
# index 26 is player 1 home, index 27 is player -1 home
@staticmethod
def board_features_to_pubeval(board, player):
@@ -49,6 +51,7 @@ class Board:
# board += ([1, 0] if np.sign(player) > 0 else [0, 1])
# return np.array(board).reshape(1,30)
# quack-fatter
@staticmethod
def board_features_quack_norm(board, player):
@@ -63,7 +66,7 @@ class Board:
board.append(15 - sum(positives))
board.append(-15 - sum(negatives))
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
return np.array(board).reshape(1, 30)
return np.array(board).reshape(1,30)
# tesauro
@staticmethod
@@ -92,62 +95,9 @@ class Board:
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
board_rep += ([1,0] if cur_player == 1 else [1,0])
return np.array(board_rep).reshape(1, 198)
@staticmethod
def board_features_tesauro_fat(board, cur_player):
def ordinary_trans(val, player):
abs_val = val*player
if abs_val <= 0:
return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 1:
return (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 2:
return (1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 3:
return (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 4:
return (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 5:
return (1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 6:
return (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 7:
return (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 8:
return (1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
elif abs_val == 9:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0)
elif abs_val == 10:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
elif abs_val == 11:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
elif abs_val == 12:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
elif abs_val == 13:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0)
elif abs_val == 14:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
elif abs_val == 15:
return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)
def bar_trans(board, player):
if player == 1: return (abs(board[0]/2),)
elif player == -1: return (abs(board[25]/2),)
board_rep = []
for player in [1, -1]:
for x in board[1:25]:
board_rep += ordinary_trans(x, player)
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1, 0] if cur_player == 1 else [0, 1])
return np.array(board_rep).reshape(1, len(board_rep))
return np.array(board_rep).reshape(1,198)
@staticmethod
@@ -247,6 +197,9 @@ class Board:
# Find all points with checkers on them belonging to the player
# Iterate through each index and check if it's a possible move given the roll
# TODO: make sure that it is not possible to do nothing on first part of
# turn and then do something with the second die
def calc_moves(board, face_value):
if face_value == 0:
return [board]
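# a die of value 0 has already been spent, so the board passes through unchanged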
@@ -268,13 +221,23 @@
# print("Dice permuts:",dice_permutations)
for roll in dice_permutations:
# Calculate boards resulting from first move
#print("initial board: ", board)
#print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards)
for die in roll[1:]:
# Calculate boards resulting from second move
nested_boards = [calc_moves(board, die) for board in boards]
#print("nested boards: ", nested_boards)
boards = [board for boards in nested_boards for board in boards]
# What the fuck
#for board in boards:
# print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards)

30
main.py
View File

@@ -2,7 +2,6 @@ import argparse
import sys
import os
import time
import subprocess
# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")
@@ -78,20 +77,27 @@ if not os.path.isdir(model_path()):
if not os.path.isdir(log_path):
os.mkdir(log_path)
def save_config():
import yaml
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
# config_path = os.path.join(checkpoint_path, 'config')
# with open(config_path, 'a+') as f:
# print("lol")
print(yaml.dump(config))
# Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
format_vars = { 'trained_eps': trained_eps,
'count': len(outcome),
'sum': sum(outcome),
'mean': sum(outcome) / len(outcome),
'time': int(time.time()),
'average_diff_in_vals': diff_in_values,
'commit': commit
'average_diff_in_vals': diff_in_values/len(outcome)
}
with open(log_path, 'a+') as f:
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@@ -102,12 +108,9 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
:param log_path:
:return:
"""
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'commit': commit,
'trained_eps': trained_eps,
format_vars = { 'trained_eps': trained_eps,
'method': outcome[0],
'count': len(scores),
'sum': sum(scores),
@@ -115,10 +118,9 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
'time': int(time.time())
}
with open(log_path, 'a+') as f:
f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")
f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
@@ -128,10 +130,9 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
'mean': sum(scores) / len(scores),
'time': time,
'index': index,
'commit': commit
}
with open(log_path, 'a+') as f:
f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
def find_board_rep():
checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
@@ -171,6 +172,7 @@ if __name__ == "__main__":
# Set up network
from network import Network
save_config()
# Set up variables
episode_count = config['episode_count']
@@ -209,8 +211,6 @@ if __name__ == "__main__":
elif args.eval:
network = Network(config, config['model'])
network.restore_model()
for i in range(int(config['repeat_eval'])):
start_episode = network.episodes_trained
# Evaluation measures are described in `config`

network.py
View File

@@ -21,10 +21,10 @@ class Network:
'quack' : (28, Board.board_features_quack),
'tesauro' : (198, Board.board_features_tesauro),
'quack-norm' : (30, Board.board_features_quack_norm),
'tesauro-fat' : (726, Board.board_features_tesauro_fat),
'tesauro-poop': (198, Board.board_features_tesauro_wrong)
}
def custom_tanh(self, x, name=None):
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@@ -39,11 +39,6 @@ class Network:
'0': self.make_move_0_ply
}
self.max_or_min = {
1: np.argmax,
-1: np.argmin
}
tf.enable_eager_execution()
xavier_init = tf.contrib.layers.xavier_initializer()
@@ -98,7 +93,7 @@ class Network:
:param decay_steps: The amount of steps between each decay
:return: The result of the exponential decay performed on the learning rate
"""
res = max_lr * decay_rate ** (global_step // decay_steps)
res = max_lr * decay_rate**(global_step // decay_steps)
return res
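# e.g. with decay_rate 0.96 and decay_steps 50000 (the values used in do_backprop below),
# steps 0-49999 yield max_lr, steps 50000-99999 yield 0.96 * max_lr, and so on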
def do_backprop(self, prev_state, value_next):
@@ -109,19 +104,20 @@ class Network:
:return: Nothing, the calculation is performed on the model of the network
"""
self.learning_rate = tf.maximum(self.min_learning_rate,
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
name="learning_rate")
self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
name="learning_rate")
with tf.GradientTape() as tape:
value = self.model(prev_state.reshape(1,-1))
grads = tape.gradient(value, self.model.variables)
difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
with tf.variable_scope('apply_gradients'):
for grad, train_var in zip(grads, self.model.variables):
backprop_calc = self.learning_rate * difference_in_values * grad
train_var.assign_add(backprop_calc)
@@ -148,9 +144,8 @@ class Network:
:param episode_count:
:return:
"""
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
#self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
print("[NETWK] ({name}) Saving model to:".format(name=self.name),
os.path.join(self.checkpoint_path, 'model.ckpt'))
@@ -170,7 +165,8 @@ class Network:
:param states: A number of states. The states have to be transformed before being given to this function.
:return:
"""
return self.model.predict_on_batch(states)
values = self.model.predict_on_batch(states)
return values
def restore_model(self):
@@ -178,6 +174,7 @@ class Network:
Restore a model for a session, such that a trained model can either be further trained or
used for evaluation
:param sess: Current session
:return: Nothing. It's a side-effect that a model gets restored for the network.
"""
@@ -189,6 +186,9 @@ class Network:
str(latest_checkpoint))
tfe.Saver(self.model.variables).restore(latest_checkpoint)
# variables_names = [v.name for v in self.model.variables]
# Restore trained episode count for model
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
if os.path.isfile(episode_count_path):
@@ -211,6 +211,7 @@ class Network:
and then picking the best by using the network to evaluate each state. This is 0-ply, i.e. no look-ahead.
The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
:param sess:
:param board: Current board
:param roll: Current roll
:param player: Current player
@@ -220,12 +221,13 @@ class Network:
legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
scores = self.model.predict_on_batch(legal_states)
transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
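# the network scores states from player 1's perspective, so player -1 ranks by 1 - score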
best_score_idx = self.max_or_min[player](scores)
best_score_idx = np.argmax(np.array(transformed_scores))
best_move = legal_moves[best_score_idx]
best_score = scores[best_score_idx]
best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
return (best_move, best_score)
return [best_move, best_score]
def make_move_1_ply(self, board, roll, player):
"""
@@ -235,9 +237,9 @@ class Network:
:param player:
:return:
"""
start = time.time()
# start = time.time()
best_pair = self.calculate_1_ply(board, roll, player)
#print(time.time() - start)
# print(time.time() - start)
return best_pair
@@ -246,31 +248,35 @@ class Network:
Find the best move based on a 1-ply look-ahead. First the 10 best moves are picked from a 0-ply evaluation and then
all moves and scores are found for them. The expected score is then calculated for each of the boards from the
0-ply.
:param sess:
:param board:
:param roll: The original roll
:param player: The current player
:return: Best possible move based on 1-ply look-ahead
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
scores = [ score.numpy()
for score
in self.calc_vals(legal_states) ]
scores = self.calc_vals(legal_states)
scores = [score.numpy() for score in scores]
moves_and_scores = list(zip(init_legal_states, scores))
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
scores = self.do_ply(best_boards, player)
sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
best_score_idx = self.max_or_min[player](scores)
# best_score_idx = np.array(trans_scores).argmax()
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
return (best_boards[best_score_idx], scores[best_score_idx])
scores, trans_scores = self.do_ply(best_boards, player)
best_score_idx = np.array(trans_scores).argmax()
return [best_boards[best_score_idx], scores[best_score_idx]]
def do_ply(self, boards, player):
"""
@@ -279,6 +285,7 @@ class Network:
allowing the function to search deeper, which could result in an even larger search space. If we wish
to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
:param sess:
:param boards: The boards to try all rolls on
:param player: The player of the previous ply
:return: An array of scores where each index describes one of the boards which was given as param
@@ -298,11 +305,11 @@ class Network:
length_list = []
test_list = []
# Prepping of data
# start = time.time()
start= time.time()
for board in boards:
length = 0
for roll in all_rolls:
all_states = Board.calculate_legal_states(board, player*-1, roll)
all_states = list(Board.calculate_legal_states(board, player*-1, roll))
for state in all_states:
state = np.array(self.board_trans_func(state, player*-1)[0])
test_list.append(state)
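# every successor state of every (board, roll) pair goes into one flat batch;
# length_list records how many states belong to each board so the scores can be split back out below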
@@ -313,19 +320,146 @@ class Network:
start = time.time()
all_scores = self.model.predict_on_batch(np.array(test_list))
all_scores_legit = self.model.predict_on_batch(np.array(test_list))
split_scores = []
from_idx = 0
for length in length_list:
split_scores.append(all_scores[from_idx:from_idx+length])
split_scores.append(all_scores_legit[from_idx:from_idx+length])
from_idx += length
means_splits = [tf.reduce_mean(scores) for scores in split_scores]
transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]
# print(time.time() - start)
# print("/"*50)
return means_splits
return ([means_splits, transformed_means_splits])
def calc_n_ply(self, n_init, sess, board, player, roll):
"""
:param n_init:
:param sess:
:param board:
:param player:
:param roll:
:return:
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
# find all values for the above boards
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# Python's reverse is in place and I can't call [:15] on it without applying it to an object like this.
sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
return best_move_score_pair
def n_ply(self, n_init, sess, boards_init, player_init):
"""
:param n_init:
:param sess:
:param boards_init:
:param player_init:
:return:
"""
def ply(n, boards, player):
def calculate_possible_states(board):
possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
(1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
(2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
(4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
(6, 6) ]
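# the 21 distinct outcomes of rolling two dice, ignoring order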
# for roll in possible_rolls:
# print(len(Board.calculate_legal_states(board, player, roll)))
return [ Board.calculate_legal_states(board, player, roll)
for roll
in possible_rolls ]
def find_best_state_score(boards):
score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
for board
in boards ]
scores = [ pair[1]
for pair
in score_pairs ]
best_score_pair = score_pairs[np.array(scores).argmax()]
return best_score_pair
def average_score(boards):
return sum(boards)/len(boards)
def average_ply_score(board):
states_for_rolls = calculate_possible_states(board)
best_state_score_for_each_roll = [
find_best_state_score(states)
for states
in states_for_rolls ]
best_score_for_each_roll = [ x[1]
for x
in best_state_score_for_each_roll ]
average_score_var = average_score(best_score_for_each_roll)
return average_score_var
if n == 1:
average_score_pairs = [ (board, average_ply_score(board))
for board
in boards ]
return average_score_pairs
elif n > 1: # n != 1
def average_for_score_pairs(score_pairs):
scores = [ pair[1]
for pair
in score_pairs ]
return sum(scores)/len(scores)
def average_plain(scores):
return sum(scores)/len(scores)
print("+"*20)
print(n)
print(type(boards))
print(boards)
possible_states_for_boards = [
(board, calculate_possible_states(board))
for board
in boards ]
average_score_pairs = [
(inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
for inner_board
in inner_boards[1] ]))
for inner_boards
in possible_states_for_boards ]
return average_score_pairs
else:
assert False
if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
boards_with_scores = ply(n_init, boards_init, -1 * player_init)
#print("Boards with scores:",boards_with_scores)
scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
for pair
in boards_with_scores ]
#print("All the scores:",scores)
best_score_pair = boards_with_scores[np.array(scores).argmax()]
return best_score_pair
def eval(self, episode_count, trained_eps = 0):
@@ -343,6 +477,7 @@ class Network:
"""
Do the actual evaluation
:param sess:
:param method: Either pubeval or dumbeval
:param episodes: Amount of episodes to use in the evaluation
:param trained_eps:
@@ -366,6 +501,7 @@ class Network:
sys.stderr.write(
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'pubeval':
outcomes = []
for i in range(1, episodes + 1):
@@ -373,9 +509,11 @@ class Network:
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
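# the network moves as player 1 and pubeval replies as player -1; [0:26] drops the two
# home indices of the pubeval board representation (cf. board_features_to_pubeval)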
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@@ -394,9 +532,11 @@ class Network:
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.make_move(board, roll, 1))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@@ -456,8 +596,10 @@ class Network:
:return:
"""
difference_in_vals = 0
self.restore_model()
average_diffs = 0
start_time = time.time()
def print_time_estimate(eps_completed):
@@ -477,27 +619,28 @@ class Network:
for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here
# player = 1
player = random.choice([-1,1])
player = 1
prev_board = Board.initial_state
i = 0
difference_in_values = 0
while Board.outcome(prev_board) is None:
i += 1
self.global_step += 1
cur_board, cur_board_value = self.make_move(prev_board,
(random.randrange(1, 7), random.randrange(1, 7)),
player)
difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
if self.config['verbose']:
print("Difference in values:", difference_in_vals)
print("Current board value :", cur_board_value)
print("Current board is :\n",cur_board)
# adjust weights
if Board.outcome(cur_board) is None:
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
@@ -511,10 +654,6 @@ class Network:
final_score = np.array([Board.outcome(final_board)[1]])
scaled_final_score = ((final_score + 2) / 4)
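# rescale the final outcome from [-2, 2] to [0, 1]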
difference_in_values += abs(scaled_final_score-cur_board_value)
average_diffs += (difference_in_values[0][0] / (i+1))
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
sys.stderr.write("\n")
@@ -527,9 +666,8 @@ class Network:
print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(episode+trained_eps)
return outcomes, average_diffs/len(outcomes)
return outcomes, difference_in_vals[0][0]

View File

@@ -57,11 +57,4 @@ boards = {initial_state,
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
diff = [0, 0]
val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
print(val)
diff[0] += abs(-1-val)
diff[1] += 1
print(diff[1])
network.play_against_network()

View File

@@ -20,22 +20,21 @@ class Player:
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
print(sets)
return sets
def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
def tmp_name(self, from_board, to_board, roll, player, total_moves):
sets = self.calc_move_sets(from_board, roll, player)
return_board = from_board
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
# print(to_board)
# print(board_set)
print(to_board)
print(board_set)
if to_board in board_set[0]:
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < (4 if is_quad else 2):
if idx < 2:
roll[idx] = 0
else:
roll = [0,0]
@@ -44,11 +43,8 @@ class Player:
return total_moves, roll, return_board
def make_human_move(self, board, roll):
is_quad = roll[0] == roll[1]
total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
if is_quad:
roll = [roll[0]]*4
total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
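# a double grants four moves of the rolled value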
move = ""
while total_moves != 0:
while True:
print("You have {roll} left!".format(roll=total_moves))
@@ -64,6 +60,6 @@ class Player:
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
print(Board.pretty(board))
return board

17
test.py
View File

@@ -737,23 +737,6 @@ class TestBoardFlip(unittest.TestCase):
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_pubeval_features(self):
board = Board.initial_state
expected = (0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0,
0, 0)
import numpy as np
self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
np.array(expected).reshape(1, 28)).all())
self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
np.array(expected).reshape(1, 28)).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0