Compare commits
eager_eval...master (27 commits)

Commits:
ea4efc5a2b
26c0b469eb
f170bad9b1
6e061171da
40c228ef01
c2c6c89e9f
b7708b3675
bad870c27a
653d6e30a8
7e51b44e33
1fd6c35baa
d426c1c3b5
5ab144cffc
cef8e54709
2efbc446f2
c54f7aca24
c31bc39780
6133cb439f
5acd79b6da
b11e783b30
f834b10e02
72f01a2a2d
d14e6c5994
a266293ecd
e9a46c79df
816cdfae00
ff9664eb38
app.py (new file, 141 lines)

@@ -0,0 +1,141 @@
from flask import Flask, request, jsonify
from flask_json import FlaskJSON, as_json_p
from flask_cors import CORS
from board import Board
from eval import Eval
import main
import random
from network import Network

app = Flask(__name__)


app.config['JSON_ADD_STATUS'] = False
app.config['JSON_JSONP_OPTIONAL'] = False

json = FlaskJSON(app)
CORS(app)

config = main.config.copy()
config['model'] = "player_testings"
config['ply'] = "0"
config['board_representation'] = 'tesauro'
network = Network(config, config['model'])

network.restore_model()


def calc_move_sets(from_board, roll, player):
    board = from_board
    sets = []
    total = 0
    for r in roll:
        # print("Value of r:", r)
        sets.append([Board.calculate_legal_states(board, player, [r, 0]), r])
        total += r
    sets.append([Board.calculate_legal_states(board, player, roll), total])
    return sets


def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
    sets = calc_move_sets(from_board, roll, player)
    return_board = from_board
    print("To board:\n", to_board)
    print("All sets:\n", sets)
    for idx, board_set in enumerate(sets):
        board_set[0] = list(board_set[0])
        # print(to_board)
        # print(board_set)
        if to_board in board_set[0]:
            # print("To board:", to_board)
            # print(board_set[0])
            # print(board_set[1])
            total_moves -= board_set[1]
            # if it's not the sum of the moves
            if idx < (4 if is_quad else 2):
                roll[idx] = 0
            else:
                roll = [0, 0]
            return_board = to_board
            break

    # print("Return board!:\n", return_board)
    return total_moves, roll, return_board


def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):

    total_moves, roll, board = tmp_name(from_board, to_board, list(roll), player, total_roll, is_quad)
    return board, total_moves, roll


@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
    return {'board': '0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'}



def check_move(prev, curr):

    # TODO: Decide on player system and implement roll properly
    legal_states = Board.calculate_legal_states(tuple(prev), -1, [1, 2])

    truth_list = [list(curr) == list(ele) for ele in legal_states]

    return any(truth_list)



@app.route('/bot_move', methods=['POST'])
def bot_move():
    data = request.get_json(force=True)

    board = [int(x) for x in data['board'].split(',')]
    use_pubeval = bool(data['pubeval'])

    roll = (random.randrange(1, 7), random.randrange(1, 7))

    if use_pubeval:
        board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
    else:
        board, _ = network.make_move(tuple(board), roll, 1)

    # print("Board!:", board)

    return ",".join([str(x) for x in list(board)])



@app.route('/post_board', methods=['POST'])
def post_board():
    data = request.get_json(force=True)

    # TODO: Fix hardcoded player
    player = -1

    board = [int(x) for x in data['board'].split(',')]
    prev_board = [int(x) for x in data['prevBoard'].split(',')]
    print(data['roll'])
    roll = [int(x) for x in data['roll'].split(',')]
    print(roll)
    quad = data['quad'] == "true"

    # print(board)

    total_roll = int(data['totalRoll'])
    print("total roll is:", total_roll)
    return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)

    str_board = ",".join([str(x) for x in return_board])
    str_roll = ",".join([str(x) for x in roll])

    return_string = str_board + "#" + str(total_moves) + "#" + str_roll

    print(return_string)

    return return_string

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=35270)
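As a quick illustration of the JSON contract of the /bot_move route added above, here is a minimal client sketch. It is not part of the diff; it assumes the app.py server is running locally on the port it binds (35270) and that the referenced model files exist.

import requests

# /bot_move expects a comma-separated board string plus a 'pubeval' flag and
# replies with the resulting board as a comma-separated string.
payload = {
    "board": "0,2,0,0,0,0,-5,0,-3,0,0,0,5,-5,0,0,0,3,0,5,0,0,0,0,-2,0",
    "pubeval": False,
}
resp = requests.post("http://localhost:35270/bot_move", json=payload)
print(resp.text)  # e.g. "0,2,0,...,-2,0"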
bin/0-ply-tests.rb (new file, 78 lines)

@@ -0,0 +1,78 @@
def run_stuff(board_rep, model_name, ply)
  epi_count = 0
  system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
  while epi_count < 200000 do
    system("python3 main.py --eval --model #{model_name} --eval-methods dumbeval --episodes 250 --ply #{ply} --repeat-eval 3")
    system("python3 main.py --eval --model #{model_name} --eval-methods pubeval --episodes 250 --ply #{ply} --repeat-eval 3")
    system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
    epi_count += 2000
  end
end


### ///////////////////////////////////////////////////////////////
# QUACK TESTINGS
### ///////////////////////////////////////////////////////////////

board_rep = "quack"
model_name = "quack_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)


# board_rep = "quack"
# model_name = "quack_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

### ///////////////////////////////////////////////////////////////
# QUACK-FAT TESTING
### ///////////////////////////////////////////////////////////////

board_rep = "quack-fat"
model_name = "quack-fat_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "quack-fat"
# model_name = "quack-fat_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

### ///////////////////////////////////////////////////////////////
# QUACK-NORM TESTING
### ///////////////////////////////////////////////////////////////


board_rep = "quack-norm"
model_name = "quack-norm_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "quack-norm"
# model_name = "quack-norm_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)

### ///////////////////////////////////////////////////////////////
# TESAURO TESTING
### ///////////////////////////////////////////////////////////////


board_rep = "tesauro"
model_name = "tesauro_test_0_ply"
ply = 0

run_stuff(board_rep, model_name, ply)

# board_rep = "tesauro"
# model_name = "tesauro_test_1_ply"
# ply = 1

# run_stuff(board_rep, model_name, ply)
@@ -1,30 +1,30 @@
#!/usr/bin/env ruby
MODELS_DIR = 'models'

def save(model_name)
  require 'date'

  models_dir = 'models'
  model_path = File.join(models_dir, model_name)
  if not File.exists? model_path then
    return false
  end
  model_path = File.join(MODELS_DIR, model_name)

  episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i

  puts "Found model #{model_name} with episodes #{episode_count} trained!"

  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
  save_path = File.join(models_dir, 'saves', file_name)
  save_path = File.join(MODELS_DIR, 'saves', file_name)
  puts "Saving to #{save_path}"

  system("tar", "-cvzf", save_path, "-C", models_dir, model_name)

  return true
  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
end

def train(model, episodes)
  system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end

def force_train(model, episodes)
  system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
end

def evaluate(model, episodes, method)
  system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end

@@ -33,11 +33,9 @@ model = ARGV[0]

if model.nil? then raise "no model specified" end

while true do
if not File.exists? File.join(MODELS_DIR, model) then
  force_train model, 10
  save model
  train model, 1000
  save model
  train model, 1000
  3.times do
    evaluate model, 250, "pubeval"
  end

@@ -45,3 +43,27 @@ while true do
    evaluate model, 250, "dumbeval"
  end
end

# while true do
#   save model
#   train model, 1000
#   save model
#   train model, 1000
#   3.times do
#     evaluate model, 250, "pubeval"
#   end
#   3.times do
#     evaluate model, 250, "dumbeval"
#   end
# end

while true do
  save model
  train model, 500
  5.times do
    evaluate model, 250, "pubeval"
  end
  5.times do
    evaluate model, 250, "dumbeval"
  end
end
board.py (77 changed lines)

@@ -16,8 +16,6 @@ class Board:
        return quack.idxs_with_checkers_of_player(board, player)


    # TODO: Write a test for this
    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):

@@ -51,7 +49,6 @@ class Board:
    # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
    # return np.array(board).reshape(1,30)


    # quack-fatter
    @staticmethod
    def board_features_quack_norm(board, player):

@@ -66,7 +63,7 @@ class Board:
        board.append(15 - sum(positives))
        board.append(-15 - sum(negatives))
        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
        return np.array(board).reshape(1,30)
        return np.array(board).reshape(1, 30)

    # tesauro
    @staticmethod

@@ -95,9 +92,62 @@ class Board:
        board_rep += bar_trans(board, player)
        board_rep += (15 - Board.num_of_checkers_for_player(board, player),)

        board_rep += ([1,0] if cur_player == 1 else [1,0])
        board_rep += ([1, 0] if cur_player == 1 else [0, 1])

        return np.array(board_rep).reshape(1,198)
        return np.array(board_rep).reshape(1, 198)


    @staticmethod
    def board_features_tesauro_fat(board, cur_player):
        def ordinary_trans(val, player):
            abs_val = val*player
            if abs_val <= 0:
                return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 1:
                return (1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 2:
                return (1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 3:
                return (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 4:
                return (1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 5:
                return (1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 6:
                return (1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 7:
                return (1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 8:
                return (1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0)
            elif abs_val == 9:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0)
            elif abs_val == 10:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0)
            elif abs_val == 11:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0)
            elif abs_val == 12:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0)
            elif abs_val == 13:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0)
            elif abs_val == 14:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
            elif abs_val == 15:
                return (1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)

        def bar_trans(board, player):
            if player == 1: return (abs(board[0]/2),)
            elif player == -1: return (abs(board[25]/2),)

        board_rep = []
        for player in [1, -1]:
            for x in board[1:25]:
                board_rep += ordinary_trans(x, player)
            board_rep += bar_trans(board, player)
            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)

        board_rep += ([1, 0] if cur_player == 1 else [0, 1])

        return np.array(board_rep).reshape(1, len(board_rep))


    @staticmethod

@@ -197,9 +247,6 @@ class Board:
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll

        # TODO: make sure that it is not possible to do nothing on first part of
        # turn and then do something with the second die

        def calc_moves(board, face_value):
            if face_value == 0:
                return [board]

@@ -221,23 +268,13 @@ class Board:
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
            #print("initial board: ", board)
            #print("roll:", roll)
            #print("Rest of roll:",roll[1:])
            boards = calc_moves(board, roll[0])
            #print("Boards:",boards)
            #print("Roll:",roll[0])
            #print("boards after first die: ", boards)

            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]
                # What the fuck
                #for board in boards:
                #    print(board)
                #    print("type__:",type(board))

            # Add resulting unique boards to set of legal boards resulting from roll

            #print("printing boards from calculate_legal_states: ", boards)
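The long if/elif chain in ordinary_trans above is a 15-slot unary ("thermometer") encoding of how many of the player's checkers sit on a point. A compact equivalent is sketched below purely as an illustration; ordinary_trans_compact is a hypothetical name and is not part of the diff.

def ordinary_trans_compact(val, player):
    # The first min(n, 15) slots are 1 and the rest 0, where n is the player's
    # checker count on the point; any non-positive count gives all zeros.
    n = max(0, min(15, val * player))
    return tuple(1 if i < n else 0 for i in range(15))

# ordinary_trans_compact(3, 1)   -> (1, 1, 1, 0, ..., 0)
# ordinary_trans_compact(-4, -1) -> (1, 1, 1, 1, 0, ..., 0)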
main.py (30 changed lines)

@@ -2,6 +2,7 @@ import argparse
import sys
import os
import time
import subprocess

# Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games")

@@ -77,27 +78,20 @@ if not os.path.isdir(model_path()):
if not os.path.isdir(log_path):
    os.mkdir(log_path)


def save_config():
    import yaml
    # checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    # config_path = os.path.join(checkpoint_path, 'config')
    # with open(config_path, 'a+') as f:
    #     print("lol")
    print(yaml.dump(config))

# Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time()),
                    'average_diff_in_vals': diff_in_values/len(outcome)
                    'average_diff_in_vals': diff_in_values,
                    'commit': commit
                  }

    with open(log_path, 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")


def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):

@@ -108,9 +102,12 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
    :param log_path:
    :return:
    """
    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()

    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
        format_vars = { 'commit': commit,
                        'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),

@@ -118,9 +115,10 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
                        'time': int(time.time())
                      }
        with open(log_path, 'a+') as f:
            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
            f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")

def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,

@@ -130,9 +128,10 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
                        'mean': sum(scores) / len(scores),
                        'time': time,
                        'index': index,
                        'commit': commit
                      }
        with open(log_path, 'a+') as f:
            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
            f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")

def find_board_rep():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])

@@ -172,7 +171,6 @@ if __name__ == "__main__":
    # Set up network
    from network import Network

    save_config()
    # Set up variables
    episode_count = config['episode_count']


@@ -211,6 +209,8 @@ if __name__ == "__main__":

    elif args.eval:
        network = Network(config, config['model'])
        network.restore_model()

        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
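The logging changes above append the short git commit hash (from git describe --first-parent --always) as a final semicolon-separated field. A minimal sketch of reading an eval log back follows; the field order matches the format string above, while the concrete model directory in the path is only an assumption.

import csv

fields = ["time", "method", "trained_eps", "count", "sum", "mean", "commit"]
with open("models/player_testings/logs/eval.log") as f:  # assumed model directory
    for row in csv.reader(f, delimiter=";"):
        entry = dict(zip(fields, row))
        print(entry["commit"], entry["method"], entry["mean"])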
network.py (238 changed lines)

@@ -21,10 +21,10 @@ class Network:
        'quack' : (28, Board.board_features_quack),
        'tesauro' : (198, Board.board_features_tesauro),
        'quack-norm' : (30, Board.board_features_quack_norm),
        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }


    def custom_tanh(self, x, name=None):
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

@@ -39,6 +39,11 @@ class Network:
            '0': self.make_move_0_ply
        }

        self.max_or_min = {
            1: np.argmax,
            -1: np.argmin
        }

        tf.enable_eager_execution()

        xavier_init = tf.contrib.layers.xavier_initializer()

@@ -93,7 +98,7 @@ class Network:
        :param decay_steps: The amount of steps between each decay
        :return: The result of the exponential decay performed on the learning rate
        """
        res = max_lr * decay_rate**(global_step // decay_steps)
        res = max_lr * decay_rate ** (global_step // decay_steps)
        return res

    def do_backprop(self, prev_state, value_next):

@@ -104,20 +109,19 @@ class Network:
        :return: Nothing, the calculation is performed on the model of the network
        """
        self.learning_rate = tf.maximum(self.min_learning_rate,
                                        self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
                                        name="learning_rate")
                                        self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
                                        name="learning_rate")

        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))

        grads = tape.gradient(value, self.model.variables)

        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))

        with tf.variable_scope('apply_gradients'):
            for grad, train_var in zip(grads, self.model.variables):
                backprop_calc = self.learning_rate * difference_in_values * grad
                train_var.assign_add(backprop_calc)
        for grad, train_var in zip(grads, self.model.variables):
            backprop_calc = self.learning_rate * difference_in_values * grad
            train_var.assign_add(backprop_calc)



@@ -144,8 +148,9 @@ class Network:
        :param episode_count:
        :return:
        """

        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
        #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)

        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
        print("[NETWK] ({name}) Saving model to:".format(name=self.name),
              os.path.join(self.checkpoint_path, 'model.ckpt'))

@@ -165,8 +170,7 @@ class Network:
        :param states: A number of states. The states have to be transformed before being given to this function.
        :return:
        """
        values = self.model.predict_on_batch(states)
        return values
        return self.model.predict_on_batch(states)


    def restore_model(self):

@@ -174,7 +178,6 @@ class Network:
        Restore a model for a session, such that a trained model and either be further trained or
        used for evaluation

        :param sess: Current session
        :return: Nothing. It's a side-effect that a model gets restored for the network.
        """


@@ -186,9 +189,6 @@ class Network:
                  str(latest_checkpoint))
            tfe.Saver(self.model.variables).restore(latest_checkpoint)

            # variables_names = [v.name for v in self.model.variables]


            # Restore trained episode count for model
            episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
            if os.path.isfile(episode_count_path):

@@ -211,7 +211,6 @@ class Network:
        and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.

        :param sess:
        :param board: Current board
        :param roll: Current roll
        :param player: Current player

@@ -221,13 +220,12 @@ class Network:
        legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])

        scores = self.model.predict_on_batch(legal_states)
        transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]

        best_score_idx = np.argmax(np.array(transformed_scores))
        best_move = legal_moves[best_score_idx]
        best_score = scores[best_score_idx]
        best_score_idx = self.max_or_min[player](scores)

        return [best_move, best_score]
        best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]

        return (best_move, best_score)

    def make_move_1_ply(self, board, roll, player):
        """

@@ -237,9 +235,9 @@ class Network:
        :param player:
        :return:
        """
        # start = time.time()
        start = time.time()
        best_pair = self.calculate_1_ply(board, roll, player)
        # print(time.time() - start)
        #print(time.time() - start)
        return best_pair



@@ -248,35 +246,31 @@ class Network:
        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
        all moves and scores are found for them. The expected score is then calculated for each of the boards from the
        0-ply.
        :param sess:

        :param board:
        :param roll: The original roll
        :param player: The current player
        :return: Best possible move based on 1-ply look-ahead

        """

        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)

        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])

        scores = self.calc_vals(legal_states)
        scores = [score.numpy() for score in scores]
        scores = [ score.numpy()
                   for score
                   in self.calc_vals(legal_states) ]

        moves_and_scores = list(zip(init_legal_states, scores))
        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
        best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]

        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
        scores = self.do_ply(best_boards, player)

        best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
        best_score_idx = self.max_or_min[player](scores)
        # best_score_idx = np.array(trans_scores).argmax()



        scores, trans_scores = self.do_ply(best_boards, player)

        best_score_idx = np.array(trans_scores).argmax()

        return [best_boards[best_score_idx], scores[best_score_idx]]
        return (best_boards[best_score_idx], scores[best_score_idx])

    def do_ply(self, boards, player):
        """

@@ -285,7 +279,6 @@ class Network:
        allowing the function to search deeper, which could result in an even larger search space. If we wish
        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.

        :param sess:
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: An array of scores where each index describes one of the boards which was given as param

@@ -305,11 +298,11 @@ class Network:
        length_list = []
        test_list = []
        # Prepping of data
        start= time.time()
        # start = time.time()
        for board in boards:
            length = 0
            for roll in all_rolls:
                all_states = list(Board.calculate_legal_states(board, player*-1, roll))
                all_states = Board.calculate_legal_states(board, player*-1, roll)
                for state in all_states:
                    state = np.array(self.board_trans_func(state, player*-1)[0])
                    test_list.append(state)

@@ -320,146 +313,19 @@ class Network:

        start = time.time()

        all_scores_legit = self.model.predict_on_batch(np.array(test_list))
        all_scores = self.model.predict_on_batch(np.array(test_list))

        split_scores = []
        from_idx = 0
        for length in length_list:
            split_scores.append(all_scores_legit[from_idx:from_idx+length])
            split_scores.append(all_scores[from_idx:from_idx+length])
            from_idx += length

        means_splits = [tf.reduce_mean(scores) for scores in split_scores]
        transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]

        # print(time.time() - start)

        return ([means_splits, transformed_means_splits])


    def calc_n_ply(self, n_init, sess, board, player, roll):
        """
        :param n_init:
        :param sess:
        :param board:
        :param player:
        :param roll:
        :return:
        """

        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)

        # find all values for the above boards
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]

        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
        sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)


        best_boards = [x[0] for x in sorted_moves_and_scores[:10]]

        best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)

        return best_move_score_pair


    def n_ply(self, n_init, sess, boards_init, player_init):
        """
        :param n_init:
        :param sess:
        :param boards_init:
        :param player_init:
        :return:
        """
        def ply(n, boards, player):
            def calculate_possible_states(board):
                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
                                   (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
                                   (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
                                   (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
                                   (6, 6) ]

                # for roll in possible_rolls:
                #     print(len(Board.calculate_legal_states(board, player, roll)))

                return [ Board.calculate_legal_states(board, player, roll)
                         for roll
                         in possible_rolls ]

            def find_best_state_score(boards):
                score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
                                for board
                                in boards ]
                scores = [ pair[1]
                           for pair
                           in score_pairs ]
                best_score_pair = score_pairs[np.array(scores).argmax()]

                return best_score_pair

            def average_score(boards):
                return sum(boards)/len(boards)

            def average_ply_score(board):
                states_for_rolls = calculate_possible_states(board)

                best_state_score_for_each_roll = [
                    find_best_state_score(states)
                    for states
                    in states_for_rolls ]
                best_score_for_each_roll = [ x[1]
                                             for x
                                             in best_state_score_for_each_roll ]

                average_score_var = average_score(best_score_for_each_roll)
                return average_score_var


            if n == 1:
                average_score_pairs = [ (board, average_ply_score(board))
                                        for board
                                        in boards ]
                return average_score_pairs
            elif n > 1: # n != 1
                def average_for_score_pairs(score_pairs):
                    scores = [ pair[1]
                               for pair
                               in score_pairs ]
                    return sum(scores)/len(scores)

                def average_plain(scores):
                    return sum(scores)/len(scores)

                print("+"*20)
                print(n)
                print(type(boards))
                print(boards)
                possible_states_for_boards = [
                    (board, calculate_possible_states(board))
                    for board
                    in boards ]

                average_score_pairs = [
                    (inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
                                                      for inner_board
                                                      in inner_boards[1] ]))
                    for inner_boards
                    in possible_states_for_boards ]

                return average_score_pairs

            else:
                assert False

        if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()

        boards_with_scores = ply(n_init, boards_init, -1 * player_init)
        #print("Boards with scores:",boards_with_scores)
        scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
                   for pair
                   in boards_with_scores ]
        #print("All the scores:",scores)
        best_score_pair = boards_with_scores[np.array(scores).argmax()]
        return best_score_pair
        # print("/"*50)
        return means_splits


    def eval(self, episode_count, trained_eps = 0):

@@ -477,7 +343,6 @@ class Network:
        """
        Do the actual evaluation

        :param sess:
        :param method: Either pubeval or dumbeval
        :param episodes: Amount of episodes to use in the evaluation
        :param trained_eps:

@@ -501,7 +366,6 @@ class Network:
        sys.stderr.write(
            "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))


        if method == 'pubeval':
            outcomes = []
            for i in range(1, episodes + 1):

@@ -509,11 +373,9 @@ class Network:
                board = Board.initial_state
                while Board.outcome(board) is None:
                    roll = (random.randrange(1, 7), random.randrange(1, 7))

                    board = (self.make_move(board, roll, 1))[0]

                    roll = (random.randrange(1, 7), random.randrange(1, 7))

                    board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]

                sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))

@@ -532,11 +394,9 @@ class Network:
                board = Board.initial_state
                while Board.outcome(board) is None:
                    roll = (random.randrange(1, 7), random.randrange(1, 7))

                    board = (self.make_move(board, roll, 1))[0]

                    roll = (random.randrange(1, 7), random.randrange(1, 7))

                    board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]

                sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))

@@ -596,10 +456,8 @@ class Network:
        :return:
        """

        difference_in_vals = 0

        self.restore_model()

        average_diffs = 0
        start_time = time.time()

        def print_time_estimate(eps_completed):

@@ -619,28 +477,27 @@ class Network:
        for episode in range(1, episodes + 1):

            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
            # TODO decide which player should be here

            player = 1
            # player = 1
            player = random.choice([-1,1])
            prev_board = Board.initial_state
            i = 0
            difference_in_values = 0
            while Board.outcome(prev_board) is None:
                i += 1
                self.global_step += 1


                cur_board, cur_board_value = self.make_move(prev_board,
                                                            (random.randrange(1, 7), random.randrange(1, 7)),
                                                            player)

                difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
                difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))

                if self.config['verbose']:
                    print("Difference in values:", difference_in_vals)
                    print("Current board value :", cur_board_value)
                    print("Current board is :\n",cur_board)


                # adjust weights
                if Board.outcome(cur_board) is None:
                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)

@@ -654,6 +511,10 @@ class Network:
                final_score = np.array([Board.outcome(final_board)[1]])
                scaled_final_score = ((final_score + 2) / 4)

                difference_in_values += abs(scaled_final_score-cur_board_value)

                average_diffs += (difference_in_values[0][0] / (i+1))

                self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))

            sys.stderr.write("\n")

@@ -666,8 +527,9 @@ class Network:
                print_time_estimate(episode)

        sys.stderr.write("[TRAIN] Saving model for final episode...\n")

        self.save_model(episode+trained_eps)

        return outcomes, difference_in_vals[0][0]
        return outcomes, average_diffs/len(outcomes)
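The new max_or_min table replaces the earlier 1 - x score transformation: player 1 takes the argmax of the raw network values and player -1 the argmin. A standalone sketch of that selection pattern with made-up scores (illustrative only, not part of the diff):

import numpy as np

max_or_min = {1: np.argmax, -1: np.argmin}

scores = np.array([0.42, 0.57, 0.13, 0.88])  # hypothetical values for four legal moves
legal_moves = ["move_a", "move_b", "move_c", "move_d"]

for player in (1, -1):
    best_idx = max_or_min[player](scores)
    print(player, legal_moves[best_idx], scores[best_idx])
# player 1 picks move_d (highest value); player -1 picks move_c (lowest value)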
@@ -57,4 +57,11 @@ boards = {initial_state,

# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))

network.play_against_network()

diff = [0, 0]
val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
print(val)
diff[0] += abs(-1-val)
diff[1] += 1

print(diff[1])
player.py (18 changed lines)

@@ -20,21 +20,22 @@ class Player:
            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
            total += r
        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
        print(sets)
        return sets


    def tmp_name(self, from_board, to_board, roll, player, total_moves):
    def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
        sets = self.calc_move_sets(from_board, roll, player)
        return_board = from_board
        for idx, board_set in enumerate(sets):

            board_set[0] = list(board_set[0])
            print(to_board)
            print(board_set)
            # print(to_board)
            # print(board_set)
            if to_board in board_set[0]:
                total_moves -= board_set[1]
                # if it's not the sum of the moves
                if idx < 2:
                if idx < (4 if is_quad else 2):
                    roll[idx] = 0
                else:
                    roll = [0,0]

@@ -43,8 +44,11 @@ class Player:
        return total_moves, roll, return_board

    def make_human_move(self, board, roll):
        total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
        move = ""
        is_quad = roll[0] == roll[1]
        total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
        if is_quad:
            roll = [roll[0]]*4

        while total_moves != 0:
            while True:
                print("You have {roll} left!".format(roll=total_moves))

@@ -60,6 +64,6 @@ class Player:
                    print("The correct syntax is: 2/5 for a move from index 2 to 5.")

            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
            print(Board.pretty(board))
        return board
test.py (17 changed lines)

@@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())

    def test_pubeval_features(self):
        board = Board.initial_state

        expected = (0,
                    2, 0, 0, 0, 0, -5,
                    0, -3, 0, 0, 0, 5,
                    -5, 0, 0, 0, 3, 0,
                    5, 0, 0, 0, 0, -2,
                    0,
                    0, 0)

        import numpy as np
        self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
                         np.array(expected).reshape(1, 28)).all())
        self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
                         np.array(expected).reshape(1, 28)).all())

    def test_tesauro_bars(self):
        board = list(Board.initial_state)
        board[1] = 0