Updated server code.

restore restore_model
tesauro fat and diffs in values
2018-06-07 21:36:06 +02:00 · 2018-05-22 20:49:10 +02:00 · 2018-05-22 15:39:14 +02:00 · 2018-05-22 15:38:04 +02:00 · 2018-05-22 15:36:23 +02:00 · 2018-05-22 13:16:10 +00:00
9 changed files with 414 additions and 246 deletions
--- a/app.py
+++ b/app.py
@ -0,0 +1,141 @@
 from flask import Flask, request, jsonify
 from flask_json import FlaskJSON, as_json_p
 from flask_cors import CORS
 from board import Board
 from eval import Eval
 import main
 import random
 from network import Network
 # --- Web application setup ---------------------------------------------
 app = Flask(__name__)
 # Flask-JSON options (see the Flask-JSON configuration docs): do not add a
 # status field to JSON responses, and do not treat the JSONP callback as
 # optional for views decorated with @as_json_p.
 app.config['JSON_ADD_STATUS'] = False
 app.config['JSON_JSONP_OPTIONAL'] = False
 json = FlaskJSON(app)
 # Enable cross-origin requests for the whole app.
 CORS(app)
 # --- Model setup ----------------------------------------------------------
 # Work on a copy so the overrides below do not modify main.config itself.
 config = main.config.copy()
 config['model'] = "player_testings"
 # 'ply' is given as a string; the Network move dispatch table visible in
 # this change set is keyed on '0'.
 config['ply'] = "0"
 config['board_representation'] = 'tesauro'
 # The network is created and restored once at import time and is then
 # shared by the request handlers below.
 network = Network(config, config['model'])
 network.restore_model()
 def calc_move_sets(from_board, roll, player):
    """Collect the legal successor states for each die and for the full roll.

    :param from_board: Board to move from
    :param roll: The dice still available to the player
    :param player: The player to move
    :return: A list of ``[legal_states, pips]`` pairs: one pair per die in
             ``roll`` (computed with the roll ``[die, 0]``), followed by one
             pair for the complete roll whose ``pips`` is the sum of the dice
    """
    per_die = [
        [Board.calculate_legal_states(from_board, player, [die, 0]), die]
        for die in roll
    ]
    whole_roll = [Board.calculate_legal_states(from_board, player, roll), sum(roll)]
    return per_die + [whole_roll]
 def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
    """Check whether ``to_board`` is reachable from ``from_board`` and consume dice.

    The move sets from ``calc_move_sets`` are searched in order. On the first
    set containing ``to_board`` the pips of that set are subtracted from
    ``total_moves`` and the used dice are zeroed: a single die is zeroed in
    place in ``roll``, while a match on a later set replaces the roll with
    ``[0, 0]``.

    :param from_board: Board before the move
    :param to_board: Board the client claims to have reached
    :param roll: Mutable list of dice; may be modified in place
    :param player: The player to move
    :param total_moves: Pips still available before this move
    :param is_quad: Whether the first four sets (instead of two) are single dice
    :return: ``(total_moves, roll, board)`` where ``board`` is ``to_board`` when
             the move was found and ``from_board`` otherwise
    """
    move_sets = calc_move_sets(from_board, roll, player)
    print("To board:\n",to_board)
    print("All sets:\n",move_sets)
    # Number of leading entries in move_sets that stand for a single die.
    single_die_slots = 4 if is_quad else 2
    for position, entry in enumerate(move_sets):
        entry[0] = list(entry[0])
        if to_board not in entry[0]:
            continue
        total_moves -= entry[1]
        if position < single_die_slots:
            # Only this one die was used.
            roll[position] = 0
        else:
            # The move used the combined roll.
            roll = [0, 0]
        return total_moves, roll, to_board
    # No set contained to_board: nothing is consumed and the board is unchanged.
    return total_moves, roll, from_board
 def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
    """Validate a move via ``tmp_name`` and reorder its result.

    ``roll`` is copied into a fresh list before the call, so the caller's
    sequence is never mutated.

    :return: ``(board, total_moves, roll)``
    """
    remaining, updated_roll, resulting_board = tmp_name(
        from_board, to_board, list(roll), player, total_roll, is_quad)
    return resulting_board, remaining, updated_roll
@app.route('/get_board', methods=['GET'])
@as_json_p
def get_board():
    """Return a fixed 26-entry board as a comma-separated string.

    The layout looks like the standard backgammon starting position
    (TODO confirm against Board.initial_state).
    """
    layout = '0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'
    return {'board': layout}
 def check_move(prev, curr):
    """Tell whether ``curr`` is a legal successor of ``prev``.

    The check is currently hard-coded to player -1 and the roll [1, 2].

    :param prev: Board before the move
    :param curr: Board after the move
    :return: True if ``curr`` equals one of the legal states, else False
    """
    # TODO: Decide on player system and implement roll properly
    candidates = Board.calculate_legal_states(tuple(prev), -1, [1,2])
    return any(list(curr) == list(state) for state in candidates)
@app.route('/bot_move', methods=['POST'])
def bot_move():
    """Let the bot (player 1) make a move on the posted board.

    Expects a JSON body with:
      * ``board``   - comma-separated integers describing the board
      * ``pubeval`` - whether to move with pubeval instead of the network;
                      accepts a JSON boolean/number or a string such as
                      "true"/"false"

    A fresh roll of two dice is generated server-side.

    :return: The resulting board as a comma-separated string
    """
    data = request.get_json(force=True)
    board = tuple(int(x) for x in data['board'].split(','))

    # bool("false") is True, so string flags must be interpreted explicitly.
    # post_board shows the client sending booleans as the strings
    # "true"/"false"; other non-empty strings stay truthy as before.
    flag = data['pubeval']
    if isinstance(flag, str):
        use_pubeval = flag.strip().lower() not in ('', 'false', '0', 'no', 'off')
    else:
        use_pubeval = bool(flag)

    roll = (random.randrange(1, 7), random.randrange(1, 7))
    if use_pubeval:
        # NOTE(review): the evaluation loops in network.py slice the pubeval
        # result with [0:26]; confirm whether the board returned here also
        # needs trimming before it is sent to the client.
        board, _ = Eval.make_pubeval_move(board, 1, roll)
    else:
        board, _ = network.make_move(board, roll, 1)
    return ",".join(str(x) for x in board)
@app.route('/post_board', methods=['POST'])
def post_board():
    """Validate a move posted by the client and report the remaining dice.

    Expects a JSON body with ``board``, ``prevBoard`` and ``roll`` as
    comma-separated integers, ``quad`` as the string "true" or anything else,
    and ``totalRoll`` as an integer.

    :return: ``"<board>#<total moves left>#<roll>"`` where board and roll are
             comma-separated
    """
    def to_ints(csv_text):
        return [int(token) for token in csv_text.split(',')]

    payload = request.get_json(force=True)
    # TODO: Fix hardcoded player
    player = -1
    board = to_ints(payload['board'])
    prev_board = to_ints(payload['prevBoard'])
    print(payload['roll'])
    roll = to_ints(payload['roll'])
    print(roll)
    quad = payload['quad'] == "true"
    total_roll = int(payload['totalRoll'])
    print("total roll is:", total_roll)
    return_board, total_moves, roll = calc_move_stuff(
        tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
    fields = [
        ",".join(str(x) for x in return_board),
        str(total_moves),
        ",".join(str(x) for x in roll),
    ]
    return_string = "#".join(fields)
    print(return_string)
    return return_string
 # Start the Flask server only when this file is run as a script. The host
 # '0.0.0.0' makes it listen on all network interfaces, on port 35270.
 if __name__ == '__main__':
    app.run(host = '0.0.0.0', port=35270)
--- a/bin/0-ply-tests.rb
+++ b/bin/0-ply-tests.rb
@ -0,0 +1,78 @@
 # Trains and repeatedly evaluates one model.
 #
 # A single training episode with the given board representation is run
 # first. After that, each round evaluates the model against dumbeval and
 # pubeval (250 episodes, repeated 3 times each) and then trains it for
 # another 2000 episodes, until 200000 episodes have been scheduled.
 def run_stuff(board_rep, model_name, ply)
   system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
   (0...200000).step(2000) do |_scheduled|
     ["dumbeval", "pubeval"].each do |eval_method|
       system("python3 main.py --eval --model #{model_name} --eval-methods #{eval_method} --episodes 250 --ply #{ply} --repeat-eval 3")
     end
     system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
   end
 end
 ### ///////////////////////////////////////////////////////////////
 # 0-PLY TEST RUNS
 ### ///////////////////////////////////////////////////////////////
 # One run per board representation, in this order. Each model is named
 # "<board_rep>_test_0_ply". The 1-ply variants ("<board_rep>_test_1_ply"
 # with ply = 1) remain disabled.
 ["quack", "quack-fat", "quack-norm", "tesauro"].each do |board_rep|
   model_name = "#{board_rep}_test_0_ply"
   ply = 0
   run_stuff(board_rep, model_name, ply)
 end
--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@ -1,30 +1,30 @@
 #!/usr/bin/env ruby
 MODELS_DIR = 'models'
 def save(model_name)
  require 'date'
-  models_dir = 'models'
+  model_path = File.join(MODELS_DIR, model_name)
  model_path = File.join(models_dir, model_name)
  if not File.exists? model_path then
    return false
  end
  episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
  puts "Found model #{model_name} with episodes #{episode_count} trained!"
  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
-  save_path = File.join(models_dir, 'saves', file_name)
+  save_path = File.join(MODELS_DIR, 'saves', file_name)
  puts "Saving to #{save_path}"
-  system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
+  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
  return true
 end
 # Trains `model` for `episodes` episodes via main.py.
 # Returns whatever Kernel#system returns for the command.
 def train(model, episodes)
   command = ["python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s]
   system(*command)
 end
 # Same as `train`, but additionally passes --force-creation to main.py.
 # Returns whatever Kernel#system returns for the command.
 def force_train(model, episodes)
   command = ["python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s]
   system(*command)
 end
 # Evaluates `model` for `episodes` episodes with the given evaluation
 # method (e.g. "pubeval" or "dumbeval") via main.py.
 # Returns whatever Kernel#system returns for the command.
 def evaluate(model, episodes, method)
   command = ["python3", "main.py", "--eval", "--model", model, "--episodes", episodes.to_s, "--eval-methods", method]
   system(*command)
 end
@ -33,11 +33,9 @@ model = ARGV[0]
 if model.nil? then raise "no model specified" end
-while true do
+if not File.exists? File.join(MODELS_DIR, model) then
  force_train model, 10
  save model
  train model, 1000
  save model
  train model, 1000
  3.times do
    evaluate model, 250, "pubeval"
  end
@ -45,3 +43,27 @@ while true do
    evaluate model, 250, "dumbeval"
  end
 end
 # while true do
 #   save model
 #   train model, 1000
 #   save model
 #   train model, 1000
 #   3.times do
 #     evaluate model, 250, "pubeval"
 #   end
 #   3.times do
 #     evaluate model, 250, "dumbeval"
 #   end
 # end
 while true do
  save model
  train model, 500
  5.times do
    evaluate model, 250, "pubeval"
  end
  5.times do
    evaluate model, 250, "dumbeval"
  end
 end
--- a/board.py
+++ b/board.py
@ -15,9 +15,7 @@ class Board:
    def idxs_with_checkers_of_player(board, player):
        return quack.idxs_with_checkers_of_player(board, player)
-
+    
    # TODO: Write a test for this
    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):
@ -51,7 +49,6 @@ class Board:
        # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
        # return np.array(board).reshape(1,30)
    # quack-fatter
    @staticmethod
    def board_features_quack_norm(board, player):
@ -66,7 +63,7 @@ class Board:
        board.append(15 - sum(positives))
        board.append(-15 - sum(negatives))
        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
-        return np.array(board).reshape(1,30)
+        return np.array(board).reshape(1, 30)
    # tesauro
    @staticmethod
@ -95,9 +92,62 @@ class Board:
            board_rep += bar_trans(board, player)
            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
-        board_rep += ([1,0] if cur_player == 1 else [1,0])
+        board_rep += ([1, 0] if cur_player == 1 else [0, 1])
-        return np.array(board_rep).reshape(1,198)
+        return np.array(board_rep).reshape(1, 198)
    @staticmethod
    def board_features_tesauro_fat(board, cur_player):
        """Encode a board with a 15-unit encoding per point and player.

        For each player (1 first, then -1) the 24 points ``board[1:25]`` are
        each turned into 15 values: as many leading ones as the player has
        checkers on the point, padded with zeros. Each player's part is
        followed by a bar feature and by ``15 - number of checkers`` for that
        player. Two final values mark whose turn it is: ``[1, 0]`` for player
        1, ``[0, 1]`` otherwise.

        The result has 2 * (24 * 15 + 2) + 2 = 726 entries.

        :param board: The board; indices 0 and 25 are read as the bars
        :param cur_player: The player to move
        :return: A numpy array of shape ``(1, 726)``
        :raises ValueError: If a point holds more than 15 checkers of one
                            player or a non-integer amount
        """
        max_checkers = 15

        def ordinary_trans(val, player):
            # Positive exactly when the point belongs to `player`.
            count = val * player
            if count <= 0:
                return (0,) * max_checkers
            if count > max_checkers or count != int(count):
                raise ValueError(
                    "Invalid checker count {} on a point".format(count))
            # int() keeps the tuple repetition a plain sequence repeat even if
            # the count happens to be a numpy integer.
            ones = int(count)
            return (1,) * ones + (0,) * (max_checkers - ones)

        def bar_trans(board, player):
            # NOTE(review): confirm which bar index belongs to which player.
            if player == 1:
                return (abs(board[0]/2),)
            elif player == -1:
                return (abs(board[25]/2),)

        board_rep = []
        for player in [1, -1]:
            for x in board[1:25]:
                board_rep += ordinary_trans(x, player)
            board_rep += bar_trans(board, player)
            board_rep += (15 - Board.num_of_checkers_for_player(board, player),)

        board_rep += ([1, 0] if cur_player == 1 else [0, 1])
        return np.array(board_rep).reshape(1, len(board_rep))
    @staticmethod
@ -197,9 +247,6 @@ class Board:
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll
        # TODO: make sure that it is not possible to do nothing on first part of
        #       turn and then do something with the second die
        def calc_moves(board, face_value):
            if face_value == 0:
                return [board]
@ -221,23 +268,13 @@ class Board:
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
            #print("initial board: ", board)
            #print("roll:", roll)
            #print("Rest of roll:",roll[1:])
            boards = calc_moves(board, roll[0])
            #print("Boards:",boards)
            #print("Roll:",roll[0])
            #print("boards after first die: ", boards)
            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]
-                # What the fuck
+
                #for board in boards:
                #    print(board)
                #    print("type__:",type(board))
                # Add resulting unique boards to set of legal boards resulting from roll
                #print("printing boards from calculate_legal_states: ", boards)
--- a/main.py
+++ b/main.py
@ -2,6 +2,7 @@ import argparse
 import sys
 import os
 import time
 import subprocess
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
@ -77,27 +78,20 @@ if not os.path.isdir(model_path()):
 if not os.path.isdir(log_path):
    os.mkdir(log_path)
 def save_config():
    """Print the current ``config`` as YAML on stdout.

    Writing the dump to a ``config`` file inside the model's checkpoint
    directory (``config['model_storage_path']/config['model']``) is not
    implemented yet.
    """
    import yaml
    serialized = yaml.dump(config)
    print(serialized)
 # Define helper functions
 def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time()),
-                    'average_diff_in_vals': diff_in_values/len(outcome)
+                    'average_diff_in_vals': diff_in_values,
                    'commit': commit
    }
    with open(log_path, 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
+        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
 def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@ -108,9 +102,12 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
    :param log_path:
    :return:
    """
    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    for outcome in outcomes:
        scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
+        format_vars = { 'commit': commit,
                        'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
@ -118,9 +115,10 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
                        'time': int(time.time())
        }
        with open(log_path, 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+            f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")
 def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
@ -130,9 +128,10 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
                        'mean': sum(scores) / len(scores),
                        'time': time,
                        'index': index,
                        'commit': commit
        }
        with open(log_path, 'a+') as f:
-            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
+            f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
 def find_board_rep():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
@ -172,7 +171,6 @@ if __name__ == "__main__":
    # Set up network
    from network import Network
    save_config()
    # Set up variables
    episode_count = config['episode_count']
@ -211,6 +209,8 @@ if __name__ == "__main__":
    elif args.eval:
        network = Network(config, config['model'])
        network.restore_model()
        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
--- a/network.py
+++ b/network.py
@ -21,10 +21,10 @@ class Network:
        'quack'       : (28, Board.board_features_quack),
        'tesauro'     : (198, Board.board_features_tesauro),
        'quack-norm'  : (30, Board.board_features_quack_norm),
        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }
    def custom_tanh(self, x, name=None):
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@ -39,6 +39,11 @@ class Network:
            '0': self.make_move_0_ply
        }
        self.max_or_min = {
            1: np.argmax,
            -1: np.argmin
        }
        tf.enable_eager_execution()
        xavier_init = tf.contrib.layers.xavier_initializer()
@ -93,7 +98,7 @@ class Network:
        :param decay_steps: The amount of steps between each decay
        :return: The result of the exponential decay performed on the learning rate
        """
-        res = max_lr * decay_rate**(global_step // decay_steps)
+        res = max_lr * decay_rate ** (global_step // decay_steps)
        return res
    def do_backprop(self, prev_state, value_next):
@ -104,20 +109,19 @@ class Network:
        :return: Nothing, the calculation is performed on the model of the network
        """
        self.learning_rate = tf.maximum(self.min_learning_rate,
-                                         self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
+                                        self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
-                                         name="learning_rate")
+                                        name="learning_rate")
        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))
        grads = tape.gradient(value, self.model.variables)
        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
-        with tf.variable_scope('apply_gradients'):
+        for grad, train_var in zip(grads, self.model.variables):
-            for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
-                backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)
                train_var.assign_add(backprop_calc)
@ -144,8 +148,9 @@ class Network:
        :param episode_count:
        :return:
        """
        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
-        #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
+
        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
@ -165,16 +170,14 @@ class Network:
        :param states: A number of states. The states have to be transformed before being given to this function.
        :return:
        """
-        values = self.model.predict_on_batch(states)
+        return self.model.predict_on_batch(states)
        return values
    def restore_model(self):
        """
        Restore a model for a session, such that a trained model and either be further trained or
        used for evaluation
-
+        
        :param sess: Current session
        :return: Nothing. It's a side-effect that a model gets restored for the network.
        """
@ -186,9 +189,6 @@ class Network:
                  str(latest_checkpoint))
            tfe.Saver(self.model.variables).restore(latest_checkpoint)
            # variables_names = [v.name for v in self.model.variables]
            # Restore trained episode count for model
            episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
            if os.path.isfile(episode_count_path):
@ -211,7 +211,6 @@ class Network:
        and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
        :param sess:
        :param board: Current board
        :param roll:  Current roll
        :param player: Current player
@ -221,13 +220,12 @@ class Network:
        legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
        scores = self.model.predict_on_batch(legal_states)
        transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
-        best_score_idx = np.argmax(np.array(transformed_scores))
+        best_score_idx = self.max_or_min[player](scores)
        best_move = legal_moves[best_score_idx]
        best_score = scores[best_score_idx]
-        return [best_move, best_score]
+        best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
        return (best_move, best_score)
    def make_move_1_ply(self, board, roll, player):
        """
@ -237,9 +235,9 @@ class Network:
        :param player:
        :return:
        """
-        # start = time.time()
+        start = time.time()
        best_pair = self.calculate_1_ply(board, roll, player)
-        # print(time.time() - start)
+        #print(time.time() - start)
        return best_pair
@ -248,35 +246,31 @@ class Network:
        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
        all moves and scores are found for them. The expected score is then calculated for each of the boards from the
        0-ply.
-        :param sess:
+
        :param board:
        :param roll: The original roll
        :param player: The current player
        :return: Best possible move based on 1-ply look-ahead
        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
-        scores = self.calc_vals(legal_states)
+        scores = [ score.numpy()
-        scores = [score.numpy() for score in scores]
+                   for score
                   in  self.calc_vals(legal_states) ]
        moves_and_scores = list(zip(init_legal_states, scores))
        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
        best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]
-        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
+        scores = self.do_ply(best_boards, player)
-        best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
+        best_score_idx = self.max_or_min[player](scores)
        # best_score_idx = np.array(trans_scores).argmax()
-
+        return (best_boards[best_score_idx], scores[best_score_idx])
        scores, trans_scores = self.do_ply(best_boards, player)
        best_score_idx = np.array(trans_scores).argmax()
        return [best_boards[best_score_idx], scores[best_score_idx]]
    def do_ply(self, boards, player):
        """
@ -285,7 +279,6 @@ class Network:
        allowing the function to search deeper, which could result in an even larger search space. If we wish
        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
        :param sess:
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: An array of scores where each index describes one of the boards which was given as param
@ -305,11 +298,11 @@ class Network:
        length_list = []
        test_list = []
        # Prepping of data
-        start= time.time()
+        # start = time.time()
        for board in boards:
            length = 0
            for roll in all_rolls:
-                all_states = list(Board.calculate_legal_states(board, player*-1, roll))
+                all_states = Board.calculate_legal_states(board, player*-1, roll)
                for state in all_states:
                    state = np.array(self.board_trans_func(state, player*-1)[0])
                    test_list.append(state)
@ -320,146 +313,19 @@ class Network:
        start = time.time()
-        all_scores_legit = self.model.predict_on_batch(np.array(test_list))
+        all_scores = self.model.predict_on_batch(np.array(test_list))
        split_scores = []
        from_idx = 0
        for length in length_list:
-            split_scores.append(all_scores_legit[from_idx:from_idx+length])
+            split_scores.append(all_scores[from_idx:from_idx+length])
            from_idx += length
        means_splits = [tf.reduce_mean(scores) for scores in split_scores]
-        transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits]
+
        # print(time.time() - start)
-
+        # print("/"*50)
-        return ([means_splits, transformed_means_splits])
+        return means_splits
    def calc_n_ply(self, n_init, sess, board, player, roll):
        """
        :param n_init:
        :param sess:
        :param board:
        :param player:
        :param roll:
        :return:
        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        # find all values for the above boards
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
        sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
        best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
        best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
        return best_move_score_pair
    def n_ply(self, n_init, sess, boards_init, player_init):
        """
        :param n_init:
        :param sess:
        :param boards_init:
        :param player_init:
        :return:
        """
        def ply(n, boards, player):
            def calculate_possible_states(board):
                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
                                   (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
                                   (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
                                   (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
                                   (6, 6) ]
                # for roll in possible_rolls:
                #     print(len(Board.calculate_legal_states(board, player, roll)))
                return [ Board.calculate_legal_states(board, player, roll)
                         for roll
                         in  possible_rolls ]
            def find_best_state_score(boards):
                score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
                                for board
                                in  boards ]
                scores = [ pair[1]
                           for pair
                           in score_pairs ]
                best_score_pair = score_pairs[np.array(scores).argmax()]
                return best_score_pair
            def average_score(boards):
                return sum(boards)/len(boards)
            def average_ply_score(board):
                states_for_rolls = calculate_possible_states(board)
                best_state_score_for_each_roll = [
                    find_best_state_score(states)
                    for states
                    in  states_for_rolls ]
                best_score_for_each_roll = [ x[1]
                                             for x
                                             in best_state_score_for_each_roll ]
                average_score_var = average_score(best_score_for_each_roll)
                return average_score_var
            if n == 1:
                average_score_pairs = [ (board, average_ply_score(board))
                                        for board
                                        in  boards ]
                return average_score_pairs
            elif n > 1: # n != 1
                def average_for_score_pairs(score_pairs):
                    scores = [ pair[1]
                               for pair
                               in score_pairs ]
                    return sum(scores)/len(scores)
                def average_plain(scores):
                    return sum(scores)/len(scores)
                print("+"*20)
                print(n)
                print(type(boards))
                print(boards)
                possible_states_for_boards = [
                    (board, calculate_possible_states(board))
                    for board
                    in  boards ]
                average_score_pairs = [
                    (inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
                                                      for inner_board
                                                      in  inner_boards[1] ]))
                    for inner_boards
                    in  possible_states_for_boards ]
                return average_score_pairs
            else:
                assert False
        if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
        boards_with_scores = ply(n_init, boards_init, -1 * player_init)
        #print("Boards with scores:",boards_with_scores)
        scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
                   for pair
                   in boards_with_scores ]
        #print("All the scores:",scores)
        best_score_pair = boards_with_scores[np.array(scores).argmax()]
        return best_score_pair
    def eval(self, episode_count, trained_eps = 0):
@ -477,7 +343,6 @@ class Network:
            """
            Do the actual evaluation
            :param sess:
            :param method:     Either pubeval or dumbeval
            :param episodes:   Amount of episodes to use in the evaluation
            :param trained_eps:
@ -501,7 +366,6 @@ class Network:
            sys.stderr.write(
                "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
            if method == 'pubeval':
                outcomes = []
                for i in range(1, episodes + 1):
@ -509,11 +373,9 @@ class Network:
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = (self.make_move(board, roll, 1))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -532,11 +394,9 @@ class Network:
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = (self.make_move(board, roll, 1))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -596,10 +456,8 @@ class Network:
        :return:
        """
        difference_in_vals = 0
        self.restore_model()
-
+        average_diffs = 0
        start_time = time.time()
        def print_time_estimate(eps_completed):
@ -619,28 +477,27 @@ class Network:
        for episode in range(1, episodes + 1):
            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
            # TODO decide which player should be here
-            player = 1
+            # player = 1
            player = random.choice([-1,1])
            prev_board = Board.initial_state
            i = 0
            difference_in_values = 0
            while Board.outcome(prev_board) is None:
                i += 1
                self.global_step += 1
                cur_board, cur_board_value = self.make_move(prev_board,
                                                            (random.randrange(1, 7), random.randrange(1, 7)),
                                                            player)
-                difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
+                difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
                if self.config['verbose']:
                    print("Difference in values:", difference_in_vals)
                    print("Current board value :", cur_board_value)
                    print("Current board is    :\n",cur_board)
                # adjust weights
                if Board.outcome(cur_board) is None:
                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
@ -654,6 +511,10 @@ class Network:
            final_score = np.array([Board.outcome(final_board)[1]])
            scaled_final_score = ((final_score + 2) / 4)
            difference_in_values += abs(scaled_final_score-cur_board_value)
            average_diffs += (difference_in_values[0][0] / (i+1))
            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
            sys.stderr.write("\n")
@ -666,8 +527,9 @@ class Network:
                print_time_estimate(episode)
        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
        self.save_model(episode+trained_eps)
-        return outcomes, difference_in_vals[0][0]
+        return outcomes, average_diffs/len(outcomes)
--- a/network_test.py
+++ b/network_test.py
@ -57,4 +57,11 @@ boards = {initial_state,
 # print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
-network.play_against_network()
+
 diff = [0, 0]
 val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
 print(val)
 diff[0] += abs(-1-val)
 diff[1] += 1
 print(diff[1])
--- a/player.py
+++ b/player.py
@ -20,21 +20,22 @@ class Player:
            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
            total += r
        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
        print(sets)
        return sets
-    def tmp_name(self, from_board, to_board, roll, player, total_moves):
+    def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
        sets = self.calc_move_sets(from_board, roll, player)
        return_board = from_board
        for idx, board_set in enumerate(sets):
            board_set[0] = list(board_set[0])
-            print(to_board)
+            # print(to_board)
-            print(board_set)
+            # print(board_set)
            if to_board in board_set[0]:
                total_moves -= board_set[1]
                # if it's not the sum of the moves
-                if idx < 2:
+                if idx < (4 if is_quad else 2):
                    roll[idx] = 0
                else:
                    roll = [0,0]
@ -43,8 +44,11 @@ class Player:
        return total_moves, roll, return_board
    def make_human_move(self, board, roll):
-        total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
+        is_quad = roll[0] == roll[1]
-        move = ""
+        total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
        if is_quad:
            roll = [roll[0]]*4
        while total_moves != 0:
            while True:
                print("You have {roll} left!".format(roll=total_moves))
@ -60,6 +64,6 @@ class Player:
                        print("The correct syntax is: 2/5 for a move from index 2 to 5.")
            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
-            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
+            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
            print(Board.pretty(board))
        return board
--- a/test.py
+++ b/test.py
@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())
    def test_pubeval_features(self):
        """The initial board yields the same 1x28 pubeval feature row for both players."""
        import numpy as np

        # Written as: one leading entry, four rows of six entries, one
        # trailing entry and two final entries -- 28 values in total.
        expected_row = np.array((
            0,
            2, 0, 0, 0, 0, -5,
            0, -3, 0, 0, 0, 5,
            -5, 0, 0, 0, 3, 0,
            5, 0, 0, 0, 0, -2,
            0,
            0, 0,
        )).reshape(1, 28)

        start_board = Board.initial_state
        # Player 1 is checked first, then player -1, against the same row.
        for player in (1, -1):
            features = Board.board_features_to_pubeval(start_board, player)
            self.assertTrue((features == expected_row).all())
    def test_tesauro_bars(self):
        board = list(Board.initial_state)
        board[1] = 0
Author	SHA1	Message	Date
Alexander Munch-Hansen	ea4efc5a2b	Updated server code.	2018-06-07 21:36:06 +02:00
Christoffer Müller Madsen	26c0b469eb	restore restore_model	2018-05-22 20:49:10 +02:00
Alexander Munch-Hansen	f170bad9b1	tesauro fat and diffs in values	2018-05-22 15:39:14 +02:00
Christoffer Müller Madsen	6e061171da	rm TODO	2018-05-22 15:38:04 +02:00
Christoffer Müller Madsen	40c228ef01	pubeval tests	2018-05-22 15:36:23 +02:00
Christoffer Müller Madsen	c2c6c89e9f	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!8	2018-05-22 13:16:10 +00:00
Christoffer Müller Madsen	b7708b3675	train-evaluate-save	2018-05-22 15:15:36 +02:00
Christoffer Müller Madsen	bad870c27a	update 0-ply-tests	2018-05-22 15:15:15 +02:00
Christoffer Müller Madsen	653d6e30a8	add missing comma	2018-05-22 15:12:47 +02:00
Christoffer Müller Madsen	7e51b44e33	Merge branch 'experimentation' into 'master' tesauro fat and diffs in values See merge request Pownie/backgammon!7	2018-05-22 13:12:10 +00:00
Christoffer Müller Madsen	1fd6c35baa	Merge branch 'master' into 'experimentation' # Conflicts: # main.py	2018-05-22 13:11:43 +00:00
Alexander Munch-Hansen	d426c1c3b5	tesauro fat and diffs in values	2018-05-22 15:10:41 +02:00
Christoffer Müller Madsen	5ab144cffc	add git commit status to all logs	2018-05-22 14:44:13 +02:00
Christoffer Müller Madsen	cef8e54709	Merge branch 'master' of gitfub.space:Pownie/backgammon	2018-05-22 14:37:46 +02:00
Christoffer Müller Madsen	2efbc446f2	log git commit status in evaluation logs	2018-05-22 14:37:27 +02:00
Christoffer Müller Madsen	c54f7aca24	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!6	2018-05-22 12:36:37 +00:00
Alexander Munch-Hansen	c31bc39780	More server	2018-05-22 00:26:32 +02:00
Alexander Munch-Hansen	6133cb439f	Merge remote-tracking branch 'origin/experimentation' into experimentation	2018-05-20 20:15:57 +02:00
Alexander Munch-Hansen	5acd79b6da	Slight modification to move calculation	2018-05-20 19:43:28 +02:00
=	b11e783b30	add 0-ply-tests	2018-05-20 18:50:28 +02:00
Christoffer Müller Madsen	f834b10e02	remove unnecessary print	2018-05-20 16:52:05 +02:00
Christoffer Müller Madsen	72f01a2a2d	remove dependency on yaml	2018-05-20 16:03:58 +02:00
Alexander Munch-Hansen	d14e6c5994	Everything might work, except for quad, that might be bugged.	2018-05-20 00:38:13 +02:00
Alexander Munch-Hansen	a266293ecd	Stuff is happening, moving is better!	2018-05-19 22:01:55 +02:00
Alexander Munch-Hansen	e9a46c79df	server and stuff	2018-05-19 14:12:13 +02:00
Alexander Munch-Hansen	816cdfae00	fix and clean	2018-05-18 14:55:10 +02:00
Christoffer Müller Madsen	ff9664eb38	Merge branch 'eager_eval' into 'master' Eager eval See merge request Pownie/backgammon!5	2018-05-18 12:06:12 +00:00