Merge branch 'eager_eval' into 'master'

Eager eval See merge request Pownie/backgammon!5
2018-05-18 12:06:12 +00:00 · 2018-05-18 12:06:12 +00:00 · ff9664eb38
commit ff9664eb38
parent 7b308be4e2 3e379b40c4
13 changed files with 1239 additions and 554 deletions
--- a/board.py
+++ b/board.py
@ -1,3 +1,4 @@
 import quack
 import numpy as np
 import itertools
@ -12,11 +13,7 @@ class Board:
    @staticmethod
    def idxs_with_checkers_of_player(board, player):
-        idxs = []
+        return quack.idxs_with_checkers_of_player(board, player)
        for idx, checker_count in enumerate(board):
            if checker_count * player >= 1:
                idxs.append(idx)
        return idxs
    # TODO: Write a test for this
@ -40,18 +37,19 @@ class Board:
    def board_features_quack(board, player):
        board = list(board)
        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
-        return np.array(board).reshape(1, -1)
+        return np.array(board).reshape(1,28)
    # quack-fat
    @staticmethod
    def board_features_quack_fat(board, player):
-        board = list(board)
+        return np.array(quack.board_features_quack_fat(board,player)).reshape(1,30)
-        positives = [x if x > 0 else 0 for x in board]
+        # board = list(board)
-        negatives = [x if x < 0 else 0 for x in board]
+        # positives = [x if x > 0 else 0 for x in board]
-        board.append( 15 - sum(positives))
+        # negatives = [x if x < 0 else 0 for x in board]
-        board.append(-15 - sum(negatives))
+        # board.append( 15 - sum(positives))
-        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        # board.append(-15 - sum(negatives))
-        return np.array(board).reshape(1,-1)
+        # board += ([1, 0] if np.sign(player) > 0 else [0, 1])
        # return np.array(board).reshape(1,30)
    # quack-fatter
@ -68,7 +66,7 @@ class Board:
        board.append(15 - sum(positives))
        board.append(-15 - sum(negatives))
        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
-        return np.array(board).reshape(1, -1)
+        return np.array(board).reshape(1,30)
    # tesauro
    @staticmethod
@ -124,98 +122,15 @@ class Board:
            # Calculate how many pieces there must be in the home state and divide it by 15
            features.append((15 - sum) / 15)
        features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
-        test = np.array(features).reshape(1,-1)
+        test = np.array(features)
        #print("TEST:",test)
-        return test
+        return test.reshape(1,198)
    @staticmethod
    def is_move_valid(board, player, face_value, move):
-        if face_value == 0:
+        return quack.is_move_valid(board, player, face_value, move)
            return True
        else:
            def sign(a):
                return (a > 0) - (a < 0)
            from_idx   = move[0]
            to_idx     = move[1]
            to_state   = None
            from_state = board[from_idx]
            delta      = to_idx - from_idx
            direction  = sign(delta)
            bearing_off = None
            # FIXME: Use get instead of array-like indexing
            if to_idx >= 1 and to_idx <= 24:
                to_state   = board[to_idx]
                bearing_off = False
            else:  # Bearing off
                to_state   = 0
                bearing_off = True
            # print("_"*20)
            # print("board:", board)
            # print("to_idx:", to_idx, "board[to_idx]:", board[to_idx], "to_state:", to_state)
            # print("+"*20)
            def is_forward_move():
                return direction == player
            def face_value_match_move_length():
                return abs(delta) == face_value
            def bear_in_if_checker_on_bar():
                if player == 1:
                    bar = 0
                else:
                    bar = 25
                bar_state = board[bar]
                if bar_state != 0:
                    return from_idx == bar
                else:
                    return True
            def checkers_at_from_idx():
                return sign(from_state) == player
            def no_block_at_to_idx():
                if -sign(to_state) == player:
                    return abs(to_state) == 1
                else:
                    return True
            def can_bear_off():
                checker_idxs = Board.idxs_with_checkers_of_player(board, player)
                def is_moving_backmost_checker():
                    if player == 1:
                        return all([(idx >= from_idx) for idx in checker_idxs])
                    else:
                        return all([(idx <= from_idx) for idx in checker_idxs])
                def all_checkers_in_last_quadrant():
                    if player == 1:
                        return all([(idx >= 19) for idx in checker_idxs])
                    else:
                        return all([(idx <= 6) for idx in checker_idxs])
                return all([ is_moving_backmost_checker(),
                             all_checkers_in_last_quadrant() ])
            # TODO: add switch here instead of wonky ternary in all        
            # print("is_forward:",is_forward_move())
            # print("face_value:",face_value_match_move_length())
            # print("Checkes_at_from:",checkers_at_from_idx())
            # print("no_block:",no_block_at_to_idx())
            return all([ is_forward_move(),
                         face_value_match_move_length(),
                         bear_in_if_checker_on_bar(),
                         checkers_at_from_idx(),
                         no_block_at_to_idx(),
                         can_bear_off() if bearing_off else True ])
    @staticmethod
    def any_move_valid(board, player, roll):
@ -255,12 +170,27 @@ class Board:
    @staticmethod
-    def apply_moves_to_board(board, player, moves):
+    def apply_moves_to_board(board, player, move):
-        for move in moves:
+        from_idx = move[0]
-            from_idx, to_idx = move.split("/")
+        to_idx = move[1]
-            board[int(from_idx)] -= int(player)
+        board = list(board)
-            board[int(to_idx)] += int(player)
+        board[from_idx] -= player
-        return board
+
        if (to_idx < 1 or to_idx > 24):
            return
        if (board[to_idx] * player == -1):
            if (player == 1):
                board[25] -= player
            else:
                board[0] -= player
            board[to_idx] = 0
        board[to_idx] += player
        return tuple(board)
    @staticmethod
    def calculate_legal_states(board, player, roll):
@ -271,24 +201,9 @@ class Board:
        #       turn and then do something with the second die
        def calc_moves(board, face_value):
-            idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player)
+            if face_value == 0:
            if len(idxs_with_checkers) == 0:
                return [board]
-            boards = [(Board.do_move(board,
+            return quack.calc_moves(board, player, face_value)
                               player,
                               (idx, idx + (face_value * player)))
                       if Board.is_move_valid(board,
                                              player,
                                              face_value,
                                              (idx, idx + (face_value * player)))
                       else None)
                      for idx in idxs_with_checkers]
            # print("pls:",boards)
            board_list = list(filter(None, boards))  # Remove None-values
            # if len(board_list) == 0:
            #     return [board]
            # print("board list:", board_list)
            return board_list
        # Problem with cal_moves: Method can return empty list (should always contain at least same board).
        #               *Update*: Seems to be fixed.
@ -302,12 +217,16 @@ class Board:
        if not Board.any_move_valid(board, player, roll):
            return { board }
        dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
        #print("Permuts:",dice_permutations)
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
            #print("initial board: ", board)
            #print("roll:", roll)
            #print("Rest of roll:",roll[1:])
            boards = calc_moves(board, roll[0])
            #print("Boards:",boards)
            #print("Roll:",roll[0])
            #print("boards after first die: ", boards)
            for die in roll[1:]:
@ -347,9 +266,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end  1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
@ -357,42 +276,8 @@ class Board:
    @staticmethod
    def do_move(board, player, move):
        # Implies that move is valid; make sure to check move validity before calling do_move(...)
        return quack.do_move(board, player, move)
        def move_to_bar(board, to_idx):
            board = list(board)
            if player == 1:
                board[25] -= player
            else:
                board[0] -= player
            board[to_idx] = 0
            return board
        # TODO: Moving in from bar is handled by the representation
        # TODONE: Handle bearing off
        from_idx = move[0]
        #print("from_idx: ", from_idx)
        to_idx = move[1]
        #print("to_idx: ", to_idx)
        # pdb.set_trace()
        board = list(board) # Make mutable copy of board
        # 'Lift' checker
        board[from_idx] -= player
        # Handle bearing off
        if to_idx < 1 or to_idx > 24:
            return tuple(board)
        # Handle hitting checkers
        if board[to_idx] * player == -1:
            board = move_to_bar(board, to_idx)
        # Put down checker
        board[to_idx] += player
        return tuple(board)
    @staticmethod
    def flip(board):
--- a/bot.py
+++ b/bot.py
@ -1,24 +1,8 @@
 from cup import Cup
 from network import Network
 from board import Board
 import tensorflow as tf
 import numpy as np
 import random
 class Bot:
-    def __init__(self, sym, config = None, name = "unnamed"):
+    def __init__(self, sym):
        self.config = config
        self.cup = Cup()
        self.sym = sym
        self.graph = tf.Graph()
        self.network = Network(config, name)
        self.network.restore_model()
    def restore_model(self):
        with self.graph.as_default():
            self.network.restore_model()
    def get_session(self):
        return self.session
@ -26,16 +10,60 @@ class Bot:
    def get_sym(self):
        """Return the value stored on this bot as ``self.sym``."""
        return self.sym
    def get_network(self):
        """Return the object stored on this bot as ``self.network``."""
        return self.network
-    # TODO: DEPRECATE
+    def calc_move_sets(self, from_board, roll, player):
-    def make_move(self, board, sym, roll):
+        board = from_board
-        # print(Board.pretty(board))
+        sets = []
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
+        total = 0
-        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
+        print("board!:",board)
-        scores = [ x[1] for x in moves_and_scores ]
+        for r in roll:
-        best_move_pair = moves_and_scores[np.array(scores).argmax()]
+            # print("Value of r:",r)
-        #print("Found the best state, being:", np.array(move_scores).argmax())
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
-        return best_move_pair
+            total += r
        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
        return sets
    def handle_move(self, from_board, to_board, roll, player):
        """
        Account for a move from ``from_board`` to ``to_board`` on the bot's own turn state.

        The candidate sets come from ``self.calc_move_sets``. The first set that
        contains ``to_board`` has its move length subtracted from ``self.total_moves``
        and the matching entry of ``self.roll`` cleared. Only the first match counts.

        :param from_board: Board before the move
        :param to_board: Board after the move; compared against candidates converted to lists
        :param roll: Dice handed on to ``calc_move_sets``
        :param player: Player making the move
        :return: Nothing; ``self.total_moves`` and ``self.roll`` are updated and the
                 remaining total is printed
        """
        candidate_sets = self.calc_move_sets(from_board, roll, player)

        for position, entry in enumerate(candidate_sets):
            # Materialise the states in place, exactly as the set entry is reused below
            entry[0] = list(entry[0])
            reachable = [list(state) for state in entry[0]]
            if to_board not in reachable:
                continue

            self.total_moves -= entry[1]
            if position >= 2:
                # A match beyond the first two sets clears the whole roll
                self.roll = [0, 0]
            else:
                self.roll[position] = 0
            break

        print("Total moves left:",self.total_moves)
    def tmp_name(self, from_board, to_board, roll, player, total_moves):
        """
        Check a requested move against the candidate move sets and return the updated turn state.

        :param from_board: Board before the move
        :param to_board: Board the caller wants to reach
        :param roll: Dice still available; when a single-die set matches, the used
                     entry is zeroed in place on this object
        :param player: Player making the move
        :param total_moves: Sum of die values still to be played
        :return: Tuple ``(total_moves, roll, board)``. ``board`` is ``to_board`` when it
                 was found in one of the sets, otherwise ``from_board`` with
                 ``total_moves`` and ``roll`` left untouched
        """
        sets = self.calc_move_sets(from_board, roll, player)
        return_board = from_board

        # Every entry pairs the reachable states with the move length that produces
        # them. Keep them under separate names so that the length (not a board) is
        # what gets subtracted from total_moves.
        for idx, (reachable_states, move_length) in enumerate(sets):
            # NOTE(review): candidates are compared as lists, so to_board only
            # matches when it is itself a list - confirm against the callers.
            if to_board in [list(state) for state in reachable_states]:
                total_moves -= move_length
                # The first two entries are treated as single-die sets; any later
                # entry uses up the whole roll
                if idx < 2:
                    roll[idx] = 0
                else:
                    roll = [0, 0]
                return_board = to_board
                break

        return total_moves, roll, return_board
    def make_human_move(self, board, player, roll):
        total_moves = roll[0] + roll[1]
        previous_board = board
        while total_moves != 0:
            move = input("Pick a move!\n")
            to_board = Board.apply_moves_to_board(previous_board, player, move)
            total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)
--- a/main.py
+++ b/main.py
@ -31,19 +31,17 @@ parser.add_argument('--train-perpetually', action='store_true',
                    help='start new training session as soon as the previous is finished')
 parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
 parser.add_argument('--force-creation', action='store_true',
                    help='force model creation if model does not exist')
 parser.add_argument('--board-rep', action='store', dest='board_rep',
                    default='tesauro',
                    help='name of board representation to use as input to neural network')
-parser.add_argument('--use-baseline', action='store_true',
+parser.add_argument('--verbose', action='store_true',
-                    help='use the baseline model, note, has size 28')
+                    help='If set, a lot of stuff will be printed')
 parser.add_argument('--ply', action='store', dest='ply', default='0',
                    help='defines the amount of ply used when deciding what move to make')
 parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1',
                    help='the amount of times the evaluation method should be repeated')
 args = parser.parse_args()
 if args.model == "baseline_model":
    print("Model name 'baseline_model' not allowed")
    exit()
 config = {
    'model': args.model,
@ -59,10 +57,13 @@ config = {
    'model_storage_path': 'models',
    'bench_storage_path': 'bench',
    'board_representation': args.board_rep,
-    'force_creation': args.force_creation,
+    'global_step': 0,
-    'use_baseline': args.use_baseline
+    'verbose': args.verbose,
    'ply': args.ply,
    'repeat_eval': args.repeat_eval
 }
 # Create models folder
 if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])
@ -77,6 +78,14 @@ if not os.path.isdir(log_path):
    os.mkdir(log_path)
 def save_config():
    """Print the module-level ``config`` mapping to stdout as a YAML document."""
    import yaml

    # Nothing is written to disk here; the dump is only echoed.
    rendered = yaml.dump(config)
    print(rendered)
 # Define helper functions
 def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    format_vars = { 'trained_eps': trained_eps,
@ -125,6 +134,24 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
        with open(log_path, 'a+') as f:
            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
 def find_board_rep():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    board_rep_path = os.path.join(checkpoint_path, "board_representation")
    with open(board_rep_path, 'r') as f:
        return f.read()
 def board_rep_file_exists():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    board_rep_path = os.path.join(checkpoint_path, "board_representation")
    return os.path.isfile(board_rep_path)
 def create_board_rep():
    checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
    board_rep_path = os.path.join(checkpoint_path, "board_representation")
    with open(board_rep_path, 'a+') as f:
        f.write(config['board_representation'])
 # Do actions specified by command-line
 if args.list_models:
    def get_eps_trained(folder):
@ -145,9 +172,26 @@ if __name__ == "__main__":
    # Set up network
    from network import Network
    save_config()
    # Set up variables
    episode_count = config['episode_count']
    if config['board_representation'] is None:
        if board_rep_file_exists():
            config['board_representation'] = find_board_rep()
        else:
            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
            exit()
    else:
        if not board_rep_file_exists():
            create_board_rep()
        else:
            if config['board_representation'] != find_board_rep():
                sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n".
                                 format(given = config['board_representation'], board_rep = find_board_rep()))
                exit()
    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
@ -161,15 +205,19 @@ if __name__ == "__main__":
            if not config['train_perpetually']:
                break
    elif args.play:
        network = Network(config, config['model'])
        network.play_against_network()
    elif args.eval:
        network = Network(config, config['model'])
-        start_episode = network.episodes_trained
+        for i in range(int(config['repeat_eval'])):
-        # Evaluation measures are described in `config`
+            start_episode = network.episodes_trained
-        outcomes = network.eval(config['episode_count'])
+            # Evaluation measures are described in `config`
-        log_eval_outcomes(outcomes, trained_eps = start_episode)
+            outcomes = network.eval(config['episode_count'])
-        # elif args.play:
+            log_eval_outcomes(outcomes, trained_eps = start_episode)
-        # g.play(episodes = episode_count)
+            # elif args.play:
            # g.play(episodes = episode_count)
    elif args.bench_eval_scores:
@ -191,7 +239,7 @@ if __name__ == "__main__":
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]
-        def do_eval(sess):
+        def do_eval():
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
@ -199,8 +247,7 @@ if __name__ == "__main__":
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measure to be benchmarked are described in `config`
-                        outcomes = network.eval(episode_count = n,
+                        outcomes = network.eval(episode_count = n)
                                                tf_session = sess)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
@ -210,8 +257,8 @@ if __name__ == "__main__":
        # CMM: oh no
        import tensorflow as tf
-        with tf.Session() as session:
+
-            network.restore_model(session)
+        network.restore_model()
-            do_eval(session)
+        do_eval()
--- a/network.py
+++ b/network.py
@ -8,6 +8,8 @@ import random
 from eval import Eval
 import glob
 from operator import itemgetter
 import tensorflow.contrib.eager as tfe
 from player import Player
 class Network:
    # board_features_quack has size 28
@ -15,21 +17,41 @@ class Network:
    # board_features_tesauro has size 198
    board_reps = {
-        'quack-fat' : (30, Board.board_features_quack_fat),
+        'quack-fat'   : (30, Board.board_features_quack_fat),
-        'quack'     : (28, Board.board_features_quack),
+        'quack'       : (28, Board.board_features_quack),
-        'tesauro'   : (198, Board.board_features_tesauro),
+        'tesauro'     : (198, Board.board_features_tesauro),
-        'quack-norm': (30, Board.board_features_quack_norm)
+        'quack-norm'  : (30, Board.board_features_quack_norm),
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }
    def custom_tanh(self, x, name=None):
        """
        Apply ``tf.tanh`` to ``x`` and multiply the result by the constant 2.

        :param x: Input passed to ``tf.tanh``
        :param name: Optional name forwarded to ``tf.tanh``
        :return: The scaled result of ``tf.scalar_mul``
        """
        factor = tf.constant(2.00)
        squashed = tf.tanh(x, name)
        return tf.scalar_mul(factor, squashed)
    def __init__(self, config, name):
        """
        :param config:
        :param name:
        """
        move_options = {
            '1': self.make_move_1_ply,
            '0': self.make_move_0_ply
        }
        tf.enable_eager_execution()
        xavier_init = tf.contrib.layers.xavier_initializer()
        self.config = config
        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
        self.name = name
        self.make_move = move_options[
            self.config['ply']
        ]
        # Set board representation from config
        self.input_size, self.board_trans_func = Network.board_reps[
            self.config['board_representation']
@ -39,16 +61,6 @@ class Network:
        self.max_learning_rate = 0.1
        self.min_learning_rate = 0.001
        self.global_step = tf.Variable(0, trainable=False, name="global_step")
        self.learning_rate = tf.maximum(self.min_learning_rate,
                                        tf.train.exponential_decay(self.max_learning_rate,
                                                                   self.global_step, 50000,
                                                                   0.96,
                                                                   staircase=True),
                                        name="learning_rate")
        # Restore trained episode count for model
        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
        if os.path.isfile(episode_count_path):
@ -57,62 +69,107 @@ class Network:
        else:
            self.episodes_trained = 0
-        self.x = tf.placeholder('float', [1, self.input_size], name='input')
+        global_step_path = os.path.join(self.checkpoint_path, "global_step")
-        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
+        if os.path.isfile(global_step_path):
-
+            with open(global_step_path, 'r') as f:
-        xavier_init = tf.contrib.layers.xavier_initializer()
+                self.global_step = int(f.read())
-
+        else:
-        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
+            self.global_step = 0
                              initializer=xavier_init)
        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                              initializer=xavier_init)
        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                              initializer=tf.zeros_initializer)
        b_2 = tf.get_variable("b_2", (self.output_size,),
                              initializer=tf.zeros_initializer)
-        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
+        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
                                  input_shape=(1,self.input_size)),
            tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
        ])
        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
-        difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
+        """
        Calculates the exponential decay on a learning rate
        :param max_lr: The learning rate that the network starts at
        :param global_step: The global step
        :param decay_rate: The rate at which the learning rate should decay
        :param decay_steps: The amount of steps between each decay
        :return: The result of the exponential decay performed on the learning rate
        """
        res = max_lr * decay_rate**(global_step // decay_steps)
        return res
    def do_backprop(self, prev_state, value_next):
        """
        Performs the Temporal-difference backpropagation step on the model
        :param prev_state: The previous state of the game, this has its value recalculated
        :param value_next: The value of the current move
        :return: Nothing, the calculation is performed on the model of the network
        """
        self.learning_rate = tf.maximum(self.min_learning_rate,
                                         self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
                                         name="learning_rate")
        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))
        grads = tape.gradient(value, self.model.variables)
        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
        trainable_vars = tf.trainable_variables()
        gradients = tf.gradients(self.value, trainable_vars)
        apply_gradients = []
        global_step_op = self.global_step.assign_add(1)
        with tf.variable_scope('apply_gradients'):
-            for gradient, trainable_var in zip(gradients, trainable_vars):
+            for grad, train_var in zip(grads, self.model.variables):
-                backprop_calc = self.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * grad
-                grad_apply = trainable_var.assign_add(backprop_calc)
+                train_var.assign_add(backprop_calc)
                apply_gradients.append(grad_apply)
        with tf.control_dependencies([global_step_op]):
-            self.training_op = tf.group(*apply_gradients, name='training_op')
+    def print_variables(self):
        """
        Prints all the variables of the model
        :return:
        """
        variables = self.model.variables
        for k in variables:
            print(k)
-        self.saver = tf.train.Saver(max_to_keep=1)
+    def eval_state(self, state):
        """
        Evaluates a single state
        :param state:
        :return:
        """
        return self.model(state.reshape(1,-1))
-    def eval_state(self, sess, state):
+    def save_model(self, episode_count):
-        return sess.run(self.value, feed_dict={self.x: state})
+        """
-
+        Saves the model of the network, it references global_step as self.global_step
-    def save_model(self, sess, episode_count, global_step):
+        :param episode_count:
-        self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
+        :return:
        """
        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))
        #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(episode_count) + "\n")
-    def restore_model(self, sess):
+        with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f:
            print("[NETWK] ({name}) Saving global step to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(self.global_step) + "\n")
        if self.config['verbose']:
            self.print_variables()
    def calc_vals(self, states):
        """
        Score a batch of states with the network's model.

        :param states: The states to score. They must already be transformed into
                       the model's input representation before being passed in.
        :return: Whatever ``self.model.predict_on_batch`` returns for ``states``
        """
        return self.model.predict_on_batch(states)
    def restore_model(self):
        """
        Restore a model for a session, such that a trained model can either be further trained or
        used for evaluation
@ -121,47 +178,38 @@ class Network:
        :return: Nothing. It's a side-effect that a model gets restored for the network.
        """
        if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
            latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                  str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)
+            tfe.Saver(self.model.variables).restore(latest_checkpoint)
-            variables_names = [v.name for v in tf.trainable_variables()]
+
-            values = sess.run(variables_names)
+            # variables_names = [v.name for v in self.model.variables]
-            for k, v in zip(variables_names, values):
+
                print("Variable: ", k)
                print("Shape: ", v.shape)
                print(v)
            # Restore trained episode count for model
            episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
            if os.path.isfile(episode_count_path):
                with open(episode_count_path, 'r') as f:
                    self.config['start_episode'] = int(f.read())
        elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')):
            checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model")
            latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                  str(latest_checkpoint))
            self.saver.restore(sess, latest_checkpoint)
-            variables_names = [v.name for v in tf.trainable_variables()]
+            global_step_path = os.path.join(self.checkpoint_path, "global_step")
-            values = sess.run(variables_names)
+            if os.path.isfile(global_step_path):
-            for k, v in zip(variables_names, values):
+                with open(global_step_path, 'r') as f:
-                print("Variable: ", k)
+                    self.config['global_step'] = int(f.read())
-                print("Shape: ", v.shape)
+
-                print(v)
+            if self.config['verbose']:
-        elif not self.config['force_creation']:
+                self.print_variables()
            print("You need to have baseline_model inside models")
            exit()
-    def make_move(self, sess, board, roll, player):
+
    def make_move_0_ply(self, board, roll, player):
        """
        Find the best move given a board, roll and a player, by finding all possible states one can go to
-        and then picking the best, by using the network to evaluate each state. The highest score is picked
+        and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead.
-        for the 1-player and the max(1-score) is picked for the -1-player.
+        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.
        :param sess:
        :param board: Current board
@ -169,23 +217,37 @@ class Network:
        :param player: Current player
        :return: A pair of the best state to go to, together with the score of that state
        """
-        legal_moves = Board.calculate_legal_states(board, player, roll)
+        legal_moves = list(Board.calculate_legal_states(board, player, roll))
-        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
+        legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])
        scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
        best_score_index = np.array(scores).argmax()
        best_move_pair = moves_and_scores[best_score_index]
        return best_move_pair
-    def make_move_n_ply(self, sess, board, roll, player, n = 1):
+        scores = self.model.predict_on_batch(legal_states)
-        best_pair = self.calc_n_ply(n, sess, board, player, roll)
+        transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores]
        best_score_idx = np.argmax(np.array(transformed_scores))
        best_move = legal_moves[best_score_idx]
        best_score = scores[best_score_idx]
        return [best_move, best_score]
    def make_move_1_ply(self, board, roll, player):
        """
        Pick a move using a 1-ply look-ahead.

        This is a thin wrapper: the work is delegated to calculate_1_ply and its result is handed back
        unchanged.
        :param board: Current board
        :param roll: The roll to play
        :param player: Current player
        :return: Whatever calculate_1_ply returns for the same arguments
        """
        return self.calculate_1_ply(board, roll, player)
-    def calculate_1_ply(self, sess, board, roll, player):
+    def calculate_1_ply(self, board, roll, player):
        """
-        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
+        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
-        exhaustive search is performed on the best 15 moves from the single ply.
+        all moves and scores are found for them. The expected score is then calculated for each of the boards from the
-
+        0-ply.
        :param sess:
        :param board:
        :param roll: The original roll
@ -197,23 +259,91 @@ class Network:
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
-        # find all values for the above boards
+        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
+        scores = self.calc_vals(legal_states)
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
+        scores = [score.numpy() for score in scores]
-        best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
+        moves_and_scores = list(zip(init_legal_states, scores))
-        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
+        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1)
        best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
        best_score_index = np.array(all_rolls_scores).argmax()
        best_board = best_fifteen_boards[best_score_index]
-        return [best_board, max(all_rolls_scores)]
+        scores, trans_scores = self.do_ply(best_boards, player)
        best_score_idx = np.array(trans_scores).argmax()
        return [best_boards[best_score_idx], scores[best_score_idx]]
    def do_ply(self, boards, player):
        """
        Evaluate a single extra ply for each of the given boards.

        For every board, all 21 distinct dice rolls are tried for the opposing player (player * -1). Every
        legal state found that way is transformed with self.board_trans_func, and all of them are scored in
        one call to self.model.predict_on_batch. The score of a board is the mean over all of the states
        found for it (across all rolls), so this only looks one ply ahead; going deeper would require
        extending this method.
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: A list [means, transformed_means]. Both are lists with one entry per board in boards;
        means holds the mean score, transformed_means holds the same value for player 1 and (1 - mean)
        otherwise. If boards is empty, [[], []] is returned.
        """
        # the 21 rolls where the order of the dice does not matter: (1, 1), (1, 2), ..., (6, 6)
        all_rolls = [(low, high) for low in range(1, 7) for high in range(low, 7)]
        opponent = player * -1

        # Prepping of data: one flat list of states, plus how many of them belong to each board
        states = []
        states_per_board = []
        for board in boards:
            count_before = len(states)
            for roll in all_rolls:
                for state in Board.calculate_legal_states(board, opponent, roll):
                    states.append(np.array(self.board_trans_func(state, opponent)[0]))
            states_per_board.append(len(states) - count_before)

        # nothing to evaluate; avoid sending an empty batch to the model
        if not states_per_board:
            return [[], []]

        all_scores = self.model.predict_on_batch(np.array(states))

        # split the flat score array back into one slice per board and take the mean of each slice
        means_splits = []
        from_idx = 0
        for length in states_per_board:
            means_splits.append(tf.reduce_mean(all_scores[from_idx:from_idx + length]))
            from_idx += length

        transformed_means_splits = [x if player == 1 else (1 - x) for x in means_splits]
        return [means_splits, transformed_means_splits]
    def calc_n_ply(self, n_init, sess, board, player, roll):
        """
        :param n_init:
        :param sess:
        :param board:
        :param player:
        :param roll:
        :return:
        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
@ -233,6 +363,13 @@ class Network:
    def n_ply(self, n_init, sess, boards_init, player_init):
        """
        :param n_init:
        :param sess:
        :param boards_init:
        :param player_init:
        :return:
        """
        def ply(n, boards, player):
            def calculate_possible_states(board):
                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
@ -324,69 +461,8 @@ class Network:
        best_score_pair = boards_with_scores[np.array(scores).argmax()]
        return best_score_pair
    def do_ply(self, sess, boards, player):
        """
        Calculates a single extra ply, resulting in a larger search space for our best move.
        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
        allowing the function to search deeper, which could result in an even larger search space. If we wish
        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.
-        :param sess:
+    def eval(self, episode_count, trained_eps = 0):
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: An array of scores where each index describes one of the boards which was given as param
        to this function.
        """
        def gen_21_rolls():
            """
            Calculate all possible rolls, [[1,1], [1,2] ..]
            :return: All possible rolls
            """
            a = []
            for x in range(1, 7):
                for y in range(1, 7):
                    if not [x, y] in a and not [y, x] in a:
                        a.append([x, y])
            return a
        all_rolls = gen_21_rolls()
        all_rolls_scores = []
        count = 0
        # loop over boards
        for a_board in boards:
            a_board_scores = []
            # loop over all rolls, for each board
            for roll in all_rolls:
                # find all states we can get to, given the board and roll and the opposite player
                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
                count += len(all_rolls_boards)
                # find scores for each board found above
                spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
                                    for new_board in all_rolls_boards]
                # if the original player is the -1 player, then we need to find (1-value)
                spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
                # find the best score
                best_score = max(spec_roll_scores)
                # append the best score to a_board_scores, where we keep track of the best score for each board
                a_board_scores.append(best_score)
            # save the expected average of board scores
            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
        # return all the average scores
        print(count)
        return all_rolls_scores
    def eval(self, episode_count, trained_eps = 0, tf_session = None):
        """
        Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
        a model which has been given random weights, so it acts deterministically random.
@ -397,7 +473,7 @@ class Network:
        :return: outcomes:    The outcomes of the evaluation session
        """
-        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+        def do_eval(method, episodes = 1000, trained_eps = 0):
            """
            Do the actual evaluation
@ -434,7 +510,7 @@ class Network:
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-                        board = (self.make_move(sess, board, roll, 1))[0]
+                        board = (self.make_move(board, roll, 1))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
@ -457,7 +533,7 @@ class Network:
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-                        board = (self.make_move(sess, board, roll, 1))[0]
+                        board = (self.make_move(board, roll, 1))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
@ -476,112 +552,122 @@ class Network:
                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                return [0]
-        if tf_session == None:
+
-            with tf.Session() as session:
+        outcomes = [ (method, do_eval(method,
-                session.run(tf.global_variables_initializer())
+                                      episode_count,
-                self.restore_model(session)
+                                      trained_eps = trained_eps))
-                outcomes = [ (method, do_eval(session,
+                     for method
-                                              method,
+                     in self.config['eval_methods'] ]
-                                              episode_count,
+        return outcomes
-                                              trained_eps = trained_eps))
+
-                             for method
+
-                             in self.config['eval_methods'] ]
+    def play_against_network(self):
-                return outcomes
+        """
-        else:
+        Allows you to play against a supplied model.
-            outcomes = [ (method, do_eval(tf_session,
+        :return:
-                                          method,
+        """
-                                          episode_count,
+        self.restore_model()
-                                          trained_eps = trained_eps))
+        human_player = Player(-1)
-                         for method
+        cur_player = 1
-                         in self.config['eval_methods'] ]
+        player = 1
-            return outcomes
+        board = Board.initial_state
        i = 0
        while Board.outcome(board) is None:
            print(Board.pretty(board))
            roll = (random.randrange(1, 7), random.randrange(1, 7))
            print("Bot rolled:", roll)
            board, _ = self.make_move(board, roll, player)
            print(Board.pretty(board))
            roll = (random.randrange(1, 7), random.randrange(1, 7))
            print("You rolled:", roll)
            board = human_player.make_human_move(board, roll)
        print("DONE "*10)
        print(Board.pretty(board))
    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
-        with tf.Session() as sess:
+        """
-            difference_in_vals = 0
+        Train a model by self-learning.
-            writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
+        :param episodes:
        :param save_step_size:
        :param trained_eps:
        :return:
        """
-            sess.run(tf.global_variables_initializer())
+        difference_in_vals = 0
            self.restore_model(sess)
-            variables_names = [v.name for v in tf.trainable_variables()]
+        self.restore_model()
            values = sess.run(variables_names)
            for k, v in zip(variables_names, values):
                print("Variable: ", k)
                print("Shape: ", v.shape)
                print(v)
-            start_time = time.time()
+        start_time = time.time()
-            def print_time_estimate(eps_completed):
+        def print_time_estimate(eps_completed):
-                cur_time = time.time()
+            cur_time = time.time()
-                time_diff = cur_time - start_time
+            time_diff = cur_time - start_time
-                eps_per_sec = eps_completed / time_diff
+            eps_per_sec = eps_completed / time_diff
-                secs_per_ep = time_diff / eps_completed
+            secs_per_ep = time_diff / eps_completed
-                eps_remaining = (episodes - eps_completed)
+            eps_remaining = (episodes - eps_completed)
-                sys.stderr.write(
+            sys.stderr.write(
-                    "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+                "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
-                sys.stderr.write(
+            sys.stderr.write(
-                    "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
+                "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
-                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
+                    eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
-            sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
+        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
-            outcomes = []
+        outcomes = []
-            for episode in range(1, episodes + 1):
+        for episode in range(1, episodes + 1):
-                sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
+            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-                # TODO decide which player should be here
+            # TODO decide which player should be here
-                player = 1
+            player = 1
-                prev_board = Board.initial_state
+            prev_board = Board.initial_state
-                i = 0
+            i = 0
-                while Board.outcome(prev_board) is None:
+            while Board.outcome(prev_board) is None:
-                    i += 1
+                i += 1
-
+                self.global_step += 1
                    cur_board, cur_board_value = self.make_move(sess,
                                                                prev_board,
                                                                (random.randrange(1, 7), random.randrange(1, 7)),
                                                                player)
                    difference_in_vals += abs((cur_board_value - self.eval_state(sess, self.board_trans_func(prev_board, player))))
-                    # adjust weights
+                cur_board, cur_board_value = self.make_move(prev_board,
-                    sess.run(self.training_op,
+                                                            (random.randrange(1, 7), random.randrange(1, 7)),
-                             feed_dict={self.x: self.board_trans_func(prev_board, player),
+                                                            player)
                                        self.value_next: cur_board_value})
                difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
                if self.config['verbose']:
                    print("Difference in values:", difference_in_vals)
                    print("Current board value :", cur_board_value)
                    print("Current board is    :\n",cur_board)
                # adjust weights
                if Board.outcome(cur_board) is None:
                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
                    player *= -1
-                    prev_board = cur_board
+                prev_board = cur_board
-                final_board = prev_board
+            final_board = prev_board
-                sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
+            sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
-                outcomes.append(Board.outcome(final_board)[1])
+            outcomes.append(Board.outcome(final_board)[1])
-                final_score = np.array([Board.outcome(final_board)[1]])
+            final_score = np.array([Board.outcome(final_board)[1]])
-                scaled_final_score = ((final_score + 2) / 4)
+            scaled_final_score = ((final_score + 2) / 4)
-                with tf.name_scope("final"):
+            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
                    merged = tf.summary.merge_all()
                    global_step, summary, _ = sess.run([self.global_step, merged, self.training_op],
                                          feed_dict={self.x: self.board_trans_func(prev_board, player),
                                                     self.value_next: scaled_final_score.reshape((1, 1))})
                    writer.add_summary(summary, episode + trained_eps)
-                sys.stderr.write("\n")
+            sys.stderr.write("\n")
-                if episode % min(save_step_size, episodes) == 0:
+            if episode % min(save_step_size, episodes) == 0:
-                    sys.stderr.write("[TRAIN] Saving model...\n")
+                sys.stderr.write("[TRAIN] Saving model...\n")
-                    self.save_model(sess, episode + trained_eps, global_step)
+                self.save_model(episode + trained_eps)
-                if episode % 50 == 0:
+            if episode % 50 == 0:
-                    print_time_estimate(episode)
+                print_time_estimate(episode)
-            sys.stderr.write("[TRAIN] Saving model for final episode...\n")
+        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
-            self.save_model(sess, episode+trained_eps, global_step)
+        self.save_model(episode+trained_eps)
-            writer.close()
+        return outcomes, difference_in_vals[0][0]
            return outcomes, difference_in_vals[0][0]
--- a/network_test.py
+++ b/network_test.py
@ -9,14 +9,12 @@ from board import Board
 import main
 config = main.config.copy()
-config['model'] = "tesauro_blah"
+config['model'] = "player_testings"
-config['force_creation'] = True
+config['ply'] = "1"
 config['board_representation'] = 'quack-fat'
 network = Network(config, config['model'])
-session = tf.Session()
+network.restore_model()
 session.run(tf.global_variables_initializer())
 network.restore_model(session)
 initial_state = Board.initial_state
 initial_state_1 = ( 0,
@ -38,65 +36,25 @@ boards = {initial_state,
          initial_state_2 }
 def gen_21_rolls():
     """
     Build the 21 distinct dice rolls, where the order of the two dice does not matter.
     :return: A list of [low, high] pairs: [[1, 1], [1, 2], ..., [6, 6]]
     """
     return [[low, high] for low in range(1, 7) for high in range(low, 7)]
 def calc_all_scores(board, player):
     """
     Evaluate the transformed board once for every entry returned by gen_21_rolls().

     The roll itself is never handed to the network, so every entry is an evaluation of the same
     transformed board.
     :param board: The board to evaluate
     :param player: The player the board is transformed for
     :return: A list with one score per roll
     """
     trans_board = network.board_trans_func(board, player)
     return [network.eval_state(session, trans_board) for _ in gen_21_rolls()]
 def calculate_possible_states(board):
     """
     Find the legal states of the -1 player for each of the 21 distinct dice rolls.

     The number of legal states is printed for every roll before the result is built.
     :param board: The board to calculate legal states from
     :return: A list with one entry per roll, each being the result of Board.calculate_legal_states
     """
     dice_rolls = [(low, high) for low in range(1, 7) for high in range(low, 7)]

     for dice_roll in dice_rolls:
         print(len(Board.calculate_legal_states(board, -1, dice_roll)))

     states_per_roll = []
     for dice_roll in dice_rolls:
         states_per_roll.append(Board.calculate_legal_states(board, -1, dice_roll))
     return states_per_roll
 #for board in boards:
 #    calculate_possible_states(board)
-#print("-"*30)
+# board = network.board_trans_func(Board.initial_state, 1)
 #print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1))
 #print(" "*10 + "network_test")
 print(" "*20 + "Depth 1")
 print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4]))
-#print(scores)
+# pair = network.make_move(Board.initial_state, [3,2], 1)
-#print(" "*20 + "Depth 2")
+# print(pair[1])
 #print(network.n_ply(2, session, boards, 1))
-# #print(x.shape)
+# network.do_backprop(board, 0.9)
 # with graph_lol.as_default():
 #     session_2 = tf.Session(graph = graph_lol)
 #     network_2 = Network(session_2)
 #     network_2.restore_model()
 #     print(network_2.eval_state(initial_state))
-# print(network.eval_state(initial_state))
+
 # network.print_variables()
 # network.save_model(2)
 # print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
 network.play_against_network()
--- a/player.py
+++ b/player.py
@ -11,19 +11,55 @@ class Player:
    def get_sym(self):
        return self.sym
-    def make_move(self, board, sym, roll):
+    def calc_move_sets(self, from_board, roll, player):
-        print(Board.pretty(board))
+        board = from_board
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
+        sets = []
-        if roll[0] == roll[1]:
+        total = 0
-            print("Example of move: 4/6,6/8,12/14,13/15")
+        for r in roll:
-        else:
+            # print("Value of r:",r)
-            print("Example of move: 4/6,13/17")
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
            total += r
        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
        return sets
        user_moves = input("Enter your move: ").strip().split(",")
        board = Board.apply_moves_to_board(board, sym, user_moves)
        while board not in legal_moves:
            print("Move is invalid, please enter a new move")
            user_moves = input("Enter your move: ").strip().split(",")
            board = Board.apply_moves_to_board(board, sym, user_moves)
    def tmp_name(self, from_board, to_board, roll, player, total_moves):
        """
        Check whether to_board can be reached from from_board and, if so, use up the matching part of the roll.

        The candidate sets come from calc_move_sets. The first set containing to_board wins: its value is
        subtracted from total_moves, and either the matching die in roll is set to 0 (first two sets) or the
        whole roll is replaced by [0, 0] (any later set). to_board and each inspected set are printed.
        :param from_board: The board before the move
        :param to_board: The board the player wants to go to
        :param roll: The remaining roll as a list; may be modified in place
        :param player: The player making the move
        :param total_moves: Remaining amount of pips to move
        :return: (total_moves, roll, board), where board is to_board on a match and from_board otherwise
        """
        candidates = self.calc_move_sets(from_board, roll, player)
        for position, candidate in enumerate(candidates):
            # materialise the states so membership can be tested
            candidate[0] = list(candidate[0])
            print(to_board)
            print(candidate)
            if to_board not in candidate[0]:
                continue

            remaining = total_moves - candidate[1]
            if position >= 2:
                # the move used the sum of the dice
                roll = [0,0]
            else:
                roll[position] = 0
            return remaining, roll, to_board

        return total_moves, roll, from_board
    def make_human_move(self, board, roll):
        """
        Interactively ask the human player for moves until the whole roll has been used.

        The amount to move is the sum of the dice, or four times the face value for a double. Each move is
        entered as "from/to" (e.g. 2/5), applied with Board.apply_moves_to_board and checked by tmp_name,
        which also decides how much of the roll is left. The board is printed after every attempt.
        :param board: Current board
        :param roll: The roll of the human player
        :return: The board after all moves have been made
        """
        total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4

        while total_moves != 0:
            move = None
            while move is None:
                print("You have {roll} left!".format(roll=total_moves))
                pot_move = input("Pick a move!\n").split("/")
                if len(pot_move) == 2:
                    try:
                        move = [int(pot_move[0]), int(pot_move[1])]
                    except ValueError:
                        # int() raises ValueError (not TypeError) for text that is not a number
                        move = None
                if move is None:
                    print("The correct syntax is: 2/5 for a move from index 2 to 5.")

            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
            print(Board.pretty(board))
        return board
--- a/quack/quack.c
+++ b/quack/quack.c
@ -0,0 +1,484 @@
 #include <Python.h>
 static PyObject* QuackError;
 /* Fixed-capacity list of board tuples, returned by value from calc_moves. */
 typedef struct board_list board_list;
 struct board_list {
  int size;            /* number of entries of list that have been filled in */
  PyObject* list[16];  /* the boards, each one a Python tuple built by store_board_to_pytuple */
 };
 /* Utility functions */
 /* Signum of x: 1 if x is positive, -1 if x is negative and 0 if x is zero. */
 int sign(int x) {
   if (x > 0) return 1;
   if (x < 0) return -1;
   return 0;
 }
 /* Absolute value of x. */
 int abs(int x) {
   return (x < 0) ? -x : x;
 }
 /* end utility functions */
 /* Helper functions */
 /*
  * Collect the indexes of the 26 board slots which hold at least one checker
  * of player, i.e. where board[i] * player >= 1.
  *
  * Returns a malloc'ed array: element 0 is the number of indexes found and the
  * indexes themselves follow from element 1, in ascending order. The caller
  * has to free() the array. The process is aborted if the allocation fails.
  */
 int *idxs_with_checkers_of_player(int board[], int player) {
   /* first pass: count, so the result can be allocated with the exact size */
   int found = 0;
   for (int pos = 0; pos < 26; pos++) {
     if (board[pos] * player >= 1) found++;
   }

   int *result = malloc((1 + found) * sizeof(int));
   if (result == NULL) {
     PyErr_NoMemory();
     abort();
   }

   /* second pass: write the count followed by the indexes */
   result[0] = found;
   int *out = result + 1;
   for (int pos = 0; pos < 26; pos++) {
     if (board[pos] * player >= 1) *out++ = pos;
   }

   return result;
 }
 /* Returns 1 if direction is equal to player, 0 otherwise. */
 int is_forward_move(int direction, int player) {
   if (direction != player) return 0;
   return 1;
 }
 /* Returns 1 if the length of the move, |delta|, equals face_value, 0 otherwise. */
 int face_value_match_move_length(int delta, int face_value) {
   int distance = abs(delta);
   return distance == face_value;
 }
 /*
  * The bar is slot 0 for player 1 and slot 25 otherwise. If that slot is not
  * empty, the move has to start from it; if it is empty, any from_idx passes.
  */
 int bear_in_if_checker_on_bar(int board[], int player, int from_idx) {
   int bar = (player == 1) ? 0 : 25;

   if (board[bar] == 0) return 1;
   return from_idx == bar;
 }
 /* Returns 1 if the sign of from_state equals player, 0 otherwise. */
 int checkers_at_from_idx(int from_state, int player) {
   int owner = (from_state > 0) - (from_state < 0);
   return owner == player;
 }
 /*
  * Returns 0 only if the sign of to_state is the opposite of player and
  * |to_state| is not exactly 1; returns 1 in every other case.
  */
 int no_block_at_to_idx(int to_state, int player) {
   int owner = (to_state > 0) - (to_state < 0);

   if (-owner != player) return 1;
   return abs(to_state) == 1;
 }
 /*
  * Decide whether player may bear off a checker with the move from_idx -> to_idx.
  *
  * Returns 1 if all of the player's checkers are in the last quadrant (index
  * >= 19 for player 1, index <= 6 otherwise) and either to_idx is exactly the
  * slot just off the board (25 for player 1, 0 for player -1) or none of the
  * player's checkers is behind from_idx. Returns 0 otherwise.
  */
 int can_bear_off(int board[], int player, int from_idx, int to_idx) {
  /* checker_idxs[0] is the count; the indexes follow from element 1 */
  int* checker_idxs = idxs_with_checkers_of_player(board, player);
  int moving_backmost_checker = 1;
  int bearing_directly_off = 0;
  int all_checkers_in_last_quadrant = 1;
  /* Check if bearing directly off */
  if      (player ==  1 && to_idx == 25) bearing_directly_off = 1;
  else if (player == -1 && to_idx == 0)  bearing_directly_off = 1;
  for (int i = 1; i <= checker_idxs[0]; i++) {
    if (player == 1 ) {
      /* Check if all checkers are in last quadrant */
      if (checker_idxs[i] < 19) {
 	all_checkers_in_last_quadrant = 0;
 	break;
      }
      /* Check if moving backmost checker */
      if (checker_idxs[i] < from_idx) {
 	moving_backmost_checker = 0;
 	/* when bearing directly off, keep looping so the quadrant check still sees every checker */
 	if (!bearing_directly_off) break;
      }
    } else {
      /* Same checks for the other player, who moves towards index 0 */
      if (checker_idxs[i] > 6) {
 	all_checkers_in_last_quadrant = 0;
 	break;
      }
      if (checker_idxs[i] > from_idx) {
 	moving_backmost_checker = 0;
 	if (!bearing_directly_off) break;
      }
    }
  }
  free(checker_idxs);
  if (all_checkers_in_last_quadrant &&
      (bearing_directly_off || moving_backmost_checker))  return 1;
  else                                                    return 0;
 }
 /* end helper functions */
 /*
  * Check whether moving a checker of player from move[0] to move[1] is valid
  * for the given face_value of a die.
  *
  * A to_idx outside 1..24 is treated as bearing off, in which case the target
  * slot is considered empty and can_bear_off has to agree. Returns non-zero if
  * every rule holds and 0 otherwise.
  */
 int is_move_valid(int board[], int player, int face_value, int move[]) {
   int from_idx = move[0];
   int to_idx = move[1];

   /* from_idx is used to index board directly and may come unchecked from
      Python, so anything outside the 26 slots is rejected before reading */
   if (from_idx < 0 || from_idx > 25) return 0;

   int from_state = board[from_idx];
   int delta = to_idx - from_idx;
   int direction = sign(delta);

   int bearing_off = !(to_idx >= 1 && to_idx <= 24);
   int to_state = bearing_off ? 0 : board[to_idx];

   return is_forward_move(direction, player)
     && face_value_match_move_length(delta, face_value)
     && bear_in_if_checker_on_bar(board, player, from_idx)
     && checkers_at_from_idx(from_state, player)
     && no_block_at_to_idx(to_state, player)
     && (!bearing_off || can_bear_off(board, player, from_idx, to_idx))
     ;
 }
 /*
  * Apply the move move[0] -> move[1] for player to board, in place.
  *
  * The checker is always removed from move[0]. If move[1] is outside 1..24
  * (bearing off) nothing else happens. Otherwise, a single opposing checker on
  * the target is moved to slot 25 (when player is 1) or slot 0 (otherwise)
  * before the checker is put down on the target.
  */
 void do_move(int board[], int player, int move[]) {
   int source = move[0];
   int target = move[1];

   /* "lift" checker */
   board[source] -= player;

   if (target >= 1 && target <= 24) {
     /* Hit opponent checker and move it to the bar */
     if (board[target] * player == -1) {
       int opponent_bar = (player == 1) ? 25 : 0;
       board[opponent_bar] -= player;
       board[target] = 0;
     }

     /* Put down checker */
     board[target] += player;
   }
 }
 /*
  * Apply move for player to a copy of the 26 board slots and return the copy;
  * board itself is left untouched. The caller has to free() the result. The
  * process is aborted if the allocation fails.
  */
 int* do_move_clone(int board[], int player, int move[]) {
   int* copy = malloc(26 * sizeof(int));
   if (!copy) {
     PyErr_NoMemory();
     abort();
   }

   int pos = 0;
   while (pos < 26) {
     copy[pos] = board[pos];
     pos++;
   }

   do_move(copy, player, move);
   return copy;
 }
 PyObject* store_board_to_pytuple(int board[], int size) {
  PyObject* board_tuple = PyTuple_New(size);
  for (int i = 0; i < size; i++) {
    PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i]));
  }
  return board_tuple;
 }
 /*
  * Find every board that player can reach from board by moving one checker
  * exactly face_value slots in the player's direction.
  *
  * Each valid move is applied to a copy of the board and the result is stored
  * as a Python tuple in the returned board_list. If the player has no checkers
  * at all, the list holds a single tuple with the unchanged board.
  */
 board_list calc_moves(int board[], int player, int face_value) {
  /* checker_idxs[0] is the count; the indexes follow from element 1 */
  int* checker_idxs = idxs_with_checkers_of_player(board, player);
  board_list boards = { .size = 0 };
  /* No checkers of this player on the board: hand back the board as it is */
  if (checker_idxs[0] == 0) { 
    boards.size = 1;
    PyObject* board_tuple = store_board_to_pytuple(board, 26);
    boards.list[0] = board_tuple;
    free(checker_idxs);
    return boards;
  }
  int ctr = 0;
  for (int i = 1; i <= checker_idxs[0]; i++) {
    /* candidate move: from the checker's slot, face_value slots forward */
    int move[2];
    move[0] = checker_idxs[i];
    move[1] = checker_idxs[i] + (face_value * player);
    if (is_move_valid(board, player, face_value, move)) {
      int* new_board = do_move_clone(board, player, move);
      PyObject* board_tuple = store_board_to_pytuple(new_board, 26);
      /* the tuple holds its own int objects, so the C copy is no longer needed */
      free(new_board);
      boards.list[ctr] = board_tuple;
      ctr++;
    }
  }
  free(checker_idxs);
  boards.size = ctr;
  return boards;
 }
 /*
  * Build the 30 entry "quack-fat" feature array for board.
  *
  * Entries 0..25 are a copy of board. Entry 26 is 15 minus the sum of the
  * positive entries and entry 27 is -15 minus the sum of the negative entries.
  * Entries 28 and 29 are (1, 0) if player is 1 and (0, 1) otherwise.
  *
  * The caller has to free() the result. The process is aborted if the
  * allocation fails.
  */
 int* board_features_quack_fat(int board[], int player) {
   int* new_board = malloc(sizeof(int) * 30);
   if (new_board == NULL) {
     PyErr_NoMemory();
     abort();
   }

   int pos_sum = 0;
   int neg_sum = 0;
   for (int i = 0; i < 26; i++) {
     new_board[i] = board[i];
     /* zero entries end up in neg_sum, where they do not change anything */
     if (new_board[i] > 0) pos_sum += new_board[i];
     else                  neg_sum += new_board[i];
   }

   new_board[26] = 15 - pos_sum;
   new_board[27] = -15 - neg_sum;

   if (player == 1) {
     new_board[28] = 1;
     new_board[29] = 0;
   } else {
     new_board[28] = 0;
     new_board[29] = 1;
   }
   return new_board;
 }
 /* Meta definitions */
 /*
  * Copy the 26 entries of the Python tuple board_tuple_obj into board.
  *
  * Returns 0 on success. Returns 1 with a Python exception set if the tuple
  * does not have exactly 26 entries or if an entry cannot be converted to an
  * integer.
  */
 int extract_board(int *board, PyObject* board_tuple_obj) {
   Py_ssize_t num_values = PyTuple_Size(board_tuple_obj);
   if (num_values != 26) {
     PyErr_SetString(QuackError, "Board tuple must have 26 entries");
     return 1;
   }

   // Iterate over tuple to retrieve positions
   for (Py_ssize_t i = 0; i < num_values; i++) {
     PyObject* board_val_obj = PyTuple_GetItem(board_tuple_obj, i);
     long value = PyLong_AsLong(board_val_obj);
     /* -1 is also a legal value, so the error indicator has to be checked */
     if (value == -1 && PyErr_Occurred()) return 1;
     board[i] = (int) value;
   }
   return 0;
 }
 /*
  * Copy the 2 integers of a Python tuple into the C array `move`.
  *
  * Returns 0 on success. Returns 1 with a Python exception set when the tuple
  * does not have exactly 2 entries (QuackError) or when an entry cannot be
  * converted to a C long (the exception raised by the conversion).
  * Values are narrowed from long to int without a range check.
  */
 int extract_move(int *move, PyObject* move_tuple_obj) {
   Py_ssize_t numValuesMove = PyTuple_Size(move_tuple_obj);
   if (numValuesMove != 2) {
     PyErr_SetString(QuackError, "Move tuple must have exactly 2 entries");
     return 1;
   }
   for (Py_ssize_t i = 0; i < numValuesMove; i++) {
     /* Borrowed reference: must not be released here. */
     PyObject* move_val_obj = PyTuple_GetItem(move_tuple_obj, i);
     if (move_val_obj == NULL) return 1;
     long value = PyLong_AsLong(move_val_obj);
     /* -1 is also a legal value, so the error indicator decides. */
     if (value == -1 && PyErr_Occurred()) return 1;
     move[i] = (int) value;
   }
   return 0;
 }
 /*
  * Python entry point: is_move_valid(board, player, face_value, move).
  *
  * `board` must be a tuple of 26 ints and `move` a tuple of 2 ints; both are
  * unpacked into C arrays before is_move_valid() is consulted. Returns True
  * or False, or NULL with an exception set if the arguments are rejected.
  */
 static PyObject*
 quack_is_move_valid(PyObject *self, PyObject *args) {
   PyObject* board_arg;
   PyObject* move_arg;
   int player;
   int face_value;
   int board[26];
   int move[2];

   const int parsed = PyArg_ParseTuple(args, "O!iiO!",
                                       &PyTuple_Type, &board_arg,
                                       &player,
                                       &face_value,
                                       &PyTuple_Type, &move_arg);
   if (!parsed) return NULL;

   if (extract_board(board, board_arg) != 0) return NULL;
   if (extract_move(move, move_arg) != 0)    return NULL;

   if (!is_move_valid(board, player, face_value, move)) {
     Py_RETURN_FALSE;
   }
   Py_RETURN_TRUE;
 }
 /*
  * Python entry point: idxs_with_checkers_of_player(board, player).
  *
  * `board` must be a tuple of 26 ints. Returns a new list holding the indexes
  * reported by idxs_with_checkers_of_player(), whose result array stores the
  * number of indexes in element 0 followed by the indexes themselves.
  * Returns NULL with an exception set on bad arguments or allocation failure.
  */
 static PyObject*
 quack_idxs_with_checkers_of_player(PyObject *self, PyObject *args) {
   int board[26];
   int player;
   PyObject* board_tuple_obj;
   if (! PyArg_ParseTuple(args, "O!i",
 			 &PyTuple_Type, &board_tuple_obj,
 			 &player))
     return NULL;
   if (extract_board(board, board_tuple_obj)) return NULL;

   int* idxs = idxs_with_checkers_of_player(board, player);
   PyObject* idxs_list = PyList_New(idxs[0]);
   if (idxs_list == NULL) {
     free(idxs);
     return NULL;
   }
   for (int i = 0; i < idxs[0]; i++) {
     PyObject* idx_obj = PyLong_FromLong(idxs[i + 1]);
     if (idx_obj == NULL) {
       Py_DECREF(idxs_list);
       free(idxs);
       return NULL;
     }
     /* PyList_SetItem takes over the reference to `idx_obj`. */
     PyList_SetItem(idxs_list, i, idx_obj);
   }
   free(idxs);
   /* The list is already a new reference owned by us; return it directly. */
   return idxs_list;
 }
 /*
  * Python entry point: do_move(board, player, move).
  *
  * `board` must be a tuple of 26 ints and `move` a tuple of 2 ints. The move
  * is applied to a local C copy of the board, which is then returned as a new
  * 26-entry tuple. Returns NULL with an exception set on bad arguments.
  */
 static PyObject*
 quack_do_move(PyObject *self, PyObject *args) {
   int board[26];
   int player;
   int move[2];
   PyObject* board_tuple_obj;
   PyObject* move_tuple_obj;
   if (! PyArg_ParseTuple(args, "O!iO!",
 			 &PyTuple_Type, &board_tuple_obj,
 			 &player,
 			 &PyTuple_Type, &move_tuple_obj))
     return NULL;
   if (extract_board(board, board_tuple_obj)) return NULL;
   if (extract_move(move, move_tuple_obj))    return NULL;
   do_move(board, player, move);
   /* `board` is a plain stack array, not a Python object, so it needs no
      reference-count handling. The tuple is a new reference that is handed
      straight to the caller. */
   return store_board_to_pytuple(board, 26);
 }
 /*
  * Python entry point: calc_moves(board, player, face_value).
  *
  * `board` must be a tuple of 26 ints. Returns a new list containing the
  * board tuples produced by calc_moves(). Returns NULL with an exception set
  * on bad arguments or if the result list cannot be built; in that case every
  * board tuple not yet owned by the list is released.
  */
 static PyObject*
 quack_calc_moves(PyObject *self, PyObject *args) {
   int board[26];
   int player;
   int face_value;
   PyObject* board_tuple_obj;
   if (! PyArg_ParseTuple(args, "O!ii",
 			 &PyTuple_Type, &board_tuple_obj,
 			 &player,
 			 &face_value))
     return NULL;
   if (extract_board(board, board_tuple_obj)) return NULL;

   board_list boards = calc_moves(board, player, face_value);
   PyObject* boards_list = PyList_New(boards.size);
   if (boards_list == NULL) {
     for (int i = 0; i < boards.size; i++) Py_XDECREF(boards.list[i]);
     return NULL;
   }
   for (int i = 0; i < boards.size; i++) {
     PyObject* item = boards.list[i];
     int failed = 0;
     if (item == NULL) {
       /* A missing tuple can only come from a failed allocation. */
       if (!PyErr_Occurred()) PyErr_NoMemory();
       failed = 1;
     } else if (PyList_SetItem(boards_list, i, item) != 0) {
       /* PyList_SetItem has already released `item` on failure. */
       failed = 1;
     }
     if (failed) {
       /* Entries after i are still owned here; entries before i belong to
          the list and are released together with it. */
       for (int j = i + 1; j < boards.size; j++) Py_XDECREF(boards.list[j]);
       Py_DECREF(boards_list);
       return NULL;
     }
   }
   /* The list is already a new reference owned by us; return it directly. */
   return boards_list;
 }
 /*
  * Python entry point: board_features_quack_fat(board, player).
  *
  * `board` must be a tuple of 26 ints. Returns a new 30-entry tuple holding
  * the array computed by board_features_quack_fat(). Returns NULL with an
  * exception set on bad arguments.
  */
 static PyObject*
 quack_board_features_quack_fat(PyObject *self, PyObject *args) {
   int board[26];
   int player;
   PyObject* board_tuple_obj;
   if (! PyArg_ParseTuple(args, "O!i",
 			 &PyTuple_Type, &board_tuple_obj,
 			 &player))
     return NULL;
   if (extract_board(board, board_tuple_obj)) return NULL;

   int* new_board = board_features_quack_fat(board, player);
   PyObject* board_tuple = store_board_to_pytuple(new_board, 30);
   /* The C array is no longer needed once its values live in the tuple. */
   free(new_board);
   /* The tuple is a new reference owned by us; return it directly, which
      also lets a NULL (error) result propagate without being dereferenced. */
   return board_tuple;
 }
 /*
  * Method table of the module: one entry per Python-visible function, giving
  * its name, C implementation, calling convention and docstring. The
  * all-NULL entry terminates the table.
  */
 static PyMethodDef quack_methods[] = {
   { "is_move_valid",
     quack_is_move_valid,
     METH_VARARGS,
     "Evaluates the validity of the proposed move." },

   { "idxs_with_checkers_of_player",
     quack_idxs_with_checkers_of_player,
     METH_VARARGS,
     "Returns a list of indexes with checkers of the specified player" },

   { "do_move",
     quack_do_move,
     METH_VARARGS,
     "Returns the board after doing the specified move" },

   { "calc_moves",
     quack_calc_moves,
     METH_VARARGS,
     "Calculates all legal moves from board with specified face value" },

   { "board_features_quack_fat",
     quack_board_features_quack_fat,
     METH_VARARGS,
     "Transforms a board to the quack-fat board representation" },

   { NULL, NULL, 0, NULL }  /* sentinel */
 };
 /*
  * Module definition handed to PyModule_Create(): module name, docstring,
  * per-module state size of -1, and the method table above.
  */
 static struct PyModuleDef quack_definition = {
   .m_base    = PyModuleDef_HEAD_INIT,
   .m_name    = "quack",
   .m_doc     = "A Python module that provides various useful Backgammon-related functions.",
   .m_size    = -1,
   .m_methods = quack_methods
 };
 /*
  * Module initialisation function called by the interpreter on import.
  *
  * Creates the module, creates the "quack.error" exception, stores it in the
  * global QuackError and exposes it as the module attribute "error".
  * Returns the module, or NULL with an exception set if any step fails.
  */
 PyMODINIT_FUNC PyInit_quack(void) {
   PyObject* module;
   module = PyModule_Create(&quack_definition);
   if (module == NULL)
     return NULL;

   QuackError = PyErr_NewException("quack.error", NULL, NULL);
   if (QuackError == NULL) {
     Py_DECREF(module);
     return NULL;
   }

   /* PyModule_AddObject takes over one reference on success; the extra
      reference taken here keeps the global QuackError pointer valid. */
   Py_INCREF(QuackError);
   if (PyModule_AddObject(module, "error", QuackError) < 0) {
     /* On failure no reference was taken over, so drop both of ours. */
     Py_DECREF(QuackError);
     Py_CLEAR(QuackError);
     Py_DECREF(module);
     return NULL;
   }
   return module;
 }
--- a/quack/setup.py
+++ b/quack/setup.py
@ -0,0 +1,9 @@
 from distutils.core import setup, Extension
 # The C extension is built from the single source file quack.c.
 quack = Extension(
     'quack',
     sources=['quack.c'],
 )

 # Package metadata plus the extension module to compile.
 setup(
     name='quack',
     version='0.1',
     description='Quack Backgammon Tools',
     ext_modules=[quack],
 )
--- a/report_docs.txt
+++ b/report_docs.txt
@ -0,0 +1,28 @@
 <christoffer> Alexander og jeg skrev noget af vores bachelorprojekt om til C her i fredags.
 <christoffer> Man skal virkelig passe på sine hukommelsesallokeringer.
 <Jmaa> Ja, helt klart.
 <christoffer> Jeg fandt et memory leak, der lækkede 100 MiB hukommelse i sekundet.
 <Jmaa> Hvilken del blev C-ificeret?
 <Jmaa> Damned
 <christoffer> Årsagen var at vi gav et objekt med tilbage til Python uden at dekrementere dets ref-count, så fortolkeren stadig troede at nogen havde brug for det.
 <christoffer> Den del af spillogikken, der tjekker om træk er gyldige.
 <christoffer> Det bliver kaldt ret mange tusinde gange pr. spil, så vi tænkte at der måske kunne være lidt optimering at hente i at omskrive det til C.
 <Jmaa> Ok, så I har ikke selv brugt alloc og free. Det er alligevel noget.
 <christoffer> Metoden selv blev 7 gange hurtigere!
 <Jmaa> Wow!
 <christoffer> Jo. Det endte vi også med at gøre.
 <christoffer> Vi havde brug for lister af variabel størrelse. Det endte med en struct med et "size" felt og et "list" felt.
 <Jmaa> Inkluderer det speedup, frem og tilbagen mellem C og python?
 <christoffer> Det burde det gøre, ja!
 <Jmaa> Gjorde det nogen stor effekt for hvor hurtigt I kan evaluere?
 <christoffer> Jeg tror ikke at der er særligt meget "frem og tilbage"-stads. Det ser ud til at det kode man skriver bliver kastet ret direkte ind i fortolkeren.
 <christoffer> Det gjorde en stor forskel for når vi laver 1-ply.
 <christoffer> "ply" er hvor mange træk man kigger fremad.
 <christoffer> Så kun at kigge på det umiddelbart næste træk er 0-ply, hvilket er det vi har gjort indtil nu
 <christoffer> 1-ply var for langsomt. Det tog ca. 6-7 sekunder at evaluere ét træk.
 <christoffer> Alexander lavede lidt omskrivninger, så TensorFlow udregnede det hurtigere og fik det ned på ca. 3-4 sekunder *pr. spil*.
 <christoffer> Så skrev vi noget af det om til C, og nu er vi så på ca. 2 sekunder pr. spil med 1-ply, hvilket er ret vildt.
 <christoffer> Det er så godt at Python-fortolkeren kan udvides med C!
 <christoffer> caspervk, kan I optimere jeres bachelorprojekt med et par C-moduler?
 <Jmaa> Det er en hel lille sektion til rapporten det der.
 <christoffer> Yeah. Kopierer bare det her verbatim ind.
--- a/requirements.txt
+++ b/requirements.txt
@ -16,8 +16,8 @@ pyparsing==2.2.0
 python-dateutil==2.7.2
 pytz==2018.3
 six==1.11.0
-tensorboard==1.6.0
+tensorboard==1.8.0
-tensorflow==1.6.0
+tensorflow==1.8.0
 termcolor==1.1.0
 Werkzeug==0.14.1
 pygame==1.9.3
--- a/tensorflow_impl_tests/eager_main.py
+++ b/tensorflow_impl_tests/eager_main.py
@ -1,41 +1,94 @@
 import time
 import numpy as np
 import tensorflow as tf
 from board import Board
 import tensorflow.contrib.eager as tfe
 tf.enable_eager_execution()
 xavier_init = tf.contrib.layers.xavier_initializer()
 opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
 output_size = 1
 hidden_size = 40
 input_size = 30
 model = tf.keras.Sequential([
-    tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(1,30)),
+    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
-    tf.keras.layers.Dense(1, activation="sigmoid")
+    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
 ])
 input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0]
-all_input = np.array([input for _ in range(8500)])
+# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
-single_in = np.array(input).reshape(1,-1)
+input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
 all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)])
 single_in = Board.board_features_quack_fat(input, 1)
 start = time.time()
 all_predictions = model.predict_on_batch(all_input)
-print(all_predictions)
+
-print(time.time() - start)
+learning_rate = 0.1
 with tf.GradientTape() as tape:
    value = model(single_in)
 print("Before:", value)
-start = time.time()
+grads = tape.gradient(value, model.variables)
-all_predictions = [model(single_in) for _ in range(8500)]
+print("/"*40,"model_variables","/"*40)
 print(model.variables)
 print("/"*40,"grads","/"*40)
 print(grads)
-print(all_predictions[:10])
+difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
 print(time.time() - start)
 for grad, train_var in zip(grads, model.variables):
    backprop_calc = 0.1 * difference_in_values * grad
    train_var.assign_add(backprop_calc)
 value = model(single_in)
 print("/"*40,"model_variables","/"*40)
 print(model.variables)
 print("After:", value)
 # # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
 #
 # # print(model.variables[0][0])
 # weights_before = model.weights[0]
 #
 # start = time.time()
 # #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
 #
 # start = time.time()
 # for gradient, trainable_var in zip(grads, model.variables):
 #     backprop_calc = 0.1 * (0.9 - val) * gradient
 #     trainable_var.assign_add(backprop_calc)
 #
 # # opt.apply_gradients(zip(grads, model.variables))
 #
 # print(time.time() - start)
 #
 # print(model(single_in))
 #
 # vals = model.predict_on_batch(all_input)
 # vals = list(vals)
 # vals[3] = 4
 # print(vals)
 # print(np.argmax(np.array(vals)))
 # tfe.Saver(model.variables).save("./tmp_ckpt")
--- a/tensorflow_impl_tests/normal_main.py
+++ b/tensorflow_impl_tests/normal_main.py
@ -16,9 +16,9 @@ class Everything:
        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
-                              initializer=xavier_init)
+                              initializer=tf.constant_initializer(-2))
        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
-                              initializer=xavier_init)
+                              initializer=tf.constant_initializer(0.2))
        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                              initializer=tf.zeros_initializer)
@ -29,16 +29,37 @@ class Everything:
        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
        apply_gradients = []
        trainable_vars = tf.trainable_variables()
        gradients = tf.gradients(self.value, trainable_vars)
        difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), [])
        with tf.variable_scope('apply_gradients'):
            for gradient, trainable_var in zip(gradients, trainable_vars):
                backprop_calc = 0.1 * difference_in_values * gradient
                grad_apply = trainable_var.assign_add(backprop_calc)
                apply_gradients.append(grad_apply)
        self.training_op = tf.group(*apply_gradients, name='training_op')
    def eval(self):
        input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
        start = time.time()
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
-        for i in range(8500):
+        for i in range(20):
            val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)})
        print(time.time() - start)
        print(val)
-
+        sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)})
        val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)})
        print(val)
 everything = Everything()
 everything.eval()
--- a/test.py
+++ b/test.py
@ -141,6 +141,56 @@ class TestIsMoveValid(unittest.TestCase):
    # TODO: More tests for bearing off are needed
    def test_bear_off_non_backmost(self):
        board = ( 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 1, 1,
                  0 )
        self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), True)
        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True)
        self.assertEqual(Board.is_move_valid(board, 1, 2, (24, 26)), False)
    def test_bear_off_quadrant_limits_white(self):
        board = ( 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 1,
                  1, 1, 1, 1, 1, 1,
                  0 )
        self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), False)
        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), False)
    def test_bear_off_quadrant_limits_black(self):
        board = ( 0,
                  -1, -1, -1, -1, -1, -1,
                  -1, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0 )
        self.assertEqual(Board.is_move_valid(board, -1, 2, (2, 0)), False)
        self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), False)
    def test_bear_off_quadrant_limits_white_2(self):
        board = ( 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  1, 0, 0, 0, 0, 1,
                  0 )
        self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True)
    def test_bear_off_quadrant_limits_black_2(self):
        board = ( 0,
                  -1, 0, 0, 0, 0, -1,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0,
                  0 )
        self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), True)
 class TestNumOfChecker(unittest.TestCase):
    def test_simple_1(self):
        board = ( 0,