diff --git a/board.py b/board.py index d32197c..0233cf2 100644 --- a/board.py +++ b/board.py @@ -1,3 +1,4 @@ +import quack import numpy as np import itertools @@ -12,11 +13,7 @@ class Board: @staticmethod def idxs_with_checkers_of_player(board, player): - idxs = [] - for idx, checker_count in enumerate(board): - if checker_count * player >= 1: - idxs.append(idx) - return idxs + return quack.idxs_with_checkers_of_player(board, player) # TODO: Write a test for this @@ -40,18 +37,19 @@ class Board: def board_features_quack(board, player): board = list(board) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1, -1) + return np.array(board).reshape(1,28) # quack-fat @staticmethod def board_features_quack_fat(board, player): - board = list(board) - positives = [x if x > 0 else 0 for x in board] - negatives = [x if x < 0 else 0 for x in board] - board.append( 15 - sum(positives)) - board.append(-15 - sum(negatives)) - board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1,-1) + return np.array(quack.board_features_quack_fat(board,player)).reshape(1,30) + # board = list(board) + # positives = [x if x > 0 else 0 for x in board] + # negatives = [x if x < 0 else 0 for x in board] + # board.append( 15 - sum(positives)) + # board.append(-15 - sum(negatives)) + # board += ([1, 0] if np.sign(player) > 0 else [0, 1]) + # return np.array(board).reshape(1,30) # quack-fatter @@ -68,7 +66,7 @@ class Board: board.append(15 - sum(positives)) board.append(-15 - sum(negatives)) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1, -1) + return np.array(board).reshape(1,30) # tesauro @staticmethod @@ -124,98 +122,15 @@ class Board: # Calculate how many pieces there must be in the home state and divide it by 15 features.append((15 - sum) / 15) features += ([1,0] if np.sign(cur_player) > 0 else [0,1]) - test = np.array(features).reshape(1,-1) + test = np.array(features) #print("TEST:",test) - return test + return test.reshape(1,198) @staticmethod def is_move_valid(board, player, face_value, move): - if face_value == 0: - return True - else: - def sign(a): - return (a > 0) - (a < 0) - - from_idx = move[0] - to_idx = move[1] - to_state = None - from_state = board[from_idx] - delta = to_idx - from_idx - direction = sign(delta) - bearing_off = None - - # FIXME: Use get instead of array-like indexing - if to_idx >= 1 and to_idx <= 24: - to_state = board[to_idx] - bearing_off = False - else: # Bearing off - to_state = 0 - bearing_off = True - - # print("_"*20) - # print("board:", board) - # print("to_idx:", to_idx, "board[to_idx]:", board[to_idx], "to_state:", to_state) - # print("+"*20) - - def is_forward_move(): - return direction == player - - def face_value_match_move_length(): - return abs(delta) == face_value - - def bear_in_if_checker_on_bar(): - if player == 1: - bar = 0 - else: - bar = 25 - - bar_state = board[bar] - - if bar_state != 0: - return from_idx == bar - else: - return True - - def checkers_at_from_idx(): - return sign(from_state) == player - - def no_block_at_to_idx(): - if -sign(to_state) == player: - return abs(to_state) == 1 - else: - return True - - def can_bear_off(): - checker_idxs = Board.idxs_with_checkers_of_player(board, player) - def is_moving_backmost_checker(): - if player == 1: - return all([(idx >= from_idx) for idx in checker_idxs]) - else: - return all([(idx <= from_idx) for idx in checker_idxs]) - - def all_checkers_in_last_quadrant(): - if player == 1: - return all([(idx >= 19) for 
idx in checker_idxs]) - else: - return all([(idx <= 6) for idx in checker_idxs]) - - return all([ is_moving_backmost_checker(), - all_checkers_in_last_quadrant() ]) - - # TODO: add switch here instead of wonky ternary in all - # print("is_forward:",is_forward_move()) - # print("face_value:",face_value_match_move_length()) - # print("Checkes_at_from:",checkers_at_from_idx()) - # print("no_block:",no_block_at_to_idx()) - - return all([ is_forward_move(), - face_value_match_move_length(), - bear_in_if_checker_on_bar(), - checkers_at_from_idx(), - no_block_at_to_idx(), - can_bear_off() if bearing_off else True ]) + return quack.is_move_valid(board, player, face_value, move) @staticmethod def any_move_valid(board, player, roll): @@ -255,12 +170,27 @@ class Board: @staticmethod - def apply_moves_to_board(board, player, moves): - for move in moves: - from_idx, to_idx = move.split("/") - board[int(from_idx)] -= int(player) - board[int(to_idx)] += int(player) - return board + def apply_moves_to_board(board, player, move): + from_idx = move[0] + to_idx = move[1] + board = list(board) + board[from_idx] -= player + + if (to_idx < 1 or to_idx > 24): + return + + if (board[to_idx] * player == -1): + + if (player == 1): + board[25] -= player + else: + board[0] -= player + + board[to_idx] = 0 + + board[to_idx] += player + + return tuple(board) @staticmethod def calculate_legal_states(board, player, roll): @@ -271,24 +201,9 @@ class Board: # turn and then do something with the second die def calc_moves(board, face_value): - idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player) - if len(idxs_with_checkers) == 0: + if face_value == 0: return [board] - boards = [(Board.do_move(board, - player, - (idx, idx + (face_value * player))) - if Board.is_move_valid(board, - player, - face_value, - (idx, idx + (face_value * player))) - else None) - for idx in idxs_with_checkers] - # print("pls:",boards) - board_list = list(filter(None, boards)) # Remove None-values - # if len(board_list) == 0: - # return [board] - # print("board list:", board_list) - return board_list + return quack.calc_moves(board, player, face_value) # Problem with cal_moves: Method can return empty list (should always contain at least same board). # *Update*: Seems to be fixed. 
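The comment above notes that calc_moves used to be able to return an empty list even though callers expect at least the unchanged board. A minimal sketch of how that guarantee can be kept on the C side — assuming the board_list struct and the calc_moves/store_board_to_pytuple helpers defined in quack/quack.c later in this diff; the wrapper name itself is hypothetical and error handling is omitted:

    /* Sketch: never hand an empty list back to Python; fall back to the unchanged board. */
    board_list calc_moves_nonempty(int board[], int player, int face_value) {
        board_list boards = calc_moves(board, player, face_value);
        if (boards.size == 0) {
            /* No legal move for this die: keep the current board as the only option. */
            boards.list[0] = store_board_to_pytuple(board, 26);
            boards.size = 1;
        }
        return boards;
    }

With a guard like this, callers such as calculate_legal_states can iterate the returned boards without special-casing an empty result.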
@@ -302,12 +217,16 @@ class Board: if not Board.any_move_valid(board, player, roll): return { board } dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4] + #print("Permuts:",dice_permutations) # print("Dice permuts:",dice_permutations) for roll in dice_permutations: # Calculate boards resulting from first move #print("initial board: ", board) #print("roll:", roll) + #print("Rest of roll:",roll[1:]) boards = calc_moves(board, roll[0]) + #print("Boards:",boards) + #print("Roll:",roll[0]) #print("boards after first die: ", boards) for die in roll[1:]: @@ -347,9 +266,9 @@ class Board: return """ 13 14 15 16 17 18 19 20 21 22 23 24 +--------------------------------------------------------------------------+ -| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO| +| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO| |---|---|---|---|---|---|------------|---|---|---|---|---|---| | -| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO| +| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO| +--------------------------------------------------------------------------+ 12 11 10 9 8 7 6 5 4 3 2 1 """.format(*temp) @@ -357,42 +276,8 @@ class Board: @staticmethod def do_move(board, player, move): # Implies that move is valid; make sure to check move validity before calling do_move(...) - - def move_to_bar(board, to_idx): - board = list(board) - if player == 1: - board[25] -= player - else: - board[0] -= player - - board[to_idx] = 0 - return board + return quack.do_move(board, player, move) - # TODO: Moving in from bar is handled by the representation - # TODONE: Handle bearing off - - from_idx = move[0] - #print("from_idx: ", from_idx) - to_idx = move[1] - #print("to_idx: ", to_idx) - # pdb.set_trace() - board = list(board) # Make mutable copy of board - - # 'Lift' checker - board[from_idx] -= player - - # Handle bearing off - if to_idx < 1 or to_idx > 24: - return tuple(board) - - # Handle hitting checkers - if board[to_idx] * player == -1: - board = move_to_bar(board, to_idx) - - # Put down checker - board[to_idx] += player - - return tuple(board) @staticmethod def flip(board): diff --git a/bot.py b/bot.py index 297f203..d1d74a6 100644 --- a/bot.py +++ b/bot.py @@ -1,24 +1,8 @@ -from cup import Cup -from network import Network from board import Board -import tensorflow as tf -import numpy as np -import random - class Bot: - def __init__(self, sym, config = None, name = "unnamed"): - self.config = config - self.cup = Cup() + def __init__(self, sym): self.sym = sym - self.graph = tf.Graph() - - self.network = Network(config, name) - self.network.restore_model() - - def restore_model(self): - with self.graph.as_default(): - self.network.restore_model() def get_session(self): return self.session @@ -26,16 +10,60 @@ class Bot: def get_sym(self): return self.sym - def get_network(self): - return self.network - # TODO: DEPRECATE - def make_move(self, board, sym, roll): - # print(Board.pretty(board)) - legal_moves = Board.calculate_legal_states(board, sym, roll) - moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] - scores = [ x[1] for x in moves_and_scores ] - best_move_pair = moves_and_scores[np.array(scores).argmax()] - #print("Found the best state, being:", np.array(move_scores).argmax()) - return best_move_pair + def 
calc_move_sets(self, from_board, roll, player): + board = from_board + sets = [] + total = 0 + print("board!:",board) + for r in roll: + # print("Value of r:",r) + sets.append([Board.calculate_legal_states(board, player, [r,0]), r]) + total += r + sets.append([Board.calculate_legal_states(board, player, [total,0]), total]) + return sets + + + def handle_move(self, from_board, to_board, roll, player): + + # print("Cur board:",board) + sets = self.calc_move_sets(from_board, roll, player) + for idx, board_set in enumerate(sets): + board_set[0] = list(board_set[0]) + # print("My board_set:",board_set) + if to_board in [list(c) for c in board_set[0]]: + self.total_moves -= board_set[1] + if idx < 2: + # print("Roll object:",self.roll) + self.roll[idx] = 0 + else: + self.roll = [0,0] + break + print("Total moves left:",self.total_moves) + + + def tmp_name(self, from_board, to_board, roll, player, total_moves): + sets = self.calc_move_sets(from_board, roll, player) + return_board = from_board + for idx, board_set in enumerate(sets): + board_set = list(board_set[0]) + if to_board in [list(board) for board in board_set]: + total_moves -= board_set[1] + # if it's not the sum of the moves + if idx < 2: + roll[idx] = 0 + else: + roll = [0,0] + return_board = to_board + break + return total_moves, roll, return_board + + def make_human_move(self, board, player, roll): + total_moves = roll[0] + roll[1] + previous_board = board + while total_moves != 0: + move = input("Pick a move!\n") + to_board = Board.apply_moves_to_board(previous_board, player, move) + total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves) + diff --git a/main.py b/main.py index e2e8988..53b0444 100644 --- a/main.py +++ b/main.py @@ -31,19 +31,17 @@ parser.add_argument('--train-perpetually', action='store_true', help='start new training session as soon as the previous is finished') parser.add_argument('--list-models', action='store_true', help='list all known models') -parser.add_argument('--force-creation', action='store_true', - help='force model creation if model does not exist') parser.add_argument('--board-rep', action='store', dest='board_rep', - default='tesauro', help='name of board representation to use as input to neural network') -parser.add_argument('--use-baseline', action='store_true', - help='use the baseline model, note, has size 28') +parser.add_argument('--verbose', action='store_true', + help='If set, a lot of stuff will be printed') +parser.add_argument('--ply', action='store', dest='ply', default='0', + help='defines the amount of ply used when deciding what move to make') +parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1', + help='the amount of times the evaluation method should be repeated') args = parser.parse_args() -if args.model == "baseline_model": - print("Model name 'baseline_model' not allowed") - exit() config = { 'model': args.model, @@ -59,10 +57,13 @@ config = { 'model_storage_path': 'models', 'bench_storage_path': 'bench', 'board_representation': args.board_rep, - 'force_creation': args.force_creation, - 'use_baseline': args.use_baseline + 'global_step': 0, + 'verbose': args.verbose, + 'ply': args.ply, + 'repeat_eval': args.repeat_eval } + # Create models folder if not os.path.exists(config['model_storage_path']): os.makedirs(config['model_storage_path']) @@ -77,6 +78,14 @@ if not os.path.isdir(log_path): os.mkdir(log_path) +def save_config(): + import yaml + # checkpoint_path = os.path.join(config['model_storage_path'], 
config['model']) + # config_path = os.path.join(checkpoint_path, 'config') + # with open(config_path, 'a+') as f: + # print("lol") + print(yaml.dump(config)) + # Define helper functions def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")): format_vars = { 'trained_eps': trained_eps, @@ -125,6 +134,24 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0): with open(log_path, 'a+') as f: f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n") +def find_board_rep(): + checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + board_rep_path = os.path.join(checkpoint_path, "board_representation") + with open(board_rep_path, 'r') as f: + return f.read() + + +def board_rep_file_exists(): + checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + board_rep_path = os.path.join(checkpoint_path, "board_representation") + return os.path.isfile(board_rep_path) + +def create_board_rep(): + checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + board_rep_path = os.path.join(checkpoint_path, "board_representation") + with open(board_rep_path, 'a+') as f: + f.write(config['board_representation']) + # Do actions specified by command-line if args.list_models: def get_eps_trained(folder): @@ -145,8 +172,25 @@ if __name__ == "__main__": # Set up network from network import Network + save_config() # Set up variables episode_count = config['episode_count'] + + if config['board_representation'] is None: + if board_rep_file_exists(): + config['board_representation'] = find_board_rep() + else: + sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n") + exit() + else: + if not board_rep_file_exists(): + create_board_rep() + else: + if config['board_representation'] != find_board_rep(): + sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n". 
+ format(given = config['board_representation'], board_rep = find_board_rep())) + exit() + if args.train: network = Network(config, config['model']) @@ -161,15 +205,19 @@ if __name__ == "__main__": if not config['train_perpetually']: break - + elif args.play: + network = Network(config, config['model']) + network.play_against_network() + elif args.eval: network = Network(config, config['model']) - start_episode = network.episodes_trained - # Evaluation measures are described in `config` - outcomes = network.eval(config['episode_count']) - log_eval_outcomes(outcomes, trained_eps = start_episode) - # elif args.play: - # g.play(episodes = episode_count) + for i in range(int(config['repeat_eval'])): + start_episode = network.episodes_trained + # Evaluation measures are described in `config` + outcomes = network.eval(config['episode_count']) + log_eval_outcomes(outcomes, trained_eps = start_episode) + # elif args.play: + # g.play(episodes = episode_count) elif args.bench_eval_scores: @@ -191,7 +239,7 @@ if __name__ == "__main__": episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 20000] - def do_eval(sess): + def do_eval(): for eval_method in config['eval_methods']: result_path = os.path.join(config['bench_storage_path'], eval_method) + "-{}.log".format(int(time.time())) @@ -199,8 +247,7 @@ if __name__ == "__main__": for i in range(sample_count): start_time = time.time() # Evaluation measure to be benchmarked are described in `config` - outcomes = network.eval(episode_count = n, - tf_session = sess) + outcomes = network.eval(episode_count = n) time_diff = time.time() - start_time log_bench_eval_outcomes(outcomes, time = time_diff, @@ -210,8 +257,8 @@ if __name__ == "__main__": # CMM: oh no import tensorflow as tf - with tf.Session() as session: - network.restore_model(session) - do_eval(session) + + network.restore_model() + do_eval() diff --git a/network.py b/network.py index 84802e3..381197b 100644 --- a/network.py +++ b/network.py @@ -8,6 +8,8 @@ import random from eval import Eval import glob from operator import itemgetter +import tensorflow.contrib.eager as tfe +from player import Player class Network: # board_features_quack has size 28 @@ -15,21 +17,41 @@ class Network: # board_features_tesauro has size 198 board_reps = { - 'quack-fat' : (30, Board.board_features_quack_fat), - 'quack' : (28, Board.board_features_quack), - 'tesauro' : (198, Board.board_features_tesauro), - 'quack-norm': (30, Board.board_features_quack_norm) + 'quack-fat' : (30, Board.board_features_quack_fat), + 'quack' : (28, Board.board_features_quack), + 'tesauro' : (198, Board.board_features_tesauro), + 'quack-norm' : (30, Board.board_features_quack_norm), + 'tesauro-poop': (198, Board.board_features_tesauro_wrong) } + def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) def __init__(self, config, name): + """ + :param config: + :param name: + """ + + move_options = { + '1': self.make_move_1_ply, + '0': self.make_move_0_ply + } + + tf.enable_eager_execution() + + xavier_init = tf.contrib.layers.xavier_initializer() + self.config = config self.checkpoint_path = os.path.join(config['model_storage_path'], config['model']) self.name = name + self.make_move = move_options[ + self.config['ply'] + ] + # Set board representation from config self.input_size, self.board_trans_func = Network.board_reps[ self.config['board_representation'] @@ -39,16 +61,6 @@ class Network: self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - self.global_step = tf.Variable(0, 
trainable=False, name="global_step") - self.learning_rate = tf.maximum(self.min_learning_rate, - tf.train.exponential_decay(self.max_learning_rate, - self.global_step, 50000, - 0.96, - staircase=True), - name="learning_rate") - - - # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): @@ -57,62 +69,107 @@ class Network: else: self.episodes_trained = 0 - self.x = tf.placeholder('float', [1, self.input_size], name='input') - self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next") - - xavier_init = tf.contrib.layers.xavier_initializer() - - W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size), - initializer=xavier_init) - W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), - initializer=xavier_init) - - b_1 = tf.get_variable("b_1", (self.hidden_size,), - initializer=tf.zeros_initializer) - b_2 = tf.get_variable("b_2", (self.output_size,), - initializer=tf.zeros_initializer) + global_step_path = os.path.join(self.checkpoint_path, "global_step") + if os.path.isfile(global_step_path): + with open(global_step_path, 'r') as f: + self.global_step = int(f.read()) + else: + self.global_step = 0 - value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') + self.model = tf.keras.Sequential([ + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, + input_shape=(1,self.input_size)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) + ]) - self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') - # TODO: Alexander thinks that self.value will be computed twice (instead of once) - difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), []) + def exp_decay(self, max_lr, global_step, decay_rate, decay_steps): + """ + Calculates the exponential decay on a learning rate + :param max_lr: The learning rate that the network starts at + :param global_step: The global step + :param decay_rate: The rate at which the learning rate should decay + :param decay_steps: The amount of steps between each decay + :return: The result of the exponential decay performed on the learning rate + """ + res = max_lr * decay_rate**(global_step // decay_steps) + return res + + def do_backprop(self, prev_state, value_next): + """ + Performs the Temporal-difference backpropagation step on the model + :param prev_state: The previous state of the game, this has its value recalculated + :param value_next: The value of the current move + :return: Nothing, the calculation is performed on the model of the network + """ + self.learning_rate = tf.maximum(self.min_learning_rate, + self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000), + name="learning_rate") + + with tf.GradientTape() as tape: + value = self.model(prev_state.reshape(1,-1)) + grads = tape.gradient(value, self.model.variables) + + difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - trainable_vars = tf.trainable_variables() - gradients = tf.gradients(self.value, trainable_vars) - - apply_gradients = [] - - global_step_op = self.global_step.assign_add(1) - - with tf.variable_scope('apply_gradients'): - for gradient, trainable_var in zip(gradients, trainable_vars): - backprop_calc = self.learning_rate * difference_in_values * gradient - 
grad_apply = trainable_var.assign_add(backprop_calc) - apply_gradients.append(grad_apply) + for grad, train_var in zip(grads, self.model.variables): + backprop_calc = self.learning_rate * difference_in_values * grad + train_var.assign_add(backprop_calc) - - with tf.control_dependencies([global_step_op]): - self.training_op = tf.group(*apply_gradients, name='training_op') - self.saver = tf.train.Saver(max_to_keep=1) + def print_variables(self): + """ + Prints all the variables of the model + :return: + """ + variables = self.model.variables + for k in variables: + print(k) - def eval_state(self, sess, state): - return sess.run(self.value, feed_dict={self.x: state}) + def eval_state(self, state): + """ + Evaluates a single state + :param state: + :return: + """ + return self.model(state.reshape(1,-1)) - def save_model(self, sess, episode_count, global_step): - self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) + def save_model(self, episode_count): + """ + Saves the model of the network, it references global_step as self.global_step + :param episode_count: + :return: + """ + tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt')) + #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: print("[NETWK] ({name}) Saving model to:".format(name=self.name), os.path.join(self.checkpoint_path, 'model.ckpt')) f.write(str(episode_count) + "\n") - def restore_model(self, sess): + with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f: + print("[NETWK] ({name}) Saving global step to:".format(name=self.name), + os.path.join(self.checkpoint_path, 'model.ckpt')) + f.write(str(self.global_step) + "\n") + if self.config['verbose']: + self.print_variables() + + + def calc_vals(self, states): + """ + Calculate a score of each state in states + :param states: A number of states. The states have to be transformed before being given to this function. + :return: + """ + values = self.model.predict_on_batch(states) + return values + + + def restore_model(self): """ Restore a model for a session, such that a trained model and either be further trained or used for evaluation @@ -121,47 +178,38 @@ class Network: :return: Nothing. It's a side-effect that a model gets restored for the network. 
""" + if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')): latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) - self.saver.restore(sess, latest_checkpoint) - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) + tfe.Saver(self.model.variables).restore(latest_checkpoint) + + # variables_names = [v.name for v in self.model.variables] + # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')): - checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model") - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path) - print("[NETWK] ({name}) Restoring model from:".format(name=self.name), - str(latest_checkpoint)) - self.saver.restore(sess, latest_checkpoint) - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) - elif not self.config['force_creation']: - print("You need to have baseline_model inside models") - exit() + global_step_path = os.path.join(self.checkpoint_path, "global_step") + if os.path.isfile(global_step_path): + with open(global_step_path, 'r') as f: + self.config['global_step'] = int(f.read()) + + if self.config['verbose']: + self.print_variables() - def make_move(self, sess, board, roll, player): + + def make_move_0_ply(self, board, roll, player): """ Find the best move given a board, roll and a player, by finding all possible states one can go to - and then picking the best, by using the network to evaluate each state. The highest score is picked - for the 1-player and the max(1-score) is picked for the -1-player. + and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead. + The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player. 
:param sess: :param board: Current board @@ -169,23 +217,37 @@ class Network: :param player: Current player :return: A pair of the best state to go to, together with the score of that state """ - legal_moves = Board.calculate_legal_states(board, player, roll) - moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves] - scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] - best_score_index = np.array(scores).argmax() - best_move_pair = moves_and_scores[best_score_index] - return best_move_pair + legal_moves = list(Board.calculate_legal_states(board, player, roll)) + legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves]) - def make_move_n_ply(self, sess, board, roll, player, n = 1): - best_pair = self.calc_n_ply(n, sess, board, player, roll) + scores = self.model.predict_on_batch(legal_states) + transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores] + + best_score_idx = np.argmax(np.array(transformed_scores)) + best_move = legal_moves[best_score_idx] + best_score = scores[best_score_idx] + + return [best_move, best_score] + + def make_move_1_ply(self, board, roll, player): + """ + Return the best board and best score based on a 1-ply look-ahead. + :param board: + :param roll: + :param player: + :return: + """ + # start = time.time() + best_pair = self.calculate_1_ply(board, roll, player) + # print(time.time() - start) return best_pair - def calculate_1_ply(self, sess, board, roll, player): + def calculate_1_ply(self, board, roll, player): """ - Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an - exhaustive search is performed on the best 15 moves from the single ply. - + Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then + all moves and scores are found for them. The expected score is then calculated for each of the boards from the + 0-ply. :param sess: :param board: :param roll: The original roll @@ -197,23 +259,91 @@ class Network: # find all legal states from the given board and the given roll init_legal_states = Board.calculate_legal_states(board, player, roll) - # find all values for the above boards - zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states] + legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states]) - # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck. 
- best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1) + scores = self.calc_vals(legal_states) + scores = [score.numpy() for score in scores] - best_fifteen_boards = [x[0] for x in best_fifteen[:10]] + moves_and_scores = list(zip(init_legal_states, scores)) - all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player) + sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1) + + best_boards = [x[0] for x in sorted_moves_and_scores[:10]] - best_score_index = np.array(all_rolls_scores).argmax() - best_board = best_fifteen_boards[best_score_index] - return [best_board, max(all_rolls_scores)] + scores, trans_scores = self.do_ply(best_boards, player) + + best_score_idx = np.array(trans_scores).argmax() + + return [best_boards[best_score_idx], scores[best_score_idx]] + + def do_ply(self, boards, player): + """ + Calculates a single extra ply, resulting in a larger search space for our best move. + This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than + allowing the function to search deeper, which could result in an even larger search space. If we wish + to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply. + + :param sess: + :param boards: The boards to try all rolls on + :param player: The player of the previous ply + :return: An array of scores where each index describes one of the boards which was given as param + to this function. + """ + + all_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), + (1, 6), (2, 2), (2, 3), (2, 4), (2, 5), + (2, 6), (3, 3), (3, 4), (3, 5), (3, 6), + (4, 4), (4, 5), (4, 6), (5, 5), (5, 6), + (6, 6) ] + + + # start = time.time() + + # print("/"*50) + length_list = [] + test_list = [] + # Prepping of data + start= time.time() + for board in boards: + length = 0 + for roll in all_rolls: + all_states = list(Board.calculate_legal_states(board, player*-1, roll)) + for state in all_states: + state = np.array(self.board_trans_func(state, player*-1)[0]) + test_list.append(state) + length += 1 + length_list.append(length) + + # print(time.time() - start) + + start = time.time() + + all_scores_legit = self.model.predict_on_batch(np.array(test_list)) + + split_scores = [] + from_idx = 0 + for length in length_list: + split_scores.append(all_scores_legit[from_idx:from_idx+length]) + from_idx += length + + means_splits = [tf.reduce_mean(scores) for scores in split_scores] + transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits] + # print(time.time() - start) + + return ([means_splits, transformed_means_splits]) + def calc_n_ply(self, n_init, sess, board, player, roll): + """ + :param n_init: + :param sess: + :param board: + :param player: + :param roll: + :return: + """ # find all legal states from the given board and the given roll init_legal_states = Board.calculate_legal_states(board, player, roll) @@ -233,6 +363,13 @@ class Network: def n_ply(self, n_init, sess, boards_init, player_init): + """ + :param n_init: + :param sess: + :param boards_init: + :param player_init: + :return: + """ def ply(n, boards, player): def calculate_possible_states(board): possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), @@ -324,69 +461,8 @@ class Network: best_score_pair = boards_with_scores[np.array(scores).argmax()] return best_score_pair - def do_ply(self, sess, boards, player): - """ - Calculates a single extra ply, resulting in a larger search space for our best move. 
- This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than - allowing the function to search deeper, which could result in an even larger search space. If we wish - to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply. - :param sess: - :param boards: The boards to try all rolls on - :param player: The player of the previous ply - :return: An array of scores where each index describes one of the boards which was given as param - to this function. - """ - - def gen_21_rolls(): - """ - Calculate all possible rolls, [[1,1], [1,2] ..] - :return: All possible rolls - """ - a = [] - for x in range(1, 7): - for y in range(1, 7): - if not [x, y] in a and not [y, x] in a: - a.append([x, y]) - - return a - - all_rolls = gen_21_rolls() - - all_rolls_scores = [] - count = 0 - # loop over boards - for a_board in boards: - a_board_scores = [] - - # loop over all rolls, for each board - for roll in all_rolls: - - # find all states we can get to, given the board and roll and the opposite player - all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll) - count += len(all_rolls_boards) - # find scores for each board found above - spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) - for new_board in all_rolls_boards] - - # if the original player is the -1 player, then we need to find (1-value) - spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores] - - # find the best score - best_score = max(spec_roll_scores) - - # append the best score to a_board_scores, where we keep track of the best score for each board - a_board_scores.append(best_score) - - # save the expected average of board scores - all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores)) - - # return all the average scores - print(count) - return all_rolls_scores - - - def eval(self, episode_count, trained_eps = 0, tf_session = None): + def eval(self, episode_count, trained_eps = 0): """ Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval a model which has been given random weights, so it acts deterministically random. 
@@ -397,7 +473,7 @@ class Network: :return: outcomes: The outcomes of the evaluation session """ - def do_eval(sess, method, episodes = 1000, trained_eps = 0): + def do_eval(method, episodes = 1000, trained_eps = 0): """ Do the actual evaluation @@ -434,7 +510,7 @@ class Network: while Board.outcome(board) is None: roll = (random.randrange(1, 7), random.randrange(1, 7)) - board = (self.make_move(sess, board, roll, 1))[0] + board = (self.make_move(board, roll, 1))[0] roll = (random.randrange(1, 7), random.randrange(1, 7)) @@ -457,7 +533,7 @@ class Network: while Board.outcome(board) is None: roll = (random.randrange(1, 7), random.randrange(1, 7)) - board = (self.make_move(sess, board, roll, 1))[0] + board = (self.make_move(board, roll, 1))[0] roll = (random.randrange(1, 7), random.randrange(1, 7)) @@ -476,112 +552,122 @@ class Network: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0] - if tf_session == None: - with tf.Session() as session: - session.run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [ (method, do_eval(session, - method, - episode_count, - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes - else: - outcomes = [ (method, do_eval(tf_session, - method, - episode_count, - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes + + outcomes = [ (method, do_eval(method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes + + + def play_against_network(self): + """ + Allows you to play against a supplied model. + :return: + """ + self.restore_model() + human_player = Player(-1) + cur_player = 1 + player = 1 + board = Board.initial_state + i = 0 + while Board.outcome(board) is None: + print(Board.pretty(board)) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + print("Bot rolled:", roll) + + board, _ = self.make_move(board, roll, player) + print(Board.pretty(board)) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + print("You rolled:", roll) + board = human_player.make_human_move(board, roll) + print("DONE "*10) + print(Board.pretty(board)) + + def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): - with tf.Session() as sess: - difference_in_vals = 0 - writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph) + """ + Train a model to by self-learning. + :param episodes: + :param save_step_size: + :param trained_eps: + :return: + """ - sess.run(tf.global_variables_initializer()) - self.restore_model(sess) + difference_in_vals = 0 - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) + self.restore_model() - start_time = time.time() + start_time = time.time() - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write( - "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) - sys.stderr.write( - "[TRAIN] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( - eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write( + "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) - sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) - outcomes = [] - for episode in range(1, episodes + 1): + sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) + outcomes = [] + for episode in range(1, episodes + 1): - sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) - # TODO decide which player should be here + sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) + # TODO decide which player should be here - player = 1 - prev_board = Board.initial_state - i = 0 - while Board.outcome(prev_board) is None: - i += 1 - - cur_board, cur_board_value = self.make_move(sess, - prev_board, - (random.randrange(1, 7), random.randrange(1, 7)), - player) - - difference_in_vals += abs((cur_board_value - self.eval_state(sess, self.board_trans_func(prev_board, player)))) + player = 1 + prev_board = Board.initial_state + i = 0 + while Board.outcome(prev_board) is None: + i += 1 + self.global_step += 1 - # adjust weights - sess.run(self.training_op, - feed_dict={self.x: self.board_trans_func(prev_board, player), - self.value_next: cur_board_value}) + cur_board, cur_board_value = self.make_move(prev_board, + (random.randrange(1, 7), random.randrange(1, 7)), + player) + difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) + + if self.config['verbose']: + print("Difference in values:", difference_in_vals) + print("Current board value :", cur_board_value) + print("Current board is :\n",cur_board) + + + # adjust weights + if Board.outcome(cur_board) is None: + self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) player *= -1 - prev_board = cur_board + prev_board = cur_board - final_board = prev_board - sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) - outcomes.append(Board.outcome(final_board)[1]) - final_score = np.array([Board.outcome(final_board)[1]]) - scaled_final_score = ((final_score + 2) / 4) + final_board = prev_board + sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) + outcomes.append(Board.outcome(final_board)[1]) + final_score = np.array([Board.outcome(final_board)[1]]) + scaled_final_score = ((final_score + 2) / 4) - with tf.name_scope("final"): - merged = tf.summary.merge_all() - global_step, summary, _ = sess.run([self.global_step, merged, self.training_op], - feed_dict={self.x: self.board_trans_func(prev_board, player), - self.value_next: scaled_final_score.reshape((1, 1))}) - writer.add_summary(summary, episode + trained_eps) + self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) - sys.stderr.write("\n") + sys.stderr.write("\n") - if episode % min(save_step_size, episodes) == 0: - 
sys.stderr.write("[TRAIN] Saving model...\n") - self.save_model(sess, episode + trained_eps, global_step) + if episode % min(save_step_size, episodes) == 0: + sys.stderr.write("[TRAIN] Saving model...\n") + self.save_model(episode + trained_eps) - if episode % 50 == 0: - print_time_estimate(episode) + if episode % 50 == 0: + print_time_estimate(episode) - sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(sess, episode+trained_eps, global_step) - - writer.close() - - return outcomes, difference_in_vals[0][0] + sys.stderr.write("[TRAIN] Saving model for final episode...\n") + self.save_model(episode+trained_eps) + + return outcomes, difference_in_vals[0][0] diff --git a/network_test.py b/network_test.py index a514dfc..1bcb878 100644 --- a/network_test.py +++ b/network_test.py @@ -9,14 +9,12 @@ from board import Board import main config = main.config.copy() -config['model'] = "tesauro_blah" -config['force_creation'] = True +config['model'] = "player_testings" +config['ply'] = "1" +config['board_representation'] = 'quack-fat' network = Network(config, config['model']) -session = tf.Session() - -session.run(tf.global_variables_initializer()) -network.restore_model(session) +network.restore_model() initial_state = Board.initial_state initial_state_1 = ( 0, @@ -38,65 +36,25 @@ boards = {initial_state, initial_state_2 } -def gen_21_rolls(): - """ - Calculate all possible rolls, [[1,1], [1,2] ..] - :return: All possible rolls - """ - a = [] - for x in range(1, 7): - for y in range(1, 7): - if not [x, y] in a and not [y, x] in a: - a.append([x, y]) - - return a - -def calc_all_scores(board, player): - scores = [] - trans_board = network.board_trans_func(board, player) - rolls = gen_21_rolls() - for roll in rolls: - score = network.eval_state(session, trans_board) - scores.append(score) - return scores - - -def calculate_possible_states(board): - possible_rolls = [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), - (1, 6), (2, 2), (2, 3), (2, 4), (2, 5), - (2, 6), (3, 3), (3, 4), (3, 5), (3, 6), - (4, 4), (4, 5), (4, 6), (5, 5), (5, 6), - (6, 6)] - - for roll in possible_rolls: - meh = Board.calculate_legal_states(board, -1, roll) - print(len(meh)) - return [Board.calculate_legal_states(board, -1, roll) - for roll - in possible_rolls] -#for board in boards: -# calculate_possible_states(board) -#print("-"*30) -#print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1)) +# board = network.board_trans_func(Board.initial_state, 1) -#print(" "*10 + "network_test") -print(" "*20 + "Depth 1") -print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4])) -#print(scores) +# pair = network.make_move(Board.initial_state, [3,2], 1) -#print(" "*20 + "Depth 2") -#print(network.n_ply(2, session, boards, 1)) +# print(pair[1]) -# #print(x.shape) -# with graph_lol.as_default(): -# session_2 = tf.Session(graph = graph_lol) -# network_2 = Network(session_2) -# network_2.restore_model() -# print(network_2.eval_state(initial_state)) - -# print(network.eval_state(initial_state)) +# network.do_backprop(board, 0.9) + + +# network.print_variables() + + +# network.save_model(2) + +# print(network.calculate_1_ply(Board.initial_state, [3,2], 1)) + +network.play_against_network() \ No newline at end of file diff --git a/player.py b/player.py index 596449f..4208cdd 100644 --- a/player.py +++ b/player.py @@ -11,19 +11,55 @@ class Player: def get_sym(self): return self.sym - def make_move(self, board, sym, roll): - print(Board.pretty(board)) - legal_moves = 
Board.calculate_legal_states(board, sym, roll) - if roll[0] == roll[1]: - print("Example of move: 4/6,6/8,12/14,13/15") - else: - print("Example of move: 4/6,13/17") + def calc_move_sets(self, from_board, roll, player): + board = from_board + sets = [] + total = 0 + for r in roll: + # print("Value of r:",r) + sets.append([Board.calculate_legal_states(board, player, [r,0]), r]) + total += r + sets.append([Board.calculate_legal_states(board, player, [total,0]), total]) + return sets - user_moves = input("Enter your move: ").strip().split(",") - board = Board.apply_moves_to_board(board, sym, user_moves) - while board not in legal_moves: - print("Move is invalid, please enter a new move") - user_moves = input("Enter your move: ").strip().split(",") - board = Board.apply_moves_to_board(board, sym, user_moves) - return board + def tmp_name(self, from_board, to_board, roll, player, total_moves): + sets = self.calc_move_sets(from_board, roll, player) + return_board = from_board + for idx, board_set in enumerate(sets): + + board_set[0] = list(board_set[0]) + print(to_board) + print(board_set) + if to_board in board_set[0]: + total_moves -= board_set[1] + # if it's not the sum of the moves + if idx < 2: + roll[idx] = 0 + else: + roll = [0,0] + return_board = to_board + break + return total_moves, roll, return_board + + def make_human_move(self, board, roll): + total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4 + move = "" + while total_moves != 0: + while True: + print("You have {roll} left!".format(roll=total_moves)) + move = input("Pick a move!\n") + pot_move = move.split("/") + if len(pot_move) == 2: + try: + pot_move[0] = int(pot_move[0]) + pot_move[1] = int(pot_move[1]) + move = pot_move + break; + except TypeError: + print("The correct syntax is: 2/5 for a move from index 2 to 5.") + + to_board = Board.apply_moves_to_board(board, self.get_sym(), move) + total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves) + print(Board.pretty(board)) + return board \ No newline at end of file diff --git a/quack/quack.c b/quack/quack.c new file mode 100644 index 0000000..213c49c --- /dev/null +++ b/quack/quack.c @@ -0,0 +1,484 @@ +#include + +static PyObject* QuackError; + +typedef struct board_list board_list; +struct board_list { + int size; + PyObject* list[16]; +}; + +/* Utility functions */ +int sign(int x) { + return (x > 0) - (x < 0); +} + +int abs(int x) { + if (x >= 0) return x; + else return -x; +} +/* end utility functions */ + +/* Helper functions */ + +int *idxs_with_checkers_of_player(int board[], int player) { + int idxs_tmp[26]; + int ctr = 0; + + for (int i = 0; i < 26; i++) { + if (board[i] * player >= 1) { + idxs_tmp[ctr] = i; + ctr++; + } + } + + int *idxs = malloc((1 + ctr) * sizeof(int)); + if (idxs == NULL) { + PyErr_NoMemory(); + abort(); + } + + idxs[0] = ctr; + for (int i = 0; i < ctr; i++) { + idxs[i+1] = idxs_tmp[i]; + } + + return idxs; +} + +int is_forward_move(int direction, int player) { + return direction == player; +} + +int face_value_match_move_length(int delta, int face_value) { + return abs(delta) == face_value; +} + +int bear_in_if_checker_on_bar(int board[], int player, int from_idx) { + int bar; + + if (player == 1) bar = 0; + else bar = 25; + + if (board[bar] != 0) return from_idx == bar; + else return 1; +} + +int checkers_at_from_idx(int from_state, int player) { + return sign(from_state) == player; +} + +int no_block_at_to_idx(int to_state, int player) { + if (-sign(to_state) == player) return 
abs(to_state) == 1; + else return 1; +} + + +int can_bear_off(int board[], int player, int from_idx, int to_idx) { + int* checker_idxs = idxs_with_checkers_of_player(board, player); + + int moving_backmost_checker = 1; + int bearing_directly_off = 0; + int all_checkers_in_last_quadrant = 1; + + /* Check if bearing directly off */ + if (player == 1 && to_idx == 25) bearing_directly_off = 1; + else if (player == -1 && to_idx == 0) bearing_directly_off = 1; + + for (int i = 1; i <= checker_idxs[0]; i++) { + if (player == 1 ) { + /* Check if all checkers are in last quardrant */ + if (checker_idxs[i] < 19) { + all_checkers_in_last_quadrant = 0; + break; + } + + /* Check if moving backmost checker */ + if (checker_idxs[i] < from_idx) { + moving_backmost_checker = 0; + if (!bearing_directly_off) break; + } + } else { + if (checker_idxs[i] > 6) { + all_checkers_in_last_quadrant = 0; + break; + } + + if (checker_idxs[i] > from_idx) { + moving_backmost_checker = 0; + if (!bearing_directly_off) break; + } + } + } + + free(checker_idxs); + + if (all_checkers_in_last_quadrant && + (bearing_directly_off || moving_backmost_checker)) return 1; + else return 0; +} + + + +/* end helper functions */ + +int is_move_valid(int board[], int player, int face_value, int move[]) { + int from_idx = move[0]; + int to_idx = move[1]; + int to_state; + int from_state = board[from_idx]; + int delta = to_idx - from_idx; + int direction = sign(delta); + int bearing_off; + + if (to_idx >= 1 && to_idx <= 24) { + to_state = board[to_idx]; + bearing_off = 0; + } else { + to_state = 0; + bearing_off = 1; + } + + return is_forward_move(direction, player) + && face_value_match_move_length(delta, face_value) + && bear_in_if_checker_on_bar(board, player, from_idx) + && checkers_at_from_idx(from_state, player) + && no_block_at_to_idx(to_state, player) + && (!bearing_off || can_bear_off(board, player, from_idx, to_idx)) + ; +} + +void do_move(int board[], int player, int move[]) { + int from_idx = move[0]; + int to_idx = move[1]; + + /* "lift" checker */ + board[from_idx] -= player; + + /* Return early if bearing off */ + if (to_idx < 1 || to_idx > 24) return; + + /* Hit opponent checker */ + if (board[to_idx] * player == -1) { + /* Move checker to bar */ + if (player == 1) board[25] -= player; + else board[0] -= player; + + board[to_idx] = 0; + } + + /* Put down checker */ + board[to_idx] += player; + + return; +} + +int* do_move_clone(int board[], int player, int move[]) { + int* new_board = malloc(sizeof(int) * 26); + if (new_board == NULL) { + PyErr_NoMemory(); + abort(); + } + + for (int i = 0; i < 26; i++) { + new_board[i] = board[i]; + } + + do_move(new_board, player, move); + return new_board; +} + +PyObject* store_board_to_pytuple(int board[], int size) { + PyObject* board_tuple = PyTuple_New(size); + for (int i = 0; i < size; i++) { + PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i])); + } + return board_tuple; +} + +board_list calc_moves(int board[], int player, int face_value) { + int* checker_idxs = idxs_with_checkers_of_player(board, player); + board_list boards = { .size = 0 }; + + if (checker_idxs[0] == 0) { + boards.size = 1; + PyObject* board_tuple = store_board_to_pytuple(board, 26); + boards.list[0] = board_tuple; + free(checker_idxs); + return boards; + } + + int ctr = 0; + for (int i = 1; i <= checker_idxs[0]; i++) { + int move[2]; + move[0] = checker_idxs[i]; + move[1] = checker_idxs[i] + (face_value * player); + + if (is_move_valid(board, player, face_value, move)) { + int* new_board = 
do_move_clone(board, player, move); + PyObject* board_tuple = store_board_to_pytuple(new_board, 26); + + // segfault maybe :'( + free(new_board); + + boards.list[ctr] = board_tuple; + ctr++; + } + } + + free(checker_idxs); + + boards.size = ctr; + return boards; +} + +int* board_features_quack_fat(int board[], int player) { + int* new_board = malloc(sizeof(int) * 30); + if (new_board == NULL) { + PyErr_NoMemory(); + abort(); + } + + int pos_sum = 0; + int neg_sum = 0; + for (int i = 0; i < 26; i++) { + new_board[i] = board[i]; + if (sign(new_board[i] > 0)) pos_sum += new_board[i]; + else neg_sum += new_board[i]; + } + + new_board[26] = 15 - pos_sum; + new_board[27] = -15 - neg_sum; + if (player == 1) { + new_board[28] = 1; + new_board[29] = 0; + } else { + new_board[28] = 0; + new_board[29] = 1; + } + + return new_board; +} + +/* Meta definitions */ +int extract_board(int *board, PyObject* board_tuple_obj) { + long numValuesBoard; + numValuesBoard = PyTuple_Size(board_tuple_obj); + if (numValuesBoard != 26) { + PyErr_SetString(QuackError, "Board tuple must have 26 entries"); + return 1; + } + + PyObject* board_val_obj; + // Iterate over tuple to retrieve positions + for (int i=0; i Alexander and I rewrote part of our bachelor's project in C this past Friday. + You really have to watch your memory allocations. + Yes, absolutely. + I found a memory leak that was leaking 100 MiB of memory per second. + Which part got C-ified? + Damned + The cause was that we handed an object back to Python without decrementing its ref-count, so the interpreter still thought someone needed it. + The part of the game logic that checks whether moves are valid. + It gets called many thousands of times per game, so we figured there might be some optimization to gain from rewriting it in C. + Ok, so you didn't have to use alloc and free yourselves. That's still something. + The method itself became 7 times faster! + Wow! + We did, actually. We ended up doing that as well. + We needed lists of variable size. It ended up as a struct with a "size" field and a "list" field. + Does that speedup include the back and forth between C and Python? + It should, yes! + Did it make a big difference for how fast you can evaluate? + I don't think there is much "back and forth" overhead. It looks like the code you write gets thrown fairly directly into the interpreter. + It made a big difference for when we do 1-ply. + "ply" is how many moves you look ahead. + So only looking at the immediately next move is 0-ply, which is what we have done until now + 1-ply was too slow. It took about 6-7 seconds to evaluate a single move. + Alexander did some rewriting so TensorFlow computed it faster, getting it down to about 3-4 seconds *per game*. + Then we rewrote some of it in C, and now we are at about 2 seconds per game with 1-ply, which is pretty wild. + It is so great that the Python interpreter can be extended with C! + caspervk, can you optimize your bachelor's project with a couple of C modules? + That's a whole little section for the report right there. + Yeah. I'm just copying this in verbatim.
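The leak described in the chat above — handing an object back to Python without decrementing its reference count — is the classic pitfall when a C extension builds Python containers. A minimal illustration using the same board_list struct as quack.c (the helper name is hypothetical and not part of the module; error checks omitted): PyTuple_SetItem, as used in store_board_to_pytuple, steals the reference created by Py_BuildValue and needs no decrement, but PyList_Append takes its own reference, so the one we already hold must be released or every call leaks one tuple.

    /* Sketch of the ref-count discipline discussed above. */
    static PyObject* boards_to_pylist(board_list boards) {
        PyObject* list = PyList_New(0);        /* new reference, handed back to the caller */
        for (int i = 0; i < boards.size; i++) {
            PyObject* tuple = boards.list[i];  /* we own this reference */
            PyList_Append(list, tuple);        /* the list takes its own reference... */
            Py_DECREF(tuple);                  /* ...so ours must be dropped here */
        }
        return list;
    }

Forgetting the Py_DECREF keeps the interpreter convinced that someone still needs each tuple, which matches the ever-growing memory usage reported above.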
diff --git a/requirements.txt b/requirements.txt index e7ac94a..2738d5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,8 +16,8 @@ pyparsing==2.2.0 python-dateutil==2.7.2 pytz==2018.3 six==1.11.0 -tensorboard==1.6.0 -tensorflow==1.6.0 +tensorboard==1.8.0 +tensorflow==1.8.0 termcolor==1.1.0 Werkzeug==0.14.1 pygame==1.9.3 diff --git a/tensorflow_impl_tests/eager_main.py b/tensorflow_impl_tests/eager_main.py index 1b58abc..0cce81f 100644 --- a/tensorflow_impl_tests/eager_main.py +++ b/tensorflow_impl_tests/eager_main.py @@ -1,41 +1,94 @@ import time import numpy as np import tensorflow as tf +from board import Board +import tensorflow.contrib.eager as tfe + tf.enable_eager_execution() +xavier_init = tf.contrib.layers.xavier_initializer() +opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1) + output_size = 1 hidden_size = 40 input_size = 30 - model = tf.keras.Sequential([ - tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(1,30)), - tf.keras.layers.Dense(1, activation="sigmoid") + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2)) ]) -input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] -all_input = np.array([input for _ in range(8500)]) +# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./")) -single_in = np.array(input).reshape(1,-1) +input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0] + + + +all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)]) + + +single_in = Board.board_features_quack_fat(input, 1) start = time.time() all_predictions = model.predict_on_batch(all_input) -print(all_predictions) -print(time.time() - start) + +learning_rate = 0.1 + +with tf.GradientTape() as tape: + value = model(single_in) +print("Before:", value) -start = time.time() -all_predictions = [model(single_in) for _ in range(8500)] +grads = tape.gradient(value, model.variables) +print("/"*40,"model_variables","/"*40) +print(model.variables) +print("/"*40,"grads","/"*40) +print(grads) -print(all_predictions[:10]) -print(time.time() - start) +difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), []) +for grad, train_var in zip(grads, model.variables): + backprop_calc = 0.1 * difference_in_values * grad + train_var.assign_add(backprop_calc) + +value = model(single_in) +print("/"*40,"model_variables","/"*40) +print(model.variables) +print("After:", value) + + +# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] +# +# # print(model.variables[0][0]) +# weights_before = model.weights[0] +# +# start = time.time() +# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] +# +# start = time.time() +# for gradient, trainable_var in zip(grads, model.variables): +# backprop_calc = 0.1 * (0.9 - val) * gradient +# trainable_var.assign_add(backprop_calc) +# +# # opt.apply_gradients(zip(grads, model.variables)) +# +# print(time.time() - start) +# +# print(model(single_in)) +# +# vals = model.predict_on_batch(all_input) +# vals = list(vals) +# vals[3] = 4 +# print(vals) +# print(np.argmax(np.array(vals))) + +# tfe.Saver(model.variables).save("./tmp_ckpt") diff --git a/tensorflow_impl_tests/normal_main.py b/tensorflow_impl_tests/normal_main.py index acfc044..a8b106c 
100644 --- a/tensorflow_impl_tests/normal_main.py +++ b/tensorflow_impl_tests/normal_main.py @@ -16,9 +16,9 @@ class Everything: W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size), - initializer=xavier_init) + initializer=tf.constant_initializer(-2)) W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), - initializer=xavier_init) + initializer=tf.constant_initializer(0.2)) b_1 = tf.get_variable("b_1", (self.hidden_size,), initializer=tf.zeros_initializer) @@ -29,16 +29,37 @@ class Everything: self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') + apply_gradients = [] + + + trainable_vars = tf.trainable_variables() + gradients = tf.gradients(self.value, trainable_vars) + + difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), []) + + with tf.variable_scope('apply_gradients'): + for gradient, trainable_var in zip(gradients, trainable_vars): + backprop_calc = 0.1 * difference_in_values * gradient + grad_apply = trainable_var.assign_add(backprop_calc) + apply_gradients.append(grad_apply) + + + self.training_op = tf.group(*apply_gradients, name='training_op') + + + def eval(self): input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0]) start = time.time() sess = tf.Session() sess.run(tf.global_variables_initializer()) - for i in range(8500): + for i in range(20): val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)}) print(time.time() - start) print(val) - + sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)}) + val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)}) + print(val) everything = Everything() everything.eval() diff --git a/test.py b/test.py index 6c9c130..90cea23 100644 --- a/test.py +++ b/test.py @@ -141,6 +141,56 @@ class TestIsMoveValid(unittest.TestCase): # TODO: More tests for bearing off are needed + def test_bear_off_non_backmost(self): + board = ( 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, + 0 ) + self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), True) + self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True) + self.assertEqual(Board.is_move_valid(board, 1, 2, (24, 26)), False) + + def test_bear_off_quadrant_limits_white(self): + board = ( 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, + 0 ) + self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), False) + self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), False) + + def test_bear_off_quadrant_limits_black(self): + board = ( 0, + -1, -1, -1, -1, -1, -1, + -1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0 ) + self.assertEqual(Board.is_move_valid(board, -1, 2, (2, 0)), False) + self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), False) + + def test_bear_off_quadrant_limits_white_2(self): + board = ( 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 1, + 0 ) + self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True) + + def test_bear_off_quadrant_limits_black_2(self): + board = ( 0, + -1, 0, 0, 0, 0, -1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0 ) + self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), True) + + class TestNumOfChecker(unittest.TestCase): def test_simple_1(self): board = ( 0,