From 9a2d87516e68f687bea4dd53251cc12e70d52713 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Wed, 9 May 2018 00:33:05 +0200 Subject: [PATCH 01/29] Ongoing rewrite of network to use an eager model. We're now capable of evaluating a list of states with network.py. We can also save and restore models. --- network.py | 128 ++++++++++++--------------- network_test.py | 26 +++--- tensorflow_impl_tests/eager_main.py | 44 +++++++-- tensorflow_impl_tests/normal_main.py | 20 ++++- 4 files changed, 127 insertions(+), 91 deletions(-) diff --git a/network.py b/network.py index 84802e3..4f63b75 100644 --- a/network.py +++ b/network.py @@ -8,6 +8,7 @@ import random from eval import Eval import glob from operator import itemgetter +import tensorflow.contrib.eager as tfe class Network: # board_features_quack has size 28 @@ -25,6 +26,10 @@ class Network: return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) def __init__(self, config, name): + tf.enable_eager_execution() + + xavier_init = tf.contrib.layers.xavier_initializer() + self.config = config self.checkpoint_path = os.path.join(config['model_storage_path'], config['model']) @@ -38,17 +43,7 @@ class Network: self.hidden_size = 40 self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - - self.global_step = tf.Variable(0, trainable=False, name="global_step") - self.learning_rate = tf.maximum(self.min_learning_rate, - tf.train.exponential_decay(self.max_learning_rate, - self.global_step, 50000, - 0.96, - staircase=True), - name="learning_rate") - - - + self.global_step = "lol" # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): @@ -57,62 +52,61 @@ class Network: else: self.episodes_trained = 0 - self.x = tf.placeholder('float', [1, self.input_size], name='input') - self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next") - xavier_init = tf.contrib.layers.xavier_initializer() - - W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size), - initializer=xavier_init) - W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), - initializer=xavier_init) - - b_1 = tf.get_variable("b_1", (self.hidden_size,), - initializer=tf.zeros_initializer) - b_2 = tf.get_variable("b_2", (self.output_size,), - initializer=tf.zeros_initializer) + self.model = tf.keras.Sequential([ + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, + input_shape=(1,30)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) + ]) - value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') - self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') - # TODO: Alexander thinks that self.value will be computed twice (instead of once) - difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), []) + + def do_backprop(self, prev_state, value_next): + self.learning_rate = tf.maximum(self.min_learning_rate, + tf.train.exponential_decay(self.max_learning_rate, + self.global_step, 50000, + 0.96, + staircase=True), + name="learning_rate") + + + with tf.GradientTape() as tape: + value = self.model(np.array(input).reshape(1, -1)) + grads = tape.gradient(value, self.model.variables) + + difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - trainable_vars = 
tf.trainable_variables() - gradients = tf.gradients(self.value, trainable_vars) - - apply_gradients = [] - global_step_op = self.global_step.assign_add(1) - with tf.variable_scope('apply_gradients'): - for gradient, trainable_var in zip(gradients, trainable_vars): - backprop_calc = self.learning_rate * difference_in_values * gradient - grad_apply = trainable_var.assign_add(backprop_calc) - apply_gradients.append(grad_apply) + for grad, train_var in zip(grads, self.model.variables): + backprop_calc = self.learning_rate * difference_in_values * grad + train_var.assign_add(backprop_calc) - - with tf.control_dependencies([global_step_op]): - self.training_op = tf.group(*apply_gradients, name='training_op') - - self.saver = tf.train.Saver(max_to_keep=1) def eval_state(self, sess, state): return sess.run(self.value, feed_dict={self.x: state}) - def save_model(self, sess, episode_count, global_step): - self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) - with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: - print("[NETWK] ({name}) Saving model to:".format(name=self.name), - os.path.join(self.checkpoint_path, 'model.ckpt')) - f.write(str(episode_count) + "\n") + def save_model(self, episode_count, global_step): + tfe.Saver(self.model.variables).save("./tmp_ckpt", global_step=global_step) + #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) + #with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: + # print("[NETWK] ({name}) Saving model to:".format(name=self.name), + # os.path.join(self.checkpoint_path, 'model.ckpt')) + # f.write(str(episode_count) + "\n") - def restore_model(self, sess): + + def calc_vals(self, states): + values = self.model.predict_on_batch(states) + self.save_model(0, 432) + return values + + + def restore_model(self): """ Restore a model for a session, such that a trained model and either be further trained or used for evaluation @@ -126,35 +120,29 @@ class Network: latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) - self.saver.restore(sess, latest_checkpoint) - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) + tfe.Saver(model.variables).restore(latest_checkpoint) + + variables_names = [v.name for v in self.model.variables] + # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')): - checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model") - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path) + else: + latest_checkpoint = tf.train.latest_checkpoint("./") print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) - self.saver.restore(sess, latest_checkpoint) + tfe.Saver(self.model.variables).restore(latest_checkpoint) - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - 
print("Variable: ", k) - print("Shape: ", v.shape) - print(v) - elif not self.config['force_creation']: - print("You need to have baseline_model inside models") - exit() + #variables_names = [v.name for v in self.model.variables] + + # Restore trained episode count for model + #episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") + #if os.path.isfile(episode_count_path): + # with open(episode_count_path, 'r') as f: + # self.config['start_episode'] = int(f.read()) def make_move(self, sess, board, roll, player): diff --git a/network_test.py b/network_test.py index a514dfc..58fec8a 100644 --- a/network_test.py +++ b/network_test.py @@ -11,12 +11,10 @@ import main config = main.config.copy() config['model'] = "tesauro_blah" config['force_creation'] = True +config['board_representation'] = 'quack-fat' network = Network(config, config['model']) -session = tf.Session() - -session.run(tf.global_variables_initializer()) -network.restore_model(session) +network.restore_model() initial_state = Board.initial_state initial_state_1 = ( 0, @@ -51,14 +49,7 @@ def gen_21_rolls(): return a -def calc_all_scores(board, player): - scores = [] - trans_board = network.board_trans_func(board, player) - rolls = gen_21_rolls() - for roll in rolls: - score = network.eval_state(session, trans_board) - scores.append(score) - return scores + def calculate_possible_states(board): @@ -83,9 +74,16 @@ def calculate_possible_states(board): #print("-"*30) #print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1)) +board = network.board_trans_func(Board.initial_state, 1) + +input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] +all_input = np.array([input for _ in range(20)]) +print(network.calc_vals(all_input)) + + #print(" "*10 + "network_test") -print(" "*20 + "Depth 1") -print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4])) +#print(" "*20 + "Depth 1") +#print(network.calc_n_ply(1, session, Board.initial_state, 1, [2, 4])) #print(scores) diff --git a/tensorflow_impl_tests/eager_main.py b/tensorflow_impl_tests/eager_main.py index 1b58abc..b2da143 100644 --- a/tensorflow_impl_tests/eager_main.py +++ b/tensorflow_impl_tests/eager_main.py @@ -1,25 +1,32 @@ import time import numpy as np import tensorflow as tf +import tensorflow.contrib.eager as tfe + tf.enable_eager_execution() +xavier_init = tf.contrib.layers.xavier_initializer() +opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1) + output_size = 1 hidden_size = 40 input_size = 30 - model = tf.keras.Sequential([ - tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(1,30)), - tf.keras.layers.Dense(1, activation="sigmoid") + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) ]) +#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./")) + input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] -all_input = np.array([input for _ in range(8500)]) +all_input = np.array([input for _ in range(20)]) + single_in = np.array(input).reshape(1,-1) @@ -34,8 +41,33 @@ print(time.time() - start) start = time.time() -all_predictions = [model(single_in) for _ in range(8500)] +all_predictions = [model(single_in) for _ in range(20)] -print(all_predictions[:10]) +#print(all_predictions[:10]) print(time.time() - start) +print("-"*30) +with tf.GradientTape() as tape: + val = 
model(np.array(input).reshape(1,-1)) +grads = tape.gradient(val, model.variables) + +grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] + +# print(model.variables[0][0]) +weights_before = model.weights[0] + +start = time.time() +#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] + +start = time.time() +#for gradient, trainable_var in zip(grads, model.variables): +# backprop_calc = 0.1 * (val - np.random.uniform(-1, 1)) * gradient +# trainable_var.assign_add(backprop_calc) + +opt.apply_gradients(zip(grads, model.variables)) + +print(time.time() - start) + +print(model(np.array(input).reshape(1,-1))) + +tfe.Saver(model.variables).save("./tmp_ckpt") diff --git a/tensorflow_impl_tests/normal_main.py b/tensorflow_impl_tests/normal_main.py index acfc044..865f017 100644 --- a/tensorflow_impl_tests/normal_main.py +++ b/tensorflow_impl_tests/normal_main.py @@ -29,12 +29,30 @@ class Everything: self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') + apply_gradients = [] + + + trainable_vars = tf.trainable_variables() + gradients = tf.gradients(self.value, trainable_vars) + + + with tf.variable_scope('apply_gradients'): + for gradient, trainable_var in zip(gradients, trainable_vars): + backprop_calc = self.learning_rate * difference_in_values * gradient + grad_apply = trainable_var.assign_add(backprop_calc) + apply_gradients.append(grad_apply) + + with tf.control_dependencies([global_step_op]): + self.training_op = tf.group(*apply_gradients, name='training_op') + + + def eval(self): input = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0]) start = time.time() sess = tf.Session() sess.run(tf.global_variables_initializer()) - for i in range(8500): + for i in range(20): val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)}) print(time.time() - start) print(val) From cb7e7b519c0123c0cf4c0dee41de3b3f00305e04 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Wed, 9 May 2018 22:22:12 +0200 Subject: [PATCH 02/29] Getting closer to functionality. We're capable of evaluating moves and a rework of global_step has begun, such that we now use episode_count as a way of calculating exp_decay, which have been implemented as a function. 
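In sketch form, the decayed learning rate and the eager TD-style weight update now look roughly like this. This is only an illustration of what exp_decay and do_backprop compute, assuming TensorFlow 1.x eager mode and a small stand-in model; the feature vector and target value below are made up.

    import numpy as np
    import tensorflow as tf

    tf.enable_eager_execution()  # TF 1.x, as pinned in requirements.txt

    def exp_decay(max_lr, global_step, decay_rate, decay_steps):
        # Staircase exponential decay driven by a plain Python counter
        # instead of a graph-mode tf.train.exponential_decay op.
        return max_lr * decay_rate ** (global_step // decay_steps)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(40, activation="sigmoid", input_shape=(30,)),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    state = np.random.uniform(-1, 1, (1, 30)).astype(np.float32)  # stand-in board features
    value_next = 0.9                                              # stand-in target value
    global_step = 12345

    learning_rate = max(0.001, exp_decay(0.1, global_step, 0.96, 50000))

    with tf.GradientTape() as tape:
        value = model(state)
    grads = tape.gradient(value, model.variables)

    # TD update: move every weight along its own gradient, scaled by the value error.
    delta = value_next - float(value.numpy()[0, 0])
    for grad, var in zip(grads, model.variables):
        var.assign_add(learning_rate * delta * grad)

The decay itself no longer needs a TensorFlow op: with episode_count (or, later, the global_step file) as the counter, the learning rate is multiplied by 0.96 every 50000 steps and is clamped from below at min_learning_rate (0.001).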
--- network.py | 73 ++++++++++++++++------------ network_test.py | 16 ++++-- tensorflow_impl_tests/eager_main.py | 33 ++++++++----- tensorflow_impl_tests/normal_main.py | 11 +++-- 4 files changed, 82 insertions(+), 51 deletions(-) diff --git a/network.py b/network.py index 4f63b75..818b886 100644 --- a/network.py +++ b/network.py @@ -43,7 +43,10 @@ class Network: self.hidden_size = 40 self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - self.global_step = "lol" + + self.global_step = tf.train.get_or_create_global_step() + + #tf.train.get_or_create_global_step() # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): @@ -62,47 +65,48 @@ class Network: + def exp_decay(self, max_lr, epi_counter, decay_rate, decay_steps): + res = max_lr * decay_rate**(epi_counter // decay_steps) + return res def do_backprop(self, prev_state, value_next): + self.learning_rate = tf.maximum(self.min_learning_rate, - tf.train.exponential_decay(self.max_learning_rate, - self.global_step, 50000, - 0.96, - staircase=True), - name="learning_rate") - + self.exp_decay(self.max_learning_rate, self.episodes_trained, 0.96, 50000), + name="learning_rate") + # self.learning_rate = 0.1 + print(tf.train.get_global_step()) with tf.GradientTape() as tape: - value = self.model(np.array(input).reshape(1, -1)) + value = self.model(prev_state.reshape(1,-1)) grads = tape.gradient(value, self.model.variables) difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - global_step_op = self.global_step.assign_add(1) + # global_step_op = self.global_step.assign_add(1) with tf.variable_scope('apply_gradients'): for grad, train_var in zip(grads, self.model.variables): backprop_calc = self.learning_rate * difference_in_values * grad train_var.assign_add(backprop_calc) - + print(self.episodes_trained) def eval_state(self, sess, state): return sess.run(self.value, feed_dict={self.x: state}) def save_model(self, episode_count, global_step): - tfe.Saver(self.model.variables).save("./tmp_ckpt", global_step=global_step) + tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=self.global_step) #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) - #with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: - # print("[NETWK] ({name}) Saving model to:".format(name=self.name), - # os.path.join(self.checkpoint_path, 'model.ckpt')) - # f.write(str(episode_count) + "\n") + with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: + print("[NETWK] ({name}) Saving model to:".format(name=self.name), + os.path.join(self.checkpoint_path, 'model.ckpt')) + f.write(str(episode_count) + "\n") def calc_vals(self, states): values = self.model.predict_on_batch(states) - self.save_model(0, 432) return values @@ -120,9 +124,9 @@ class Network: latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) - tfe.Saver(model.variables).restore(latest_checkpoint) + tfe.Saver(self.model.variables).restore(latest_checkpoint) - variables_names = [v.name for v in self.model.variables] + # variables_names = [v.name for v in self.model.variables] # Restore trained episode count for model @@ -130,11 +134,11 @@ class Network: if 
os.path.isfile(episode_count_path): with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - else: - latest_checkpoint = tf.train.latest_checkpoint("./") - print("[NETWK] ({name}) Restoring model from:".format(name=self.name), - str(latest_checkpoint)) - tfe.Saver(self.model.variables).restore(latest_checkpoint) + # else: + # latest_checkpoint = tf.train.latest_checkpoint("./") + # print("[NETWK] ({name}) Restoring model from:".format(name=self.name), + # str(latest_checkpoint)) + # tfe.Saver(self.model.variables).restore(latest_checkpoint) #variables_names = [v.name for v in self.model.variables] @@ -143,9 +147,9 @@ class Network: #if os.path.isfile(episode_count_path): # with open(episode_count_path, 'r') as f: # self.config['start_episode'] = int(f.read()) + tf.train.get_or_create_global_step() - - def make_move(self, sess, board, roll, player): + def make_move(self, board, roll, player): """ Find the best move given a board, roll and a player, by finding all possible states one can go to and then picking the best, by using the network to evaluate each state. The highest score is picked @@ -157,12 +161,19 @@ class Network: :param player: Current player :return: A pair of the best state to go to, together with the score of that state """ - legal_moves = Board.calculate_legal_states(board, player, roll) - moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves] - scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] - best_score_index = np.array(scores).argmax() - best_move_pair = moves_and_scores[best_score_index] - return best_move_pair + legal_states = list(Board.calculate_legal_states(board, player, roll)) + legal_states = [list(tmp) for tmp in legal_states] + legal_states = np.array([Board.board_features_quack_fat(tmp, player)[0] for tmp in legal_states]) + legal_moves = [self.board_trans_func(board, player) for board in Board.calculate_legal_states(board, player, roll)] + + scores = self.model.predict_on_batch(legal_states) + transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores] + + best_score_idx = np.argmax(np.array(transformed_scores)) + best_move = legal_moves[best_score_idx] + best_score = scores[best_score_idx] + self.episodes_trained += 1 + return [best_move, best_score] def make_move_n_ply(self, sess, board, roll, player, n = 1): best_pair = self.calc_n_ply(n, sess, board, player, roll) diff --git a/network_test.py b/network_test.py index 58fec8a..5fb6d6e 100644 --- a/network_test.py +++ b/network_test.py @@ -9,7 +9,7 @@ from board import Board import main config = main.config.copy() -config['model'] = "tesauro_blah" +config['model'] = "eager_testings" config['force_creation'] = True config['board_representation'] = 'quack-fat' network = Network(config, config['model']) @@ -75,10 +75,18 @@ def calculate_possible_states(board): #print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1)) board = network.board_trans_func(Board.initial_state, 1) +#print(board) -input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] -all_input = np.array([input for _ in range(20)]) -print(network.calc_vals(all_input)) +pair = network.make_move(Board.initial_state, [3,2], 1) + +print(pair[1]) + +network.do_backprop(board, 0.9) + +network.save_model(2, 342) + +# all_input = np.array([input for _ in range(20)]) +# print(network.calc_vals(all_input)) #print(" "*10 + "network_test") diff --git 
a/tensorflow_impl_tests/eager_main.py b/tensorflow_impl_tests/eager_main.py index b2da143..f68f65f 100644 --- a/tensorflow_impl_tests/eager_main.py +++ b/tensorflow_impl_tests/eager_main.py @@ -1,6 +1,7 @@ import time import numpy as np import tensorflow as tf +from board import Board import tensorflow.contrib.eager as tfe @@ -23,12 +24,14 @@ model = tf.keras.Sequential([ #tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./")) -input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0] - -all_input = np.array([input for _ in range(20)]) +input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0] -single_in = np.array(input).reshape(1,-1) + +all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)]) + + +single_in = Board.board_features_quack_fat(input, 1) start = time.time() @@ -48,10 +51,10 @@ print(time.time() - start) print("-"*30) with tf.GradientTape() as tape: - val = model(np.array(input).reshape(1,-1)) + val = model(single_in) grads = tape.gradient(val, model.variables) -grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] +# grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] # print(model.variables[0][0]) weights_before = model.weights[0] @@ -60,14 +63,20 @@ start = time.time() #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] start = time.time() -#for gradient, trainable_var in zip(grads, model.variables): -# backprop_calc = 0.1 * (val - np.random.uniform(-1, 1)) * gradient -# trainable_var.assign_add(backprop_calc) +for gradient, trainable_var in zip(grads, model.variables): + backprop_calc = 0.1 * (0.9 - val) * gradient + trainable_var.assign_add(backprop_calc) -opt.apply_gradients(zip(grads, model.variables)) +# opt.apply_gradients(zip(grads, model.variables)) print(time.time() - start) -print(model(np.array(input).reshape(1,-1))) +print(model(single_in)) -tfe.Saver(model.variables).save("./tmp_ckpt") +vals = model.predict_on_batch(all_input) +vals = list(vals) +vals[3] = 4 +print(vals) +print(np.argmax(np.array(vals))) + +# tfe.Saver(model.variables).save("./tmp_ckpt") diff --git a/tensorflow_impl_tests/normal_main.py b/tensorflow_impl_tests/normal_main.py index 865f017..8e3887d 100644 --- a/tensorflow_impl_tests/normal_main.py +++ b/tensorflow_impl_tests/normal_main.py @@ -35,15 +35,16 @@ class Everything: trainable_vars = tf.trainable_variables() gradients = tf.gradients(self.value, trainable_vars) + difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), []) with tf.variable_scope('apply_gradients'): for gradient, trainable_var in zip(gradients, trainable_vars): - backprop_calc = self.learning_rate * difference_in_values * gradient + backprop_calc = 0.1 * difference_in_values * gradient grad_apply = trainable_var.assign_add(backprop_calc) apply_gradients.append(grad_apply) - with tf.control_dependencies([global_step_op]): - self.training_op = tf.group(*apply_gradients, name='training_op') + + self.training_op = tf.group(*apply_gradients, name='training_op') @@ -56,7 +57,9 @@ class Everything: val = sess.run(self.value, feed_dict={self.input: input.reshape(1,-1)}) print(time.time() - start) print(val) - + sess.run(self.training_op, feed_dict={self.input: input.reshape(1,-1)}) + val = sess.run(self.value, feed_dict={self.input: input.reshape(1, -1)}) + print(val) everything = 
Everything() everything.eval() From 6429e0732c7606a60ea5d8fb4765c5e98c6180cc Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Wed, 9 May 2018 23:15:35 +0200 Subject: [PATCH 03/29] We should now be able to both train and eval as per usual. I've added a file "global_step", which works as the new global_step counter, so we can use it for exp_decay. --- main.py | 14 +++--- network.py | 144 ++++++++++++++++++++++++----------------------------- 2 files changed, 73 insertions(+), 85 deletions(-) diff --git a/main.py b/main.py index e2e8988..eb73a98 100644 --- a/main.py +++ b/main.py @@ -60,7 +60,8 @@ config = { 'bench_storage_path': 'bench', 'board_representation': args.board_rep, 'force_creation': args.force_creation, - 'use_baseline': args.use_baseline + 'use_baseline': args.use_baseline, + 'global_step': 0 } # Create models folder @@ -191,7 +192,7 @@ if __name__ == "__main__": episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 20000] - def do_eval(sess): + def do_eval(): for eval_method in config['eval_methods']: result_path = os.path.join(config['bench_storage_path'], eval_method) + "-{}.log".format(int(time.time())) @@ -199,8 +200,7 @@ if __name__ == "__main__": for i in range(sample_count): start_time = time.time() # Evaluation measure to be benchmarked are described in `config` - outcomes = network.eval(episode_count = n, - tf_session = sess) + outcomes = network.eval(episode_count = n) time_diff = time.time() - start_time log_bench_eval_outcomes(outcomes, time = time_diff, @@ -210,8 +210,8 @@ if __name__ == "__main__": # CMM: oh no import tensorflow as tf - with tf.Session() as session: - network.restore_model(session) - do_eval(session) + + network.restore_model() + do_eval() diff --git a/network.py b/network.py index 818b886..d1d4a42 100644 --- a/network.py +++ b/network.py @@ -44,8 +44,6 @@ class Network: self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - self.global_step = tf.train.get_or_create_global_step() - #tf.train.get_or_create_global_step() # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") @@ -55,10 +53,18 @@ class Network: else: self.episodes_trained = 0 + global_step_path = os.path.join(self.checkpoint_path, "global_step") + if os.path.isfile(global_step_path): + with open(global_step_path, 'r') as f: + self.global_step = int(f.read()) + else: + self.global_step = 0 + + self.model = tf.keras.Sequential([ tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, - input_shape=(1,30)), + input_shape=(1,self.input_size)), tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) ]) @@ -72,11 +78,10 @@ class Network: def do_backprop(self, prev_state, value_next): self.learning_rate = tf.maximum(self.min_learning_rate, - self.exp_decay(self.max_learning_rate, self.episodes_trained, 0.96, 50000), + self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000), name="learning_rate") - # self.learning_rate = 0.1 - print(tf.train.get_global_step()) + with tf.GradientTape() as tape: value = self.model(prev_state.reshape(1,-1)) grads = tape.gradient(value, self.model.variables) @@ -91,19 +96,24 @@ class Network: backprop_calc = self.learning_rate * difference_in_values * grad train_var.assign_add(backprop_calc) - print(self.episodes_trained) - def eval_state(self, sess, state): - return sess.run(self.value, feed_dict={self.x: state}) - def save_model(self, episode_count, global_step): - 
tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=self.global_step) + def eval_state(self, state): + return self.model(state.reshape(1,-1)) + + def save_model(self, episode_count): + tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt')) #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: print("[NETWK] ({name}) Saving model to:".format(name=self.name), os.path.join(self.checkpoint_path, 'model.ckpt')) f.write(str(episode_count) + "\n") + with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f: + print("[NETWK] ({name}) Saving global step to:".format(name=self.name), + os.path.join(self.checkpoint_path, 'model.ckpt')) + f.write(str(self.global_step) + "\n") + def calc_vals(self, states): values = self.model.predict_on_batch(states) @@ -134,20 +144,14 @@ class Network: if os.path.isfile(episode_count_path): with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - # else: - # latest_checkpoint = tf.train.latest_checkpoint("./") - # print("[NETWK] ({name}) Restoring model from:".format(name=self.name), - # str(latest_checkpoint)) - # tfe.Saver(self.model.variables).restore(latest_checkpoint) - #variables_names = [v.name for v in self.model.variables] + global_step_path = os.path.join(self.checkpoint_path, "global_step") + if os.path.isfile(global_step_path): + with open(global_step_path, 'r') as f: + self.config['global_step'] = int(f.read()) + + - # Restore trained episode count for model - #episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") - #if os.path.isfile(episode_count_path): - # with open(episode_count_path, 'r') as f: - # self.config['start_episode'] = int(f.read()) - tf.train.get_or_create_global_step() def make_move(self, board, roll, player): """ @@ -161,10 +165,12 @@ class Network: :param player: Current player :return: A pair of the best state to go to, together with the score of that state """ - legal_states = list(Board.calculate_legal_states(board, player, roll)) - legal_states = [list(tmp) for tmp in legal_states] + legal_moves = list(Board.calculate_legal_states(board, player, roll)) + + legal_states = [list(tmp) for tmp in legal_moves] + legal_states = np.array([Board.board_features_quack_fat(tmp, player)[0] for tmp in legal_states]) - legal_moves = [self.board_trans_func(board, player) for board in Board.calculate_legal_states(board, player, roll)] + scores = self.model.predict_on_batch(legal_states) transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores] @@ -172,7 +178,7 @@ class Network: best_score_idx = np.argmax(np.array(transformed_scores)) best_move = legal_moves[best_score_idx] best_score = scores[best_score_idx] - self.episodes_trained += 1 + return [best_move, best_score] def make_move_n_ply(self, sess, board, roll, player, n = 1): @@ -385,7 +391,7 @@ class Network: return all_rolls_scores - def eval(self, episode_count, trained_eps = 0, tf_session = None): + def eval(self, episode_count, trained_eps = 0): """ Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval a model which has been given random weights, so it acts deterministically random. 
@@ -396,7 +402,7 @@ class Network: :return: outcomes: The outcomes of the evaluation session """ - def do_eval(sess, method, episodes = 1000, trained_eps = 0): + def do_eval(method, episodes = 1000, trained_eps = 0): """ Do the actual evaluation @@ -433,7 +439,7 @@ class Network: while Board.outcome(board) is None: roll = (random.randrange(1, 7), random.randrange(1, 7)) - board = (self.make_move(sess, board, roll, 1))[0] + board = (self.make_move(board, roll, 1))[0] roll = (random.randrange(1, 7), random.randrange(1, 7)) @@ -456,7 +462,7 @@ class Network: while Board.outcome(board) is None: roll = (random.randrange(1, 7), random.randrange(1, 7)) - board = (self.make_move(sess, board, roll, 1))[0] + board = (self.make_move(board, roll, 1))[0] roll = (random.randrange(1, 7), random.randrange(1, 7)) @@ -475,40 +481,26 @@ class Network: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0] - if tf_session == None: - with tf.Session() as session: - session.run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [ (method, do_eval(session, - method, - episode_count, - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes - else: - outcomes = [ (method, do_eval(tf_session, - method, - episode_count, - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes + + outcomes = [ (method, do_eval(method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): with tf.Session() as sess: difference_in_vals = 0 - writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph) - sess.run(tf.global_variables_initializer()) - self.restore_model(sess) + self.restore_model() - variables_names = [v.name for v in tf.trainable_variables()] - values = sess.run(variables_names) - for k, v in zip(variables_names, values): - print("Variable: ", k) - print("Shape: ", v.shape) - print(v) + #variables_names = [v.name for v in tf.trainable_variables()] + #values = sess.run(variables_names) + #for k, v in zip(variables_names, values): + # print("Variable: ", k) + # print("Shape: ", v.shape) + # print(v) start_time = time.time() @@ -536,21 +528,21 @@ class Network: i = 0 while Board.outcome(prev_board) is None: i += 1 + self.global_step += 1 - cur_board, cur_board_value = self.make_move(sess, - prev_board, + + cur_board, cur_board_value = self.make_move(prev_board, (random.randrange(1, 7), random.randrange(1, 7)), player) - difference_in_vals += abs((cur_board_value - self.eval_state(sess, self.board_trans_func(prev_board, player)))) + difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) # adjust weights - sess.run(self.training_op, - feed_dict={self.x: self.board_trans_func(prev_board, player), - self.value_next: cur_board_value}) - - player *= -1 + #print(cur_board) + if Board.outcome(cur_board) is None: + self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) + player *= -1 prev_board = cur_board @@ -560,27 +552,23 @@ class Network: final_score = np.array([Board.outcome(final_board)[1]]) scaled_final_score = ((final_score + 2) / 4) - with tf.name_scope("final"): - merged = tf.summary.merge_all() - global_step, summary, _ = sess.run([self.global_step, merged, self.training_op], - feed_dict={self.x: self.board_trans_func(prev_board, player), - self.value_next: 
scaled_final_score.reshape((1, 1))}) - writer.add_summary(summary, episode + trained_eps) + + self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) + + sys.stderr.write("\n") if episode % min(save_step_size, episodes) == 0: sys.stderr.write("[TRAIN] Saving model...\n") - self.save_model(sess, episode + trained_eps, global_step) + self.save_model(episode + trained_eps) if episode % 50 == 0: print_time_estimate(episode) sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(sess, episode+trained_eps, global_step) - - writer.close() - + self.save_model(episode+trained_eps) + return outcomes, difference_in_vals[0][0] From 9cfdd7e2b272e665488412dea558a6e5a37e1fdd Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 10:39:22 +0200 Subject: [PATCH 04/29] Added a verbosity flag, --verbose, which allows for printing of variables and such. --- main.py | 6 +++++- network.py | 28 ++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index eb73a98..a276220 100644 --- a/main.py +++ b/main.py @@ -38,6 +38,8 @@ parser.add_argument('--board-rep', action='store', dest='board_rep', help='name of board representation to use as input to neural network') parser.add_argument('--use-baseline', action='store_true', help='use the baseline model, note, has size 28') +parser.add_argument('--verbose', action='store_true', + help='If set, a lot of stuff will be printed') args = parser.parse_args() @@ -61,7 +63,9 @@ config = { 'board_representation': args.board_rep, 'force_creation': args.force_creation, 'use_baseline': args.use_baseline, - 'global_step': 0 + 'global_step': 0, + 'verbose': args.verbose + } # Create models folder diff --git a/network.py b/network.py index d1d4a42..56e183b 100644 --- a/network.py +++ b/network.py @@ -98,10 +98,20 @@ class Network: + def print_variables(self): + variables = self.model.variables + + for k in variables: + print(k) + def eval_state(self, state): return self.model(state.reshape(1,-1)) def save_model(self, episode_count): + """ + :param episode_count: + :return: + """ tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt')) #self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step) with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: @@ -113,6 +123,8 @@ class Network: print("[NETWK] ({name}) Saving global step to:".format(name=self.name), os.path.join(self.checkpoint_path, 'model.ckpt')) f.write(str(self.global_step) + "\n") + if self.config['verbose']: + self.print_variables() def calc_vals(self, states): @@ -150,6 +162,8 @@ class Network: with open(global_step_path, 'r') as f: self.config['global_step'] = int(f.read()) + if self.config['verbose']: + self.print_variables() @@ -489,19 +503,13 @@ class Network: in self.config['eval_methods'] ] return outcomes + def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): with tf.Session() as sess: difference_in_vals = 0 self.restore_model() - #variables_names = [v.name for v in tf.trainable_variables()] - #values = sess.run(variables_names) - #for k, v in zip(variables_names, values): - # print("Variable: ", k) - # print("Shape: ", v.shape) - # print(v) - start_time = time.time() def print_time_estimate(eps_completed): @@ -537,9 +545,13 @@ class Network: difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) + if 
self.config['verbose']: + print("Difference in values:", difference_in_vals) + print("Current board value :", cur_board_value) + print("Current board is :\n",cur_board) + # adjust weights - #print(cur_board) if Board.outcome(cur_board) is None: self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) player *= -1 From f2a67ca92e4a95e811b3cc01744652481a4828f8 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 10:49:25 +0200 Subject: [PATCH 05/29] All board reps should now work as input. --- board.py | 10 ++++----- network.py | 3 +-- network_test.py | 57 +++---------------------------------------------- 3 files changed, 9 insertions(+), 61 deletions(-) diff --git a/board.py b/board.py index d32197c..56c2737 100644 --- a/board.py +++ b/board.py @@ -40,7 +40,7 @@ class Board: def board_features_quack(board, player): board = list(board) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1, -1) + return np.array(board).reshape(1,28) # quack-fat @staticmethod @@ -51,7 +51,7 @@ class Board: board.append( 15 - sum(positives)) board.append(-15 - sum(negatives)) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1,-1) + return np.array(board).reshape(1,30) # quack-fatter @@ -68,7 +68,7 @@ class Board: board.append(15 - sum(positives)) board.append(-15 - sum(negatives)) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1, -1) + return np.array(board).reshape(1,30) # tesauro @staticmethod @@ -124,9 +124,9 @@ class Board: # Calculate how many pieces there must be in the home state and divide it by 15 features.append((15 - sum) / 15) features += ([1,0] if np.sign(cur_player) > 0 else [0,1]) - test = np.array(features).reshape(1,-1) + test = np.array(features) #print("TEST:",test) - return test + return test.reshape(1,198) diff --git a/network.py b/network.py index 56e183b..d14e1ea 100644 --- a/network.py +++ b/network.py @@ -183,8 +183,7 @@ class Network: legal_states = [list(tmp) for tmp in legal_moves] - legal_states = np.array([Board.board_features_quack_fat(tmp, player)[0] for tmp in legal_states]) - + legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states]) scores = self.model.predict_on_batch(legal_states) transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores] diff --git a/network_test.py b/network_test.py index 5fb6d6e..4f64612 100644 --- a/network_test.py +++ b/network_test.py @@ -36,46 +36,12 @@ boards = {initial_state, initial_state_2 } -def gen_21_rolls(): - """ - Calculate all possible rolls, [[1,1], [1,2] ..] 
- :return: All possible rolls - """ - a = [] - for x in range(1, 7): - for y in range(1, 7): - if not [x, y] in a and not [y, x] in a: - a.append([x, y]) - - return a -def calculate_possible_states(board): - possible_rolls = [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), - (1, 6), (2, 2), (2, 3), (2, 4), (2, 5), - (2, 6), (3, 3), (3, 4), (3, 5), (3, 6), - (4, 4), (4, 5), (4, 6), (5, 5), (5, 6), - (6, 6)] - - for roll in possible_rolls: - meh = Board.calculate_legal_states(board, -1, roll) - print(len(meh)) - return [Board.calculate_legal_states(board, -1, roll) - for roll - in possible_rolls] - - - -#for board in boards: -# calculate_possible_states(board) - -#print("-"*30) -#print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1)) - board = network.board_trans_func(Board.initial_state, 1) -#print(board) + pair = network.make_move(Board.initial_state, [3,2], 1) @@ -83,26 +49,9 @@ print(pair[1]) network.do_backprop(board, 0.9) -network.save_model(2, 342) -# all_input = np.array([input for _ in range(20)]) -# print(network.calc_vals(all_input)) +network.print_variables() -#print(" "*10 + "network_test") -#print(" "*20 + "Depth 1") -#print(network.calc_n_ply(1, session, Board.initial_state, 1, [2, 4])) +network.save_model(2) -#print(scores) - -#print(" "*20 + "Depth 2") -#print(network.n_ply(2, session, boards, 1)) - -# #print(x.shape) -# with graph_lol.as_default(): -# session_2 = tf.Session(graph = graph_lol) -# network_2 = Network(session_2) -# network_2.restore_model() -# print(network_2.eval_state(initial_state)) - -# print(network.eval_state(initial_state)) From 4efb229d34746703935830981e931caca128cc78 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 15:28:33 +0200 Subject: [PATCH 06/29] Added a lot of comments --- network.py | 90 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/network.py b/network.py index d14e1ea..c46f291 100644 --- a/network.py +++ b/network.py @@ -16,16 +16,21 @@ class Network: # board_features_tesauro has size 198 board_reps = { - 'quack-fat' : (30, Board.board_features_quack_fat), - 'quack' : (28, Board.board_features_quack), - 'tesauro' : (198, Board.board_features_tesauro), - 'quack-norm': (30, Board.board_features_quack_norm) + 'quack-fat' : (30, Board.board_features_quack_fat), + 'quack' : (28, Board.board_features_quack), + 'tesauro' : (198, Board.board_features_tesauro), + 'quack-norm' : (30, Board.board_features_quack_norm), + 'tesauro-poop': (198, Board.board_features_tesauro_wrong) } def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) def __init__(self, config, name): + """ + :param config: + :param name: + """ tf.enable_eager_execution() xavier_init = tf.contrib.layers.xavier_initializer() @@ -44,7 +49,6 @@ class Network: self.max_learning_rate = 0.1 self.min_learning_rate = 0.001 - #tf.train.get_or_create_global_step() # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): @@ -61,7 +65,6 @@ class Network: self.global_step = 0 - self.model = tf.keras.Sequential([ tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,self.input_size)), @@ -69,19 +72,29 @@ class Network: ]) - - - def exp_decay(self, max_lr, epi_counter, decay_rate, decay_steps): - res = max_lr * decay_rate**(epi_counter // decay_steps) + def exp_decay(self, max_lr, global_step, decay_rate, decay_steps): 
+ """ + Calculates the exponential decay on a learning rate + :param max_lr: The learning rate that the network starts at + :param global_step: The global step + :param decay_rate: The rate at which the learning rate should decay + :param decay_steps: The amount of steps between each decay + :return: The result of the exponential decay performed on the learning rate + """ + res = max_lr * decay_rate**(global_step // decay_steps) return res def do_backprop(self, prev_state, value_next): - + """ + Performs the Temporal-difference backpropagation step on the model + :param prev_state: The previous state of the game, this has its value recalculated + :param value_next: The value of the current move + :return: Nothing, the calculation is performed on the model of the network + """ self.learning_rate = tf.maximum(self.min_learning_rate, self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000), name="learning_rate") - with tf.GradientTape() as tape: value = self.model(prev_state.reshape(1,-1)) grads = tape.gradient(value, self.model.variables) @@ -89,8 +102,6 @@ class Network: difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - # global_step_op = self.global_step.assign_add(1) - with tf.variable_scope('apply_gradients'): for grad, train_var in zip(grads, self.model.variables): backprop_calc = self.learning_rate * difference_in_values * grad @@ -99,16 +110,25 @@ class Network: def print_variables(self): + """ + Prints all the variables of the model + :return: + """ variables = self.model.variables - for k in variables: print(k) def eval_state(self, state): + """ + Evaluates a single state + :param state: + :return: + """ return self.model(state.reshape(1,-1)) def save_model(self, episode_count): """ + Saves the model of the network, it references global_step as self.global_step :param episode_count: :return: """ @@ -128,6 +148,10 @@ class Network: def calc_vals(self, states): + """ + :param states: + :return: + """ values = self.model.predict_on_batch(states) return values @@ -195,6 +219,15 @@ class Network: return [best_move, best_score] def make_move_n_ply(self, sess, board, roll, player, n = 1): + """ + + :param sess: + :param board: + :param roll: + :param player: + :param n: + :return: + """ best_pair = self.calc_n_ply(n, sess, board, player, roll) return best_pair @@ -232,6 +265,15 @@ class Network: return [best_board, max(all_rolls_scores)] def calc_n_ply(self, n_init, sess, board, player, roll): + """ + + :param n_init: + :param sess: + :param board: + :param player: + :param roll: + :return: + """ # find all legal states from the given board and the given roll init_legal_states = Board.calculate_legal_states(board, player, roll) @@ -251,6 +293,14 @@ class Network: def n_ply(self, n_init, sess, boards_init, player_init): + """ + + :param n_init: + :param sess: + :param boards_init: + :param player_init: + :return: + """ def ply(n, boards, player): def calculate_possible_states(board): possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), @@ -504,6 +554,13 @@ class Network: def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): + """ + + :param episodes: + :param save_step_size: + :param trained_eps: + :return: + """ with tf.Session() as sess: difference_in_vals = 0 @@ -563,11 +620,8 @@ class Network: final_score = np.array([Board.outcome(final_board)[1]]) scaled_final_score = ((final_score + 2) / 4) - 
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) - - sys.stderr.write("\n") if episode % min(save_step_size, episodes) == 0: From 396d5b036d5fc5350fa9362c2558b74eb49c5e60 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 18:41:21 +0200 Subject: [PATCH 07/29] All values for boards and all rolls can now be calculated --- network.py | 182 +++++++++++++++++++++++++++++------------------- network_test.py | 1 + 2 files changed, 112 insertions(+), 71 deletions(-) diff --git a/network.py b/network.py index c46f291..d84036f 100644 --- a/network.py +++ b/network.py @@ -232,7 +232,7 @@ class Network: return best_pair - def calculate_1_ply(self, sess, board, roll, player): + def calculate_1_ply(self, board, roll, player): """ Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an exhaustive search is performed on the best 15 moves from the single ply. @@ -248,21 +248,122 @@ class Network: # find all legal states from the given board and the given roll init_legal_states = Board.calculate_legal_states(board, player, roll) - # find all values for the above boards - zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states] - # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck. - best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1) + legal_moves = list(Board.calculate_legal_states(board, player, roll)) - best_fifteen_boards = [x[0] for x in best_fifteen[:10]] + legal_states = [list(tmp) for tmp in legal_moves] - all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player) + legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states]) + + scores = self.calc_vals(legal_states) + scores = [score.numpy() for score in scores] + + moves_and_scores = list(zip(init_legal_states, scores)) + + sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1) + + best_boards = [x[0] for x in sorted_moves_and_scores] - best_score_index = np.array(all_rolls_scores).argmax() - best_board = best_fifteen_boards[best_score_index] - return [best_board, max(all_rolls_scores)] + self.do_ply(best_boards, player) + + + #best_score_index = np.array(all_rolls_scores).argmax() + #best_board = best_fifteen_boards[best_score_index] + + #return [best_board, max(all_rolls_scores)] + + def do_ply(self, boards, player): + """ + Calculates a single extra ply, resulting in a larger search space for our best move. + This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than + allowing the function to search deeper, which could result in an even larger search space. If we wish + to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply. + + :param sess: + :param boards: The boards to try all rolls on + :param player: The player of the previous ply + :return: An array of scores where each index describes one of the boards which was given as param + to this function. + """ + + import time + + def gen_21_rolls(): + """ + Calculate all possible rolls, [[1,1], [1,2] ..] 
+ :return: All possible rolls + """ + a = [] + for x in range(1, 7): + for y in range(1, 7): + if not [x, y] in a and not [y, x] in a: + a.append([x, y]) + + return a + + all_rolls = gen_21_rolls() + + all_rolls_scores = [] + + start = time.time() + + list_of_moves = [] + + for idx, board in enumerate(boards): + list_of_moves.append([]) + for roll in all_rolls: + all_states = list(Board.calculate_legal_states(board, player, roll)) + list_of_moves[idx].append(all_states) + + tmp = [] + for board in list_of_moves: + all_board_moves = [] + for roll in board: + for spec in roll: + legal_state = np.array(self.board_trans_func(spec, player)[0]) + all_board_moves.append(legal_state) + tmp.append(np.array(all_board_moves)) + + # print(tmp) + + for board in tmp: + print(self.model.predict_on_batch(board)) + + print(time.time() - start) + + # count = 0 + # # loop over boards + # for a_board in boards: + # a_board_scores = [] + # + # # loop over all rolls, for each board + # for roll in all_rolls: + # + # # find all states we can get to, given the board and roll and the opposite player + # all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll) + # count += len(all_rolls_boards) + # # find scores for each board found above + # spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) + # for new_board in all_rolls_boards] + # + # # if the original player is the -1 player, then we need to find (1-value) + # spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores] + # + # # find the best score + # best_score = max(spec_roll_scores) + # + # # append the best score to a_board_scores, where we keep track of the best score for each board + # a_board_scores.append(best_score) + # + # # save the expected average of board scores + # all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores)) + # + # # return all the average scores + # print(count) + # return all_rolls_scores + def calc_n_ply(self, n_init, sess, board, player, roll): """ @@ -392,67 +493,6 @@ class Network: best_score_pair = boards_with_scores[np.array(scores).argmax()] return best_score_pair - def do_ply(self, sess, boards, player): - """ - Calculates a single extra ply, resulting in a larger search space for our best move. - This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than - allowing the function to search deeper, which could result in an even larger search space. If we wish - to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply. - - :param sess: - :param boards: The boards to try all rolls on - :param player: The player of the previous ply - :return: An array of scores where each index describes one of the boards which was given as param - to this function. - """ - - def gen_21_rolls(): - """ - Calculate all possible rolls, [[1,1], [1,2] ..] 
- :return: All possible rolls - """ - a = [] - for x in range(1, 7): - for y in range(1, 7): - if not [x, y] in a and not [y, x] in a: - a.append([x, y]) - - return a - - all_rolls = gen_21_rolls() - - all_rolls_scores = [] - count = 0 - # loop over boards - for a_board in boards: - a_board_scores = [] - - # loop over all rolls, for each board - for roll in all_rolls: - - # find all states we can get to, given the board and roll and the opposite player - all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll) - count += len(all_rolls_boards) - # find scores for each board found above - spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) - for new_board in all_rolls_boards] - - # if the original player is the -1 player, then we need to find (1-value) - spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores] - - # find the best score - best_score = max(spec_roll_scores) - - # append the best score to a_board_scores, where we keep track of the best score for each board - a_board_scores.append(best_score) - - # save the expected average of board scores - all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores)) - - # return all the average scores - print(count) - return all_rolls_scores - def eval(self, episode_count, trained_eps = 0): """ diff --git a/network_test.py b/network_test.py index 4f64612..243d2df 100644 --- a/network_test.py +++ b/network_test.py @@ -55,3 +55,4 @@ network.print_variables() network.save_model(2) +network.calculate_1_ply(Board.initial_state, [3,2], 1) \ No newline at end of file From 2d84cd5a0b80c3ff46e94bcd08c7fc9dab9176e1 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 19:06:53 +0200 Subject: [PATCH 08/29] 1-ply now works again. 
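The 1-ply evaluation that this and the previous patch converge on can be summarised as: for every candidate board, expand all 21 distinct rolls for the opposing player, score every resulting state in a single predict_on_batch call, and use the mean score (flipped to the current player's perspective when playing as -1) as the board's value. A rough, illustrative version of that computation, assuming the repo's Board helpers and an already-built model object:

    import numpy as np
    from board import Board  # repo helper, the same one network.py uses

    def one_ply_scores(model, boards, player,
                       board_trans_func=Board.board_features_quack_fat):
        # Sketch of what do_ply computes; names are illustrative,
        # not the exact implementation in network.py.
        all_rolls = [(i, j) for i in range(1, 7) for j in range(i, 7)]  # 21 rolls

        means, transformed_means = [], []
        for board in boards:
            successors = []
            for roll in all_rolls:
                for state in Board.calculate_legal_states(board, player * -1, roll):
                    successors.append(board_trans_func(state, player * -1)[0])
            # One batched forward pass per candidate board.
            scores = model.predict_on_batch(np.array(successors))
            mean = float(np.mean(scores))
            means.append(mean)
            transformed_means.append(mean if player == 1 else 1 - mean)
        return means, transformed_means

calculate_1_ply then takes the argmax over the transformed means and returns the corresponding board together with its raw mean score.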
--- network.py | 42 +++++++++++++++++++----------------------- network_test.py | 2 +- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/network.py b/network.py index d84036f..657b924 100644 --- a/network.py +++ b/network.py @@ -262,17 +262,15 @@ class Network: sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=player==1) - best_boards = [x[0] for x in sorted_moves_and_scores] + best_boards = [x[0] for x in sorted_moves_and_scores[:10]] - self.do_ply(best_boards, player) + scores, trans_scores = self.do_ply(best_boards, player) + best_score_idx = np.array(trans_scores).argmax() - #best_score_index = np.array(all_rolls_scores).argmax() - #best_board = best_fifteen_boards[best_score_index] - - #return [best_board, max(all_rolls_scores)] + return [best_boards[best_score_idx], scores[best_score_idx]] def do_ply(self, boards, player): """ @@ -305,31 +303,29 @@ class Network: all_rolls = gen_21_rolls() - all_rolls_scores = [] - start = time.time() list_of_moves = [] for idx, board in enumerate(boards): - list_of_moves.append([]) - for roll in all_rolls: - all_states = list(Board.calculate_legal_states(board, player, roll)) - list_of_moves[idx].append(all_states) - - tmp = [] - for board in list_of_moves: all_board_moves = [] - for roll in board: - for spec in roll: - legal_state = np.array(self.board_trans_func(spec, player)[0]) - all_board_moves.append(legal_state) - tmp.append(np.array(all_board_moves)) + for roll in all_rolls: + all_states = list(Board.calculate_legal_states(board, player*-1, roll)) + for state in all_states: + state = np.array(self.board_trans_func(state, player*-1)[0]) + all_board_moves.append(state) + list_of_moves.append(np.array(all_board_moves)) - # print(tmp) - for board in tmp: - print(self.model.predict_on_batch(board)) + all_scores = [self.model.predict_on_batch(board) for board in list_of_moves] + transformed_scores = [x if player == 1 else (1-x) for x in all_scores] + + scores_means = [tf.reduce_mean(score) for score in all_scores] + transformed_means = [tf.reduce_mean(score) for score in transformed_scores] + + + return ([scores_means, transformed_means]) + print(time.time() - start) diff --git a/network_test.py b/network_test.py index 243d2df..a4d8dda 100644 --- a/network_test.py +++ b/network_test.py @@ -55,4 +55,4 @@ network.print_variables() network.save_model(2) -network.calculate_1_ply(Board.initial_state, [3,2], 1) \ No newline at end of file +print(network.calculate_1_ply(Board.initial_state, [3,2], 1)) \ No newline at end of file From 1aedc23de1cbb3f31842a407f289c91c6030c319 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 19:13:18 +0200 Subject: [PATCH 09/29] 1-ply now works again. 
--- network.py | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/network.py b/network.py index 657b924..86e0fe2 100644 --- a/network.py +++ b/network.py @@ -323,42 +323,9 @@ class Network: scores_means = [tf.reduce_mean(score) for score in all_scores] transformed_means = [tf.reduce_mean(score) for score in transformed_scores] - - return ([scores_means, transformed_means]) - - print(time.time() - start) - # count = 0 - # # loop over boards - # for a_board in boards: - # a_board_scores = [] - # - # # loop over all rolls, for each board - # for roll in all_rolls: - # - # # find all states we can get to, given the board and roll and the opposite player - # all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll) - # count += len(all_rolls_boards) - # # find scores for each board found above - # spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) - # for new_board in all_rolls_boards] - # - # # if the original player is the -1 player, then we need to find (1-value) - # spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores] - # - # # find the best score - # best_score = max(spec_roll_scores) - # - # # append the best score to a_board_scores, where we keep track of the best score for each board - # a_board_scores.append(best_score) - # - # # save the expected average of board scores - # all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores)) - # - # # return all the average scores - # print(count) - # return all_rolls_scores + return ([scores_means, transformed_means]) def calc_n_ply(self, n_init, sess, board, player, roll): From 6131d5b5f45677c6307203dfe572ffb9b7ac8345 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 19:25:28 +0200 Subject: [PATCH 10/29] Added comments for Christoffer! 
--- network.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/network.py b/network.py index 86e0fe2..10d238f 100644 --- a/network.py +++ b/network.py @@ -307,6 +307,7 @@ class Network: list_of_moves = [] + # Prepping of data for idx, board in enumerate(boards): all_board_moves = [] for roll in all_rolls: @@ -317,14 +318,16 @@ class Network: list_of_moves.append(np.array(all_board_moves)) + print(time.time() - start) + + start = time.time() + # Running data through networks all_scores = [self.model.predict_on_batch(board) for board in list_of_moves] transformed_scores = [x if player == 1 else (1-x) for x in all_scores] scores_means = [tf.reduce_mean(score) for score in all_scores] transformed_means = [tf.reduce_mean(score) for score in transformed_scores] - print(time.time() - start) - return ([scores_means, transformed_means]) From 4fa10861bb558cae99d588d6651e7b052f890fac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Thu, 10 May 2018 19:27:51 +0200 Subject: [PATCH 11/29] update TF dependency to 1.8.0 --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e7ac94a..2738d5d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,8 +16,8 @@ pyparsing==2.2.0 python-dateutil==2.7.2 pytz==2018.3 six==1.11.0 -tensorboard==1.6.0 -tensorflow==1.6.0 +tensorboard==1.8.0 +tensorflow==1.8.0 termcolor==1.1.0 Werkzeug==0.14.1 pygame==1.9.3 From 3b57c10b5ad6450463d356b39682d5c47fdf696a Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 22:57:27 +0200 Subject: [PATCH 12/29] Saves calling tf.reduce_mean on all values once. --- network.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/network.py b/network.py index 10d238f..070f0b3 100644 --- a/network.py +++ b/network.py @@ -321,12 +321,13 @@ class Network: print(time.time() - start) start = time.time() + # Running data through networks all_scores = [self.model.predict_on_batch(board) for board in list_of_moves] - transformed_scores = [x if player == 1 else (1-x) for x in all_scores] - scores_means = [tf.reduce_mean(score) for score in all_scores] - transformed_means = [tf.reduce_mean(score) for score in transformed_scores] + + transformed_means = [x if player == 1 else (1-x) for x in scores_means] + print(time.time() - start) return ([scores_means, transformed_means]) From 504308a9af13efb0e15dd0cf2621e84c9dc15f63 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Thu, 10 May 2018 23:22:41 +0200 Subject: [PATCH 13/29] Yet another input argument, "--ply", 0 for no look-ahead, 1 for a single look-ahead. 
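The value is kept as a string in config['ply'] and used as the key into a small dispatch table in Network.__init__, so '0' keeps the old behaviour of picking the best immediately reachable state while '1' routes every move through the 1-ply look-ahead. An illustrative invocation; the model-selection flags are the existing ones and are left out here:

    python3 main.py --eval --ply 1
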
--- main.py | 5 ++++- network.py | 31 ++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/main.py b/main.py index a276220..e3ded40 100644 --- a/main.py +++ b/main.py @@ -40,6 +40,8 @@ parser.add_argument('--use-baseline', action='store_true', help='use the baseline model, note, has size 28') parser.add_argument('--verbose', action='store_true', help='If set, a lot of stuff will be printed') +parser.add_argument('--ply', action='store', dest='ply', + help='defines the amount of ply used when deciding what move to make') args = parser.parse_args() @@ -64,7 +66,8 @@ config = { 'force_creation': args.force_creation, 'use_baseline': args.use_baseline, 'global_step': 0, - 'verbose': args.verbose + 'verbose': args.verbose, + 'ply': args.ply } diff --git a/network.py b/network.py index 070f0b3..6a91198 100644 --- a/network.py +++ b/network.py @@ -23,6 +23,7 @@ class Network: 'tesauro-poop': (198, Board.board_features_tesauro_wrong) } + def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) @@ -31,6 +32,12 @@ class Network: :param config: :param name: """ + + move_options = { + '1': self.make_move_1_ply, + '0': self.make_move_0_ply + } + tf.enable_eager_execution() xavier_init = tf.contrib.layers.xavier_initializer() @@ -40,6 +47,10 @@ class Network: self.name = name + self.make_move = move_options[ + self.config['ply'] + ] + # Set board representation from config self.input_size, self.board_trans_func = Network.board_reps[ self.config['board_representation'] @@ -191,7 +202,7 @@ class Network: - def make_move(self, board, roll, player): + def make_move_0_ply(self, board, roll, player): """ Find the best move given a board, roll and a player, by finding all possible states one can go to and then picking the best, by using the network to evaluate each state. The highest score is picked @@ -218,17 +229,16 @@ class Network: return [best_move, best_score] - def make_move_n_ply(self, sess, board, roll, player, n = 1): + def make_move_1_ply(self, board, roll, player): """ - - :param sess: :param board: :param roll: :param player: - :param n: :return: """ - best_pair = self.calc_n_ply(n, sess, board, player, roll) + start = time.time() + best_pair = self.calculate_1_ply(board, roll, player) + print(time.time() - start) return best_pair @@ -303,7 +313,7 @@ class Network: all_rolls = gen_21_rolls() - start = time.time() + # start = time.time() list_of_moves = [] @@ -318,9 +328,8 @@ class Network: list_of_moves.append(np.array(all_board_moves)) - print(time.time() - start) - - start = time.time() + # print(time.time() - start) + # start = time.time() # Running data through networks all_scores = [self.model.predict_on_batch(board) for board in list_of_moves] @@ -328,7 +337,7 @@ class Network: transformed_means = [x if player == 1 else (1-x) for x in scores_means] - print(time.time() - start) + # print(time.time() - start) return ([scores_means, transformed_means]) From 93224864a468a19c1b7f37a9a9541eb999751ceb Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Fri, 11 May 2018 13:35:01 +0200 Subject: [PATCH 14/29] More comments, backprop have been somewhat tested in the eager_main.py and normal_main.py. 
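The two test scripts now start from the same fixed constant weights (-2 and 0.2 instead of Xavier) so the eager backprop can be checked against the graph version. The update being tested nudges every variable by learning_rate * (target - value) * gradient; a minimal sketch of the eager side, with the same arbitrary test constants (0.1 learning rate, 0.9 target) as the script:

    with tf.GradientTape() as tape:
        value = model(single_in)                          # forward pass, recorded on the tape
    grads = tape.gradient(value, model.variables)         # dV/dw for every weight and bias

    difference = tf.reshape(tf.subtract(0.9, value), [])  # target - value
    for grad, var in zip(grads, model.variables):
        var.assign_add(0.1 * difference * grad)           # w <- w + lr * (target - value) * dV/dw
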
--- network.py | 16 +++--- tensorflow_impl_tests/eager_main.py | 76 ++++++++++++++++------------ tensorflow_impl_tests/normal_main.py | 4 +- 3 files changed, 54 insertions(+), 42 deletions(-) diff --git a/network.py b/network.py index 6a91198..f6e4914 100644 --- a/network.py +++ b/network.py @@ -160,7 +160,8 @@ class Network: def calc_vals(self, states): """ - :param states: + Calculate a score of each state in states + :param states: A number of states. The states have to be transformed before being given to this function. :return: """ values = self.model.predict_on_batch(states) @@ -205,8 +206,8 @@ class Network: def make_move_0_ply(self, board, roll, player): """ Find the best move given a board, roll and a player, by finding all possible states one can go to - and then picking the best, by using the network to evaluate each state. The highest score is picked - for the 1-player and the max(1-score) is picked for the -1-player. + and then picking the best, by using the network to evaluate each state. This is 0-ply, ie. no look-ahead. + The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player. :param sess: :param board: Current board @@ -231,6 +232,7 @@ class Network: def make_move_1_ply(self, board, roll, player): """ + Return the best board and best score based on a 1-ply look-ahead. :param board: :param roll: :param player: @@ -244,9 +246,9 @@ class Network: def calculate_1_ply(self, board, roll, player): """ - Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an - exhaustive search is performed on the best 15 moves from the single ply. - + Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then + all moves and scores are found for them. The expected score is then calculated for each of the boards from the + 0-ply. 
:param sess: :param board: :param roll: The original roll @@ -343,7 +345,6 @@ class Network: def calc_n_ply(self, n_init, sess, board, player, roll): """ - :param n_init: :param sess: :param board: @@ -371,7 +372,6 @@ class Network: def n_ply(self, n_init, sess, boards_init, player_init): """ - :param n_init: :param sess: :param boards_init: diff --git a/tensorflow_impl_tests/eager_main.py b/tensorflow_impl_tests/eager_main.py index f68f65f..0cce81f 100644 --- a/tensorflow_impl_tests/eager_main.py +++ b/tensorflow_impl_tests/eager_main.py @@ -18,11 +18,12 @@ input_size = 30 model = tf.keras.Sequential([ - tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init, input_shape=(1,input_size)), - tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init) + tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)), + tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2)) ]) -#tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./")) + +# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./")) input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0] @@ -38,45 +39,56 @@ start = time.time() all_predictions = model.predict_on_batch(all_input) -print(all_predictions) -print(time.time() - start) +learning_rate = 0.1 - -start = time.time() -all_predictions = [model(single_in) for _ in range(20)] - -#print(all_predictions[:10]) -print(time.time() - start) - -print("-"*30) with tf.GradientTape() as tape: - val = model(single_in) -grads = tape.gradient(val, model.variables) + value = model(single_in) -# grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] -# print(model.variables[0][0]) -weights_before = model.weights[0] +print("Before:", value) -start = time.time() -#[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] +grads = tape.gradient(value, model.variables) +print("/"*40,"model_variables","/"*40) +print(model.variables) +print("/"*40,"grads","/"*40) +print(grads) -start = time.time() -for gradient, trainable_var in zip(grads, model.variables): - backprop_calc = 0.1 * (0.9 - val) * gradient - trainable_var.assign_add(backprop_calc) +difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), []) -# opt.apply_gradients(zip(grads, model.variables)) +for grad, train_var in zip(grads, model.variables): + backprop_calc = 0.1 * difference_in_values * grad + train_var.assign_add(backprop_calc) -print(time.time() - start) +value = model(single_in) +print("/"*40,"model_variables","/"*40) +print(model.variables) +print("After:", value) -print(model(single_in)) -vals = model.predict_on_batch(all_input) -vals = list(vals) -vals[3] = 4 -print(vals) -print(np.argmax(np.array(vals))) +# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)] +# +# # print(model.variables[0][0]) +# weights_before = model.weights[0] +# +# start = time.time() +# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)] +# +# start = time.time() +# for gradient, trainable_var in zip(grads, model.variables): +# backprop_calc = 0.1 * (0.9 - val) * gradient +# trainable_var.assign_add(backprop_calc) +# +# # opt.apply_gradients(zip(grads, model.variables)) +# +# print(time.time() - start) +# +# print(model(single_in)) +# 
+# vals = model.predict_on_batch(all_input) +# vals = list(vals) +# vals[3] = 4 +# print(vals) +# print(np.argmax(np.array(vals))) # tfe.Saver(model.variables).save("./tmp_ckpt") diff --git a/tensorflow_impl_tests/normal_main.py b/tensorflow_impl_tests/normal_main.py index 8e3887d..a8b106c 100644 --- a/tensorflow_impl_tests/normal_main.py +++ b/tensorflow_impl_tests/normal_main.py @@ -16,9 +16,9 @@ class Everything: W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size), - initializer=xavier_init) + initializer=tf.constant_initializer(-2)) W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), - initializer=xavier_init) + initializer=tf.constant_initializer(0.2)) b_1 = tf.get_variable("b_1", (self.hidden_size,), initializer=tf.zeros_initializer) From 03e61a59cf3cd76f7c855d5adf5867ee1073803c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Fri, 11 May 2018 17:29:22 +0200 Subject: [PATCH 15/29] quack --- board.py | 96 +------- quack/quack.c | 241 ++++++++++++++++++++ quack/setup.py | 9 + test.py | 598 ++++++++++++++++++++++++------------------------- 4 files changed, 552 insertions(+), 392 deletions(-) create mode 100644 quack/quack.c create mode 100644 quack/setup.py diff --git a/board.py b/board.py index 56c2737..5f438ae 100644 --- a/board.py +++ b/board.py @@ -1,3 +1,4 @@ +import quack import numpy as np import itertools @@ -12,11 +13,7 @@ class Board: @staticmethod def idxs_with_checkers_of_player(board, player): - idxs = [] - for idx, checker_count in enumerate(board): - if checker_count * player >= 1: - idxs.append(idx) - return idxs + return quack.idxs_with_checkers_of_player(board, player) # TODO: Write a test for this @@ -132,90 +129,7 @@ class Board: @staticmethod def is_move_valid(board, player, face_value, move): - if face_value == 0: - return True - else: - def sign(a): - return (a > 0) - (a < 0) - - from_idx = move[0] - to_idx = move[1] - to_state = None - from_state = board[from_idx] - delta = to_idx - from_idx - direction = sign(delta) - bearing_off = None - - # FIXME: Use get instead of array-like indexing - if to_idx >= 1 and to_idx <= 24: - to_state = board[to_idx] - bearing_off = False - else: # Bearing off - to_state = 0 - bearing_off = True - - # print("_"*20) - # print("board:", board) - # print("to_idx:", to_idx, "board[to_idx]:", board[to_idx], "to_state:", to_state) - # print("+"*20) - - def is_forward_move(): - return direction == player - - def face_value_match_move_length(): - return abs(delta) == face_value - - def bear_in_if_checker_on_bar(): - if player == 1: - bar = 0 - else: - bar = 25 - - bar_state = board[bar] - - if bar_state != 0: - return from_idx == bar - else: - return True - - def checkers_at_from_idx(): - return sign(from_state) == player - - def no_block_at_to_idx(): - if -sign(to_state) == player: - return abs(to_state) == 1 - else: - return True - - def can_bear_off(): - checker_idxs = Board.idxs_with_checkers_of_player(board, player) - def is_moving_backmost_checker(): - if player == 1: - return all([(idx >= from_idx) for idx in checker_idxs]) - else: - return all([(idx <= from_idx) for idx in checker_idxs]) - - def all_checkers_in_last_quadrant(): - if player == 1: - return all([(idx >= 19) for idx in checker_idxs]) - else: - return all([(idx <= 6) for idx in checker_idxs]) - - return all([ is_moving_backmost_checker(), - all_checkers_in_last_quadrant() ]) - - # TODO: add switch here instead of wonky ternary in all - # print("is_forward:",is_forward_move()) - # 
print("face_value:",face_value_match_move_length()) - # print("Checkes_at_from:",checkers_at_from_idx()) - # print("no_block:",no_block_at_to_idx()) - - return all([ is_forward_move(), - face_value_match_move_length(), - bear_in_if_checker_on_bar(), - checkers_at_from_idx(), - no_block_at_to_idx(), - can_bear_off() if bearing_off else True ]) + return quack.is_move_valid(board, player, face_value, move) @staticmethod def any_move_valid(board, player, roll): @@ -393,7 +307,3 @@ class Board: board[to_idx] += player return tuple(board) - - @staticmethod - def flip(board): - return tuple((-x for x in reversed(board))) diff --git a/quack/quack.c b/quack/quack.c new file mode 100644 index 0000000..b09e722 --- /dev/null +++ b/quack/quack.c @@ -0,0 +1,241 @@ +#include + +static PyObject* QuackError; + +/* Utility functions */ +int sign(int x) { + return (x > 0) - (x < 0); +} + +int abs(int x) { + if (x >= 0) { + return x; + } else { + return -x; + } +} +/* end utility functions */ + +/* Helper functions */ + +int *idxs_with_checkers_of_player(int board[], int player) { + int idxs_tmp[26]; + int ctr = 0; + + for (int i = 0; i < 26; i++) { + if (board[i] * player >= 1) { + idxs_tmp[ctr] = i; + ctr++; + } + } + + int *idxs = malloc((1 + ctr) * sizeof(int)); + if (idxs == NULL) { + fprintf(stderr, "malloc failed\n"); + abort(); + } + + idxs[0] = ctr; + for (int i = 0; i < ctr; i++) { + idxs[i+1] = idxs_tmp[i]; + } + + return idxs; +} + +int is_forward_move(int direction, int player) { + return direction == player; +} + +int face_value_match_move_length(int delta, int face_value) { + return abs(delta) == face_value; +} + +int bear_in_if_checker_on_bar(int board[], int player, int from_idx) { + int bar; + + if (player == 1) bar = 0; + else bar = 25; + + if (board[bar] != 0) return from_idx == bar; + else return 1; +} + +int checkers_at_from_idx(int from_state, int player) { + return sign(from_state) == player; +} + +int no_block_at_to_idx(int to_state, int player) { + if (-sign(to_state) == player) return abs(to_state) == 1; + else return 1; +} + +int can_bear_off(int board[], int player, int from_idx) { + int* checker_idxs = idxs_with_checkers_of_player(board, player); + + if (player == 1) { + for (int i = 1; i <= checker_idxs[0]; i++) { + if ( !((checker_idxs[i] >= from_idx) && + (checker_idxs[i] >= 19)) ) return 0; + } + } else { + for (int i = 1; i <= checker_idxs[0]; i++) { + if ( !((checker_idxs[i] <= from_idx) && + (checker_idxs[i] <= 6)) ) return 0; + } + } + + return 1; +} + +/* end helper functions */ + +int is_move_valid(int board[], int player, int face_value, int move[]) { + int from_idx = move[0]; + int to_idx = move[1]; + int to_state; + int from_state = board[from_idx]; + int delta = to_idx - from_idx; + int direction = sign(delta); + int bearing_off; + + if (to_idx >= 1 && to_idx <= 24) { + to_state = board[to_idx]; + bearing_off = 0; + } else { + to_state = 0; + bearing_off = 1; + } + + return is_forward_move(direction, player) + && face_value_match_move_length(delta, face_value) + && bear_in_if_checker_on_bar(board, player, from_idx) + && checkers_at_from_idx(from_state, player) + && no_block_at_to_idx(to_state, player) + && (!bearing_off || can_bear_off(board, player, from_idx)) + ; +} + +/* Meta definitions */ +static PyObject* +quack_is_move_valid(PyObject *self, PyObject *args) { + int board[26]; + int player; + int face_value; + int move[2]; + + int validity; + + PyObject* board_tuple_obj; + PyObject* move_tuple_obj; + + if (! 
PyArg_ParseTuple(args, "O!iiO!", + &PyTuple_Type, &board_tuple_obj, + &player, + &face_value, + &PyTuple_Type, &move_tuple_obj)) + return NULL; + + long numValuesBoard; + numValuesBoard = PyTuple_Size(board_tuple_obj); + if (numValuesBoard != 26) { + PyErr_SetString(QuackError, "Board tuple must have 26 entries"); + return NULL; + } + + PyObject* board_val_obj; + // Iterate over tuple to retreive positions + for (int i=0; i Date: Fri, 11 May 2018 19:00:39 +0200 Subject: [PATCH 16/29] quack kind of works --- quack/quack.c | 239 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 199 insertions(+), 40 deletions(-) diff --git a/quack/quack.c b/quack/quack.c index b09e722..61e3e53 100644 --- a/quack/quack.c +++ b/quack/quack.c @@ -2,6 +2,12 @@ static PyObject* QuackError; +typedef struct board_list board_list; +struct board_list { + int size; + PyObject** list; +}; + /* Utility functions */ int sign(int x) { return (x > 0) - (x < 0); @@ -88,6 +94,8 @@ int can_bear_off(int board[], int player, int from_idx) { return 1; } + + /* end helper functions */ int is_move_valid(int board[], int player, int face_value, int move[]) { @@ -116,7 +124,127 @@ int is_move_valid(int board[], int player, int face_value, int move[]) { ; } +int* do_move(int board[], int player, int move[]) { + int from_idx = move[0]; + int to_idx = move[1]; + + /* "lift" checker */ + board[from_idx] -= player; + + /* Return early if bearing off */ + if (to_idx < 1 || to_idx > 24) return board; + + /* Hit opponent checker */ + if (board[to_idx] * player == -1) { + /* Move checker to bar */ + if (player == 1) board[25] -= player; + else board[0] -= player; + + board[to_idx] = 0; + } + + /* Put down checker */ + board[to_idx] += player; + + return board; +} + +int* do_move_clone(int board[], int player, int move[]) { + int new_board[26]; + for (int i = 0; i < 26; i++) { + new_board[i] = board[i]; + } + return do_move(new_board, player, move); +} + +void board_to_pyboard(PyObject* board_tuple, int board[]) { + for (int i = 0; i < 26; i++) { + PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i])); + } + return; +} + +board_list calc_moves(int board[], int player, int face_value) { + int* checker_idxs = idxs_with_checkers_of_player(board, player); + board_list boards = { .size = 0, + .list = malloc((15 + 1) * sizeof(PyObject*)) }; + if (boards.list == NULL) { + fprintf(stderr, "malloc failed\n"); + abort(); + } + + if (checker_idxs[0] == 0) { + boards.size = 1; + PyObject* board_tuple = PyTuple_New(26); + board_to_pyboard(board_tuple, board); + boards.list[0] = board_tuple; + return boards; + } + + int ctr = 0; + for (int i = 1; i <= checker_idxs[0]; i++) { + int move[2]; + move[0] = checker_idxs[i]; + move[1] = checker_idxs[i] + (face_value * player); + for (int i = 0; i < 2; i++) { + printf("move[%i]: %i\n", i, move[i]); + } + if (is_move_valid(board, player, face_value, move)) { + printf("legal\n"); + int* new_board = do_move_clone(board, player, move); + PyObject* board_tuple = PyTuple_New(26); + board_to_pyboard(board_tuple, new_board); + + // segfault maybe :'( + //free(new_board); + + boards.list[i] = board_tuple; + ctr++; + } + } + + boards.size = ctr; + printf("boards.size: %i\n\n",boards.size); + + + return boards; +} + /* Meta definitions */ +int extract_board(int *board, PyObject* board_tuple_obj) { + long numValuesBoard; + numValuesBoard = PyTuple_Size(board_tuple_obj); + if (numValuesBoard != 26) { + PyErr_SetString(QuackError, "Board tuple must have 26 entries"); + return 1; + } + + PyObject* 
board_val_obj; + // Iterate over tuple to retreive positions + for (int i=0; i Date: Fri, 11 May 2018 20:07:27 +0200 Subject: [PATCH 17/29] more quack for board --- board.py | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/board.py b/board.py index 5f438ae..6e9aeb1 100644 --- a/board.py +++ b/board.py @@ -185,6 +185,8 @@ class Board: # turn and then do something with the second die def calc_moves(board, face_value): + return quack.calc_moves(board, player, face_value) + idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player) if len(idxs_with_checkers) == 0: return [board] @@ -271,39 +273,4 @@ class Board: @staticmethod def do_move(board, player, move): # Implies that move is valid; make sure to check move validity before calling do_move(...) - - def move_to_bar(board, to_idx): - board = list(board) - if player == 1: - board[25] -= player - else: - board[0] -= player - - board[to_idx] = 0 - return board - - # TODO: Moving in from bar is handled by the representation - # TODONE: Handle bearing off - - from_idx = move[0] - #print("from_idx: ", from_idx) - to_idx = move[1] - #print("to_idx: ", to_idx) - # pdb.set_trace() - board = list(board) # Make mutable copy of board - - # 'Lift' checker - board[from_idx] -= player - - # Handle bearing off - if to_idx < 1 or to_idx > 24: - return tuple(board) - - # Handle hitting checkers - if board[to_idx] * player == -1: - board = move_to_bar(board, to_idx) - - # Put down checker - board[to_idx] += player - - return tuple(board) + return quack.do_move(board, player, move) From 383dd7aa4b8cbfda861a7532bd7b3c0df85b3137 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Fri, 11 May 2018 20:13:43 +0200 Subject: [PATCH 18/29] code works again; quack gave ~3 times improvement for calc_moves --- board.py | 19 ------------ quack/quack.c | 81 ++++++++++++++++++++++----------------------------- 2 files changed, 35 insertions(+), 65 deletions(-) diff --git a/board.py b/board.py index 6e9aeb1..0f403ed 100644 --- a/board.py +++ b/board.py @@ -187,25 +187,6 @@ class Board: def calc_moves(board, face_value): return quack.calc_moves(board, player, face_value) - idxs_with_checkers = Board.idxs_with_checkers_of_player(board, player) - if len(idxs_with_checkers) == 0: - return [board] - boards = [(Board.do_move(board, - player, - (idx, idx + (face_value * player))) - if Board.is_move_valid(board, - player, - face_value, - (idx, idx + (face_value * player))) - else None) - for idx in idxs_with_checkers] - # print("pls:",boards) - board_list = list(filter(None, boards)) # Remove None-values - # if len(board_list) == 0: - # return [board] - # print("board list:", board_list) - return board_list - # Problem with cal_moves: Method can return empty list (should always contain at least same board). # *Update*: Seems to be fixed. 
diff --git a/quack/quack.c b/quack/quack.c index 61e3e53..8292f39 100644 --- a/quack/quack.c +++ b/quack/quack.c @@ -5,7 +5,7 @@ static PyObject* QuackError; typedef struct board_list board_list; struct board_list { int size; - PyObject** list; + PyObject* list[16]; }; /* Utility functions */ @@ -14,11 +14,8 @@ int sign(int x) { } int abs(int x) { - if (x >= 0) { - return x; - } else { - return -x; - } + if (x >= 0) return x; + else return -x; } /* end utility functions */ @@ -124,7 +121,7 @@ int is_move_valid(int board[], int player, int face_value, int move[]) { ; } -int* do_move(int board[], int player, int move[]) { +void do_move(int board[], int player, int move[]) { int from_idx = move[0]; int to_idx = move[1]; @@ -132,7 +129,7 @@ int* do_move(int board[], int player, int move[]) { board[from_idx] -= player; /* Return early if bearing off */ - if (to_idx < 1 || to_idx > 24) return board; + if (to_idx < 1 || to_idx > 24) return; /* Hit opponent checker */ if (board[to_idx] * player == -1) { @@ -146,37 +143,39 @@ int* do_move(int board[], int player, int move[]) { /* Put down checker */ board[to_idx] += player; - return board; -} - -int* do_move_clone(int board[], int player, int move[]) { - int new_board[26]; - for (int i = 0; i < 26; i++) { - new_board[i] = board[i]; - } - return do_move(new_board, player, move); -} - -void board_to_pyboard(PyObject* board_tuple, int board[]) { - for (int i = 0; i < 26; i++) { - PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i])); - } return; } -board_list calc_moves(int board[], int player, int face_value) { - int* checker_idxs = idxs_with_checkers_of_player(board, player); - board_list boards = { .size = 0, - .list = malloc((15 + 1) * sizeof(PyObject*)) }; - if (boards.list == NULL) { +int* do_move_clone(int board[], int player, int move[]) { + int* new_board = malloc(sizeof(int) * 26); + if (new_board == NULL) { fprintf(stderr, "malloc failed\n"); abort(); } + for (int i = 0; i < 26; i++) { + new_board[i] = board[i]; + } + + do_move(new_board, player, move); + return new_board; +} + +PyObject* store_board_to_pyobject(int board[]) { + PyObject* board_tuple = PyTuple_New(26); + for (int i = 0; i < 26; i++) { + PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i])); + } + return board_tuple; +} + +board_list calc_moves(int board[], int player, int face_value) { + int* checker_idxs = idxs_with_checkers_of_player(board, player); + board_list boards = { .size = 0 }; + if (checker_idxs[0] == 0) { boards.size = 1; - PyObject* board_tuple = PyTuple_New(26); - board_to_pyboard(board_tuple, board); + PyObject* board_tuple = store_board_to_pyobject(board); boards.list[0] = board_tuple; return boards; } @@ -186,27 +185,20 @@ board_list calc_moves(int board[], int player, int face_value) { int move[2]; move[0] = checker_idxs[i]; move[1] = checker_idxs[i] + (face_value * player); - for (int i = 0; i < 2; i++) { - printf("move[%i]: %i\n", i, move[i]); - } + if (is_move_valid(board, player, face_value, move)) { - printf("legal\n"); int* new_board = do_move_clone(board, player, move); - PyObject* board_tuple = PyTuple_New(26); - board_to_pyboard(board_tuple, new_board); + PyObject* board_tuple = store_board_to_pyobject(new_board); // segfault maybe :'( - //free(new_board); + free(new_board); - boards.list[i] = board_tuple; + boards.list[ctr] = board_tuple; ctr++; } } boards.size = ctr; - printf("boards.size: %i\n\n",boards.size); - - return boards; } @@ -319,8 +311,7 @@ quack_do_move(PyObject *self, PyObject *args) { do_move(board, player, move); - 
PyObject* board_tuple = PyTuple_New(26); - board_to_pyboard(board_tuple, board); + PyObject* board_tuple = store_board_to_pyobject(board); return Py_BuildValue("O", board_tuple); } @@ -345,13 +336,11 @@ quack_calc_moves(PyObject *self, PyObject *args) { PyObject* boards_list = PyList_New(0); for (int i = 0; i < boards.size; i++) { - printf("%i\n",i); if (PyList_Append(boards_list, boards.list[i])) { printf("list insertion failed at index %i\n",i); + abort(); } - //free(boards.list[i]); } - //free(boards.list); return Py_BuildValue("O", boards_list); } From 1aa9cf705fd7d6ce0027ad7e36b73d6cd13d5e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Fri, 11 May 2018 21:24:10 +0200 Subject: [PATCH 19/29] quack without leaks --- board.py | 15 +++--- network.py | 12 +---- quack/quack.c | 125 ++++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 110 insertions(+), 42 deletions(-) diff --git a/board.py b/board.py index 0f403ed..0306a22 100644 --- a/board.py +++ b/board.py @@ -42,13 +42,14 @@ class Board: # quack-fat @staticmethod def board_features_quack_fat(board, player): - board = list(board) - positives = [x if x > 0 else 0 for x in board] - negatives = [x if x < 0 else 0 for x in board] - board.append( 15 - sum(positives)) - board.append(-15 - sum(negatives)) - board += ([1, 0] if np.sign(player) > 0 else [0, 1]) - return np.array(board).reshape(1,30) + return np.array(quack.board_features_quack_fat(board,player)).reshape(1,30) + # board = list(board) + # positives = [x if x > 0 else 0 for x in board] + # negatives = [x if x < 0 else 0 for x in board] + # board.append( 15 - sum(positives)) + # board.append(-15 - sum(negatives)) + # board += ([1, 0] if np.sign(player) > 0 else [0, 1]) + # return np.array(board).reshape(1,30) # quack-fatter diff --git a/network.py b/network.py index f6e4914..5ea80cd 100644 --- a/network.py +++ b/network.py @@ -216,10 +216,7 @@ class Network: :return: A pair of the best state to go to, together with the score of that state """ legal_moves = list(Board.calculate_legal_states(board, player, roll)) - - legal_states = [list(tmp) for tmp in legal_moves] - - legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states]) + legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves]) scores = self.model.predict_on_batch(legal_states) transformed_scores = [x if np.sign(player) > 0 else 1 - x for x in scores] @@ -260,12 +257,7 @@ class Network: # find all legal states from the given board and the given roll init_legal_states = Board.calculate_legal_states(board, player, roll) - - legal_moves = list(Board.calculate_legal_states(board, player, roll)) - - legal_states = [list(tmp) for tmp in legal_moves] - - legal_states = np.array([self.board_trans_func(tmp, player)[0] for tmp in legal_states]) + legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states]) scores = self.calc_vals(legal_states) scores = [score.numpy() for score in scores] diff --git a/quack/quack.c b/quack/quack.c index 8292f39..0c72ae5 100644 --- a/quack/quack.c +++ b/quack/quack.c @@ -34,7 +34,7 @@ int *idxs_with_checkers_of_player(int board[], int player) { int *idxs = malloc((1 + ctr) * sizeof(int)); if (idxs == NULL) { - fprintf(stderr, "malloc failed\n"); + PyErr_NoMemory(); abort(); } @@ -79,15 +79,22 @@ int can_bear_off(int board[], int player, int from_idx) { if (player == 1) { for (int i = 1; i <= checker_idxs[0]; i++) { if ( !((checker_idxs[i] >= from_idx) && - 
(checker_idxs[i] >= 19)) ) return 0; + (checker_idxs[i] >= 19)) ) { + free(checker_idxs); + return 0; + } } } else { for (int i = 1; i <= checker_idxs[0]; i++) { if ( !((checker_idxs[i] <= from_idx) && - (checker_idxs[i] <= 6)) ) return 0; + (checker_idxs[i] <= 6)) ) { + free(checker_idxs); + return 0; + } } } - + + free(checker_idxs); return 1; } @@ -149,7 +156,7 @@ void do_move(int board[], int player, int move[]) { int* do_move_clone(int board[], int player, int move[]) { int* new_board = malloc(sizeof(int) * 26); if (new_board == NULL) { - fprintf(stderr, "malloc failed\n"); + PyErr_NoMemory(); abort(); } @@ -161,9 +168,9 @@ int* do_move_clone(int board[], int player, int move[]) { return new_board; } -PyObject* store_board_to_pyobject(int board[]) { - PyObject* board_tuple = PyTuple_New(26); - for (int i = 0; i < 26; i++) { +PyObject* store_board_to_pytuple(int board[], int size) { + PyObject* board_tuple = PyTuple_New(size); + for (int i = 0; i < size; i++) { PyTuple_SetItem(board_tuple, i, Py_BuildValue("i", board[i])); } return board_tuple; @@ -175,8 +182,9 @@ board_list calc_moves(int board[], int player, int face_value) { if (checker_idxs[0] == 0) { boards.size = 1; - PyObject* board_tuple = store_board_to_pyobject(board); + PyObject* board_tuple = store_board_to_pytuple(board, 26); boards.list[0] = board_tuple; + free(checker_idxs); return boards; } @@ -188,7 +196,7 @@ board_list calc_moves(int board[], int player, int face_value) { if (is_move_valid(board, player, face_value, move)) { int* new_board = do_move_clone(board, player, move); - PyObject* board_tuple = store_board_to_pyobject(new_board); + PyObject* board_tuple = store_board_to_pytuple(new_board, 26); // segfault maybe :'( free(new_board); @@ -198,10 +206,40 @@ board_list calc_moves(int board[], int player, int face_value) { } } + free(checker_idxs); + boards.size = ctr; return boards; } +int* board_features_quack_fat(int board[], int player) { + int* new_board = malloc(sizeof(int) * 30); + if (new_board == NULL) { + PyErr_NoMemory(); + abort(); + } + + int pos_sum = 0; + int neg_sum = 0; + for (int i = 0; i < 26; i++) { + new_board[i] = board[i]; + if (sign(new_board[i] > 0)) pos_sum += new_board[i]; + else neg_sum += new_board[i]; + } + + new_board[26] = 15 - pos_sum; + new_board[27] = -15 - neg_sum; + if (player == 1) { + new_board[28] = 1; + new_board[29] = 0; + } else { + new_board[28] = 0; + new_board[29] = 1; + } + + return new_board; +} + /* Meta definitions */ int extract_board(int *board, PyObject* board_tuple_obj) { long numValuesBoard; @@ -233,7 +271,7 @@ int extract_move(int *move, PyObject* move_tuple_obj) { move_val_obj = PyTuple_GetItem(move_tuple_obj, i); move[i] = PyLong_AsLong(move_val_obj); } - + return 0; } @@ -244,8 +282,6 @@ quack_is_move_valid(PyObject *self, PyObject *args) { int face_value; int move[2]; - int validity; - PyObject* board_tuple_obj; PyObject* move_tuple_obj; @@ -258,9 +294,9 @@ quack_is_move_valid(PyObject *self, PyObject *args) { if (extract_board(board, board_tuple_obj)) return NULL; if (extract_move(move, move_tuple_obj)) return NULL; - - validity = is_move_valid(board, player, face_value, move); - return Py_BuildValue("i", validity); + + if (is_move_valid(board, player, face_value, move)) Py_RETURN_TRUE; + else Py_RETURN_FALSE; } static PyObject* @@ -281,14 +317,17 @@ quack_idxs_with_checkers_of_player(PyObject *self, PyObject *args) { if (extract_board(board, board_tuple_obj)) return NULL; idxs = idxs_with_checkers_of_player(board, player); - PyObject* idxs_list = 
PyList_New(0); + PyObject* idxs_list = PyList_New(idxs[0]); - for (int i = 1; i <= idxs[0]; i++) { - PyList_Append(idxs_list, Py_BuildValue("i", idxs[i])); + for (int i = 0; i < idxs[0]; i++) { + PyList_SetItem(idxs_list, i, Py_BuildValue("i", idxs[i+1])); } free(idxs); + + PyObject *result = Py_BuildValue("O", idxs_list); + Py_DECREF(idxs_list); - return Py_BuildValue("O", idxs_list); + return result; } static PyObject* @@ -310,10 +349,15 @@ quack_do_move(PyObject *self, PyObject *args) { if (extract_move(move, move_tuple_obj)) return NULL; do_move(board, player, move); + PyObject* board_tuple = store_board_to_pytuple(board, 26); - PyObject* board_tuple = store_board_to_pyobject(board); + // This is shaky + Py_DECREF(board); + + PyObject *result = Py_BuildValue("O", board_tuple); + Py_DECREF(board_tuple); - return Py_BuildValue("O", board_tuple); + return result; } static PyObject* @@ -333,16 +377,43 @@ quack_calc_moves(PyObject *self, PyObject *args) { if (extract_board(board, board_tuple_obj)) return NULL; board_list boards = calc_moves(board, player, face_value); - PyObject* boards_list = PyList_New(0); + PyObject* boards_list = PyList_New(boards.size); for (int i = 0; i < boards.size; i++) { - if (PyList_Append(boards_list, boards.list[i])) { + if (PyList_SetItem(boards_list, i, boards.list[i])) { printf("list insertion failed at index %i\n",i); abort(); } } - return Py_BuildValue("O", boards_list); + PyObject *result = Py_BuildValue("O", boards_list); + Py_DECREF(boards_list); + + return result; +} + +static PyObject* +quack_board_features_quack_fat(PyObject *self, PyObject *args) { + int board[26]; + int player; + + PyObject* board_tuple_obj; + + if (! PyArg_ParseTuple(args, "O!i", + &PyTuple_Type, &board_tuple_obj, + &player)) + return NULL; + + if (extract_board(board, board_tuple_obj)) return NULL; + + int* new_board = board_features_quack_fat(board, player); + PyObject* board_tuple = store_board_to_pytuple(new_board, 30); + free(new_board); + + PyObject *result = Py_BuildValue("O", board_tuple); + Py_DECREF(board_tuple); + + return result; } @@ -363,6 +434,10 @@ static PyMethodDef quack_methods[] = { "calc_moves", quack_calc_moves, METH_VARARGS, "Calculates all legal moves from board with specified face value" }, + { + "board_features_quack_fat", quack_board_features_quack_fat, METH_VARARGS, + "Transforms a board to the quack-fat board representation" + }, {NULL, NULL, 0, NULL} }; From c3f5e909d6ce9ac517590abd0f89a7eadb65f12d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Fri, 11 May 2018 21:47:48 +0200 Subject: [PATCH 20/29] flip is back --- board.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/board.py b/board.py index 0306a22..38693c3 100644 --- a/board.py +++ b/board.py @@ -256,3 +256,8 @@ class Board: def do_move(board, player, move): # Implies that move is valid; make sure to check move validity before calling do_move(...) return quack.do_move(board, player, move) + + + @staticmethod + def flip(board): + return tuple((-x for x in reversed(board))) From ba4ef86bb5101b88c92a2fa34c89c851731db72d Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Sat, 12 May 2018 12:14:47 +0200 Subject: [PATCH 21/29] Board rep can now be inferred from file after being given once. We can also evaluate multiple times by using the flag "--repeat-eval". The flag defaults to 1, if not provided. 
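Concretely, the first run that is given --board-rep writes a board_representation file next to the model's checkpoints; later runs may omit the flag and the representation is read back from that file, while a conflicting value makes the run exit with an error instead of silently switching representation. --repeat-eval simply repeats the evaluation loop the given number of times. Illustrative usage; the model flag is assumed to be the existing one and tesauro-poop is just one of the registered representations:

    python3 main.py --train --model some-model --board-rep tesauro-poop  # first run writes the file
    python3 main.py --eval --repeat-eval 5 --model some-model            # later runs infer the representation
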
--- main.py | 57 ++++++++++++++++++++++++++++++++++++++++++++---------- network.py | 5 +++-- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/main.py b/main.py index e3ded40..0631df3 100644 --- a/main.py +++ b/main.py @@ -34,14 +34,15 @@ parser.add_argument('--list-models', action='store_true', parser.add_argument('--force-creation', action='store_true', help='force model creation if model does not exist') parser.add_argument('--board-rep', action='store', dest='board_rep', - default='tesauro', help='name of board representation to use as input to neural network') parser.add_argument('--use-baseline', action='store_true', help='use the baseline model, note, has size 28') parser.add_argument('--verbose', action='store_true', help='If set, a lot of stuff will be printed') -parser.add_argument('--ply', action='store', dest='ply', +parser.add_argument('--ply', action='store', dest='ply', default='0', help='defines the amount of ply used when deciding what move to make') +parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1', + help='the amount of times the evaluation method should be repeated') args = parser.parse_args() @@ -67,10 +68,11 @@ config = { 'use_baseline': args.use_baseline, 'global_step': 0, 'verbose': args.verbose, - 'ply': args.ply - + 'ply': args.ply, + 'repeat_eval': args.repeat_eval } + # Create models folder if not os.path.exists(config['model_storage_path']): os.makedirs(config['model_storage_path']) @@ -133,6 +135,24 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0): with open(log_path, 'a+') as f: f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n") +def find_board_rep(): + checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + board_rep_path = os.path.join(checkpoint_path, "board_representation") + with open(board_rep_path, 'r') as f: + return f.read() + + +def board_rep_file_exists(): + checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + board_rep_path = os.path.join(checkpoint_path, "board_representation") + return os.path.isfile(board_rep_path) + +def create_board_rep(): + checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + board_rep_path = os.path.join(checkpoint_path, "board_representation") + with open(board_rep_path, 'a+') as f: + f.write(config['board_representation']) + # Do actions specified by command-line if args.list_models: def get_eps_trained(folder): @@ -155,6 +175,22 @@ if __name__ == "__main__": # Set up variables episode_count = config['episode_count'] + + if config['board_representation'] is None: + if board_rep_file_exists(): + config['board_representation'] = find_board_rep() + else: + sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n") + exit() + else: + if not board_rep_file_exists(): + create_board_rep() + else: + if config['board_representation'] != find_board_rep(): + sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n". 
+ format(given = config['board_representation'], board_rep = find_board_rep())) + exit() + if args.train: network = Network(config, config['model']) @@ -172,12 +208,13 @@ if __name__ == "__main__": elif args.eval: network = Network(config, config['model']) - start_episode = network.episodes_trained - # Evaluation measures are described in `config` - outcomes = network.eval(config['episode_count']) - log_eval_outcomes(outcomes, trained_eps = start_episode) - # elif args.play: - # g.play(episodes = episode_count) + for i in range(int(config['repeat_eval'])): + start_episode = network.episodes_trained + # Evaluation measures are described in `config` + outcomes = network.eval(config['episode_count']) + log_eval_outcomes(outcomes, trained_eps = start_episode) + # elif args.play: + # g.play(episodes = episode_count) elif args.bench_eval_scores: diff --git a/network.py b/network.py index 5ea80cd..ad8e27a 100644 --- a/network.py +++ b/network.py @@ -177,6 +177,7 @@ class Network: :return: Nothing. It's a side-effect that a model gets restored for the network. """ + if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')): latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) @@ -235,9 +236,9 @@ class Network: :param player: :return: """ - start = time.time() + # start = time.time() best_pair = self.calculate_1_ply(board, roll, player) - print(time.time() - start) + # print(time.time() - start) return best_pair From 9f1bd56c0aacee8e4864f5155562cb026fea2632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Sat, 12 May 2018 15:18:52 +0200 Subject: [PATCH 22/29] fix bear_off bug; addtional tests and additional fixes --- quack/quack.c | 52 ++-- test.py | 648 +++++++++++++++++++++++++++----------------------- 2 files changed, 385 insertions(+), 315 deletions(-) diff --git a/quack/quack.c b/quack/quack.c index 0c72ae5..213c49c 100644 --- a/quack/quack.c +++ b/quack/quack.c @@ -73,29 +73,49 @@ int no_block_at_to_idx(int to_state, int player) { else return 1; } -int can_bear_off(int board[], int player, int from_idx) { + +int can_bear_off(int board[], int player, int from_idx, int to_idx) { int* checker_idxs = idxs_with_checkers_of_player(board, player); - if (player == 1) { - for (int i = 1; i <= checker_idxs[0]; i++) { - if ( !((checker_idxs[i] >= from_idx) && - (checker_idxs[i] >= 19)) ) { - free(checker_idxs); - return 0; + int moving_backmost_checker = 1; + int bearing_directly_off = 0; + int all_checkers_in_last_quadrant = 1; + + /* Check if bearing directly off */ + if (player == 1 && to_idx == 25) bearing_directly_off = 1; + else if (player == -1 && to_idx == 0) bearing_directly_off = 1; + + for (int i = 1; i <= checker_idxs[0]; i++) { + if (player == 1 ) { + /* Check if all checkers are in last quardrant */ + if (checker_idxs[i] < 19) { + all_checkers_in_last_quadrant = 0; + break; } - } - } else { - for (int i = 1; i <= checker_idxs[0]; i++) { - if ( !((checker_idxs[i] <= from_idx) && - (checker_idxs[i] <= 6)) ) { - free(checker_idxs); - return 0; + + /* Check if moving backmost checker */ + if (checker_idxs[i] < from_idx) { + moving_backmost_checker = 0; + if (!bearing_directly_off) break; + } + } else { + if (checker_idxs[i] > 6) { + all_checkers_in_last_quadrant = 0; + break; + } + + if (checker_idxs[i] > from_idx) { + moving_backmost_checker = 0; + if (!bearing_directly_off) break; } } } free(checker_idxs); - return 1; + + if (all_checkers_in_last_quadrant && + (bearing_directly_off || moving_backmost_checker)) return 1; + else 
return 0; } @@ -124,7 +144,7 @@ int is_move_valid(int board[], int player, int face_value, int move[]) { && bear_in_if_checker_on_bar(board, player, from_idx) && checkers_at_from_idx(from_state, player) && no_block_at_to_idx(to_state, player) - && (!bearing_off || can_bear_off(board, player, from_idx)) + && (!bearing_off || can_bear_off(board, player, from_idx, to_idx)) ; } diff --git a/test.py b/test.py index 759c2c5..90cea23 100644 --- a/test.py +++ b/test.py @@ -141,6 +141,56 @@ class TestIsMoveValid(unittest.TestCase): # TODO: More tests for bearing off are needed + def test_bear_off_non_backmost(self): + board = ( 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, + 0 ) + self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), True) + self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True) + self.assertEqual(Board.is_move_valid(board, 1, 2, (24, 26)), False) + + def test_bear_off_quadrant_limits_white(self): + board = ( 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, + 0 ) + self.assertEqual(Board.is_move_valid(board, 1, 2, (23, 25)), False) + self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), False) + + def test_bear_off_quadrant_limits_black(self): + board = ( 0, + -1, -1, -1, -1, -1, -1, + -1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0 ) + self.assertEqual(Board.is_move_valid(board, -1, 2, (2, 0)), False) + self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), False) + + def test_bear_off_quadrant_limits_white_2(self): + board = ( 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 1, + 0 ) + self.assertEqual(Board.is_move_valid(board, 1, 1, (24, 25)), True) + + def test_bear_off_quadrant_limits_black_2(self): + board = ( 0, + -1, 0, 0, 0, 0, -1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0 ) + self.assertEqual(Board.is_move_valid(board, -1, 1, (1, 0)), True) + + class TestNumOfChecker(unittest.TestCase): def test_simple_1(self): board = ( 0, @@ -552,372 +602,372 @@ class TestLegalMoves(unittest.TestCase): self.assertEqual(Board.calculate_legal_states(board, -1, (4,3)), expected_board_set) -# class TestBoardFlip(unittest.TestCase): -# def test_flip_board(self): -# board = (0, -# -14, -1, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 1, -# 0) +class TestBoardFlip(unittest.TestCase): + def test_flip_board(self): + board = (0, + -14, -1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, + 0) -# expected_board = ( 0, -# -1, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 1, 14, -# 0 ) + expected_board = ( 0, + -1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 14, + 0 ) -# self.assertEqual(Board.flip(board), expected_board) + self.assertEqual(Board.flip(board), expected_board) -# def test_flip_board_bar(self): -# board = (2, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# -7) + def test_flip_board_bar(self): + board = (2, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + -7) -# expected_board = (7, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# 0, 0, 0, 0, 0, 0, -# -2) + expected_board = (7, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + -2) -# self.assertEqual(Board.flip(board), expected_board) + self.assertEqual(Board.flip(board), expected_board) -# def test_flip_board_extensive(self): -# board = (4, -# -5, 
-1, 0, 4, 3, 0, -# 0, -1, 0, -5, 0, 0, -# 0, 3, 0, 0, 0, 0, -# 0, 0, 0, -1, 0, 1, -# -2) + def test_flip_board_extensive(self): + board = (4, + -5, -1, 0, 4, 3, 0, + 0, -1, 0, -5, 0, 0, + 0, 3, 0, 0, 0, 0, + 0, 0, 0, -1, 0, 1, + -2) -# expected_board = (2, -# -1, 0, 1, 0, 0, 0, -# 0, 0, 0, 0, -3, 0, -# 0, 0, 5, 0, 1, 0, -# 0, -3, -4, 0, 1, 5, -# -4) + expected_board = (2, + -1, 0, 1, 0, 0, 0, + 0, 0, 0, 0, -3, 0, + 0, 0, 5, 0, 1, 0, + 0, -3, -4, 0, 1, 5, + -4) -# self.assertEqual(Board.flip(board), expected_board) + self.assertEqual(Board.flip(board), expected_board) -# def test_inverse(self): -# board = (4, -# -5, -1, 0, 4, 3, 0, -# 0, -1, 0, -5, 0, 0, -# 0, 3, 0, 0, 0, 0, -# 0, 0, 0, -1, 0, 1, -# -2) + def test_inverse(self): + board = (4, + -5, -1, 0, 4, 3, 0, + 0, -1, 0, -5, 0, 0, + 0, 3, 0, 0, 0, 0, + 0, 0, 0, -1, 0, 1, + -2) -# self.assertEqual(Board.flip(Board.flip(board)), board) + self.assertEqual(Board.flip(Board.flip(board)), board) -# def test_tesauro_initial(self): -# board = Board.initial_state + def test_tesauro_initial(self): + board = Board.initial_state -# expected = (1,1,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0.0, -# 0, + 0.0, + 0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, -# 0.0, -# 0, + 0.0, + 0, -# 1, -# 0 -# ) + 1, + 0 + ) -# import numpy as np -# self.assertTrue((Board.board_features_tesauro(board, 1) == -# np.array(expected).reshape(1, 198)).all()) + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) -# def test_tesauro_bars(self): -# board = list(Board.initial_state) -# board[1] = 0 -# board[0] = 2 -# board[24] = 0 -# board[25] = -2 + def test_tesauro_bars(self): + board = list(Board.initial_state) + board[1] = 0 + board[0] = 2 + board[24] = 0 + board[25] = -2 -# board = tuple(board) + board = tuple(board) -# expected = (0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 1.0, -# 0, + 1.0, + 0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 
0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 1.0, -# 0, + 1.0, + 0, -# 1, -# 0 -# ) + 1, + 0 + ) -# import numpy as np -# self.assertTrue((Board.board_features_tesauro(board, 1) == -# np.array(expected).reshape(1, 198)).all()) + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) -# def test_tesauro_home(self): -# board = list(Board.initial_state) + def test_tesauro_home(self): + board = list(Board.initial_state) -# board[1] = 0 -# board[24] = 0 + board[1] = 0 + board[24] = 0 -# board = tuple(board) + board = tuple(board) -# expected = (0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0.0, -# 2, + 0.0, + 2, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0.0, -# 2, + 0.0, + 2, -# 1, -# 0 -# ) + 1, + 0 + ) -# import numpy as np -# self.assertTrue((Board.board_features_tesauro(board, 1) == -# np.array(expected).reshape(1, 198)).all()) + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) -# def test_tesauro_black_player(self): -# board = Board.initial_state + def test_tesauro_black_player(self): + board = Board.initial_state -# expected = (1,1,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 0.0, -# 0, + 0.0, + 0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, -# 0,0,0,0, -# 1,1,1,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, -# 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, -# 1,1,1,1, -# 0,0,0,0, -# 0,0,0,0, -# 
0,0,0,0,
-# 0,0,0,0,
-# 0,0,0,0,
+ 1,1,1,1,
+ 0,0,0,0,
+ 0,0,0,0,
+ 0,0,0,0,
+ 0,0,0,0,
+ 0,0,0,0,

-# 0,0,0,0,
-# 0,0,0,0,
-# 0,0,0,0,
-# 0,0,0,0,
-# 0,0,0,0,
-# 1,1,0,0,
+ 0,0,0,0,
+ 0,0,0,0,
+ 0,0,0,0,
+ 0,0,0,0,
+ 0,0,0,0,
+ 1,1,0,0,

-# 0.0,
-# 0,
+ 0.0,
+ 0,

-# 0,
-# 1
-# )
+ 0,
+ 1
+ )

-# import numpy as np
-# self.assertTrue((Board.board_features_tesauro(board, -1) ==
-# np.array(expected).reshape(1, 198)).all())
+ import numpy as np
+ self.assertTrue((Board.board_features_tesauro(board, -1) ==
+ np.array(expected).reshape(1, 198)).all())

 if __name__ == '__main__':

From d932663519006e158dc3dbf4adbe9dd93f2e72b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
Date: Sun, 13 May 2018 22:26:24 +0200
Subject: [PATCH 23/29] add explanation of ply speedup

---
 report_docs.txt | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 report_docs.txt

diff --git a/report_docs.txt b/report_docs.txt
new file mode 100644
index 0000000..e9d68ca
--- /dev/null
+++ b/report_docs.txt
@@ -0,0 +1,28 @@
+ Alexander and I rewrote part of our bachelor's project in C this past Friday.
+ You really have to watch your memory allocations.
+ Yeah, absolutely.
+ I found a memory leak that was leaking 100 MiB of memory per second.
+ Which part got C-ified?
+ Damned
+ The cause was that we handed an object back to Python without decrementing its ref-count, so the interpreter still thought someone needed it.
+ The part of the game logic that checks whether moves are valid.
+ It gets called many thousands of times per game, so we figured there might be some optimization to gain from rewriting it in C.
+ OK, so you haven't used alloc and free yourselves. That's still something.
+ The method itself became 7 times faster!
+ Wow!
+ Well, yes. We ended up doing that too.
+ We needed lists of variable size. It ended up as a struct with a "size" field and a "list" field.
+ Does that speedup include the back and forth between C and Python?
+ It should, yes!
+ Did it make a big difference for how quickly you can evaluate?
+ I don't think there is much "back and forth" business. It looks like the code you write gets thrown pretty much directly into the interpreter.
+ It made a big difference for when we do 1-ply.
+ "ply" is how many moves you look ahead.
+ So only looking at the immediately next move is 0-ply, which is what we have done until now
+ 1-ply was too slow. It took about 6-7 seconds to evaluate a single move.
+ Alexander did some rewriting so that TensorFlow computed it faster and got it down to about 3-4 seconds *per game*.
+ Then we rewrote some of it in C, and now we are at about 2 seconds per game with 1-ply, which is pretty wild.
+ It is so great that the Python interpreter can be extended with C!
+ caspervk, can you optimize your bachelor's project with a couple of C modules?
+ That's a whole little section for the report right there.
+ Yeah. I'll just copy this in verbatim.

From 926a331df0644237fac11d0b22ce582de1ada00e Mon Sep 17 00:00:00 2001
From: Alexander Munch-Hansen
Date: Sun, 13 May 2018 23:54:13 +0200
Subject: [PATCH 24/29] Some flags from main.py are gone, rolls now allow a
 face_value of 0 yet again, and it is possible to play against the AI. There
 is no flag for this yet, so this has to be added.
--- board.py | 59 +++++++++++++++------- bot.py | 84 ++++++++++++++++++++----------- main.py | 18 +++---- network.py | 131 ++++++++++++++++++++++++++++-------------------- network_test.py | 20 ++++---- player.py | 64 +++++++++++++++++------ 6 files changed, 244 insertions(+), 132 deletions(-) diff --git a/board.py b/board.py index 38693c3..ede4b87 100644 --- a/board.py +++ b/board.py @@ -170,12 +170,27 @@ class Board: @staticmethod - def apply_moves_to_board(board, player, moves): - for move in moves: - from_idx, to_idx = move.split("/") - board[int(from_idx)] -= int(player) - board[int(to_idx)] += int(player) - return board + def apply_moves_to_board(board, player, move): + from_idx = move[0] + to_idx = move[1] + board = list(board) + board[from_idx] -= player + + if (to_idx < 1 or to_idx > 24): + return + + if (board[to_idx] * player == -1): + + if (player == 1): + board[25] -= player + else: + board[0] -= player + + board[to_idx] = 0 + + board[to_idx] += player + + return tuple(board) @staticmethod def calculate_legal_states(board, player, roll): @@ -186,6 +201,8 @@ class Board: # turn and then do something with the second die def calc_moves(board, face_value): + if face_value == 0: + return [board] return quack.calc_moves(board, player, face_value) # Problem with cal_moves: Method can return empty list (should always contain at least same board). @@ -200,26 +217,32 @@ class Board: if not Board.any_move_valid(board, player, roll): return { board } dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4] + #print("Permuts:",dice_permutations) # print("Dice permuts:",dice_permutations) for roll in dice_permutations: # Calculate boards resulting from first move #print("initial board: ", board) #print("roll:", roll) + #print("Rest of roll:",roll[1:]) boards = calc_moves(board, roll[0]) + #print("Boards:",boards) + #print("Roll:",roll[0]) #print("boards after first die: ", boards) for die in roll[1:]: - # Calculate boards resulting from second move - nested_boards = [calc_moves(board, die) for board in boards] - #print("nested boards: ", nested_boards) - boards = [board for boards in nested_boards for board in boards] - # What the fuck - #for board in boards: - # print(board) - # print("type__:",type(board)) - # Add resulting unique boards to set of legal boards resulting from roll + # if die != 0: + if True: + # Calculate boards resulting from second move + nested_boards = [calc_moves(board, die) for board in boards] + #print("nested boards: ", nested_boards) + boards = [board for boards in nested_boards for board in boards] + # What the fuck + #for board in boards: + # print(board) + # print("type__:",type(board)) + # Add resulting unique boards to set of legal boards resulting from roll - #print("printing boards from calculate_legal_states: ", boards) + #print("printing boards from calculate_legal_states: ", boards) legal_moves = legal_moves | set(boards) # print("legal moves: ", legal_moves) if len(legal_moves) == 0: @@ -245,9 +268,9 @@ class Board: return """ 13 14 15 16 17 18 19 20 21 22 23 24 +--------------------------------------------------------------------------+ -| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO| +| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO| |---|---|---|---|---|---|------------|---|---|---|---|---|---| | -| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO| +| {12}| {11}| {10}| 
{9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO| +--------------------------------------------------------------------------+ 12 11 10 9 8 7 6 5 4 3 2 1 """.format(*temp) diff --git a/bot.py b/bot.py index 297f203..d1d74a6 100644 --- a/bot.py +++ b/bot.py @@ -1,24 +1,8 @@ -from cup import Cup -from network import Network from board import Board -import tensorflow as tf -import numpy as np -import random - class Bot: - def __init__(self, sym, config = None, name = "unnamed"): - self.config = config - self.cup = Cup() + def __init__(self, sym): self.sym = sym - self.graph = tf.Graph() - - self.network = Network(config, name) - self.network.restore_model() - - def restore_model(self): - with self.graph.as_default(): - self.network.restore_model() def get_session(self): return self.session @@ -26,16 +10,60 @@ class Bot: def get_sym(self): return self.sym - def get_network(self): - return self.network - # TODO: DEPRECATE - def make_move(self, board, sym, roll): - # print(Board.pretty(board)) - legal_moves = Board.calculate_legal_states(board, sym, roll) - moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] - scores = [ x[1] for x in moves_and_scores ] - best_move_pair = moves_and_scores[np.array(scores).argmax()] - #print("Found the best state, being:", np.array(move_scores).argmax()) - return best_move_pair + def calc_move_sets(self, from_board, roll, player): + board = from_board + sets = [] + total = 0 + print("board!:",board) + for r in roll: + # print("Value of r:",r) + sets.append([Board.calculate_legal_states(board, player, [r,0]), r]) + total += r + sets.append([Board.calculate_legal_states(board, player, [total,0]), total]) + return sets + + + def handle_move(self, from_board, to_board, roll, player): + + # print("Cur board:",board) + sets = self.calc_move_sets(from_board, roll, player) + for idx, board_set in enumerate(sets): + board_set[0] = list(board_set[0]) + # print("My board_set:",board_set) + if to_board in [list(c) for c in board_set[0]]: + self.total_moves -= board_set[1] + if idx < 2: + # print("Roll object:",self.roll) + self.roll[idx] = 0 + else: + self.roll = [0,0] + break + print("Total moves left:",self.total_moves) + + + def tmp_name(self, from_board, to_board, roll, player, total_moves): + sets = self.calc_move_sets(from_board, roll, player) + return_board = from_board + for idx, board_set in enumerate(sets): + board_set = list(board_set[0]) + if to_board in [list(board) for board in board_set]: + total_moves -= board_set[1] + # if it's not the sum of the moves + if idx < 2: + roll[idx] = 0 + else: + roll = [0,0] + return_board = to_board + break + return total_moves, roll, return_board + + def make_human_move(self, board, player, roll): + total_moves = roll[0] + roll[1] + previous_board = board + while total_moves != 0: + move = input("Pick a move!\n") + to_board = Board.apply_moves_to_board(previous_board, player, move) + total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves) + diff --git a/main.py b/main.py index 0631df3..a5fbf47 100644 --- a/main.py +++ b/main.py @@ -31,12 +31,8 @@ parser.add_argument('--train-perpetually', action='store_true', help='start new training session as soon as the previous is finished') parser.add_argument('--list-models', action='store_true', help='list all known models') -parser.add_argument('--force-creation', action='store_true', - help='force model creation if model does not exist') parser.add_argument('--board-rep', 
action='store', dest='board_rep', help='name of board representation to use as input to neural network') -parser.add_argument('--use-baseline', action='store_true', - help='use the baseline model, note, has size 28') parser.add_argument('--verbose', action='store_true', help='If set, a lot of stuff will be printed') parser.add_argument('--ply', action='store', dest='ply', default='0', @@ -46,9 +42,6 @@ parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default args = parser.parse_args() -if args.model == "baseline_model": - print("Model name 'baseline_model' not allowed") - exit() config = { 'model': args.model, @@ -64,8 +57,6 @@ config = { 'model_storage_path': 'models', 'bench_storage_path': 'bench', 'board_representation': args.board_rep, - 'force_creation': args.force_creation, - 'use_baseline': args.use_baseline, 'global_step': 0, 'verbose': args.verbose, 'ply': args.ply, @@ -87,6 +78,14 @@ if not os.path.isdir(log_path): os.mkdir(log_path) +def save_config(): + import yaml + # checkpoint_path = os.path.join(config['model_storage_path'], config['model']) + # config_path = os.path.join(checkpoint_path, 'config') + # with open(config_path, 'a+') as f: + # print("lol") + print(yaml.dump(config)) + # Define helper functions def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")): format_vars = { 'trained_eps': trained_eps, @@ -173,6 +172,7 @@ if __name__ == "__main__": # Set up network from network import Network + save_config() # Set up variables episode_count = config['episode_count'] diff --git a/network.py b/network.py index ad8e27a..00e405e 100644 --- a/network.py +++ b/network.py @@ -9,6 +9,7 @@ from eval import Eval import glob from operator import itemgetter import tensorflow.contrib.eager as tfe +from player import Player class Network: # board_features_quack has size 28 @@ -562,6 +563,28 @@ class Network: return outcomes + def play_against_network(self): + self.restore_model() + human_player = Player(-1) + cur_player = 1 + player = 1 + board = Board.initial_state + i = 0 + while Board.outcome(board) is None: + print(Board.pretty(board)) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + print("Bot rolled:", roll) + + board, _ = self.make_move(board, roll, player) + print(Board.pretty(board)) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + print("You rolled:", roll) + board = human_player.make_human_move(board, roll) + print("DONE "*10) + print(Board.pretty(board)) + + + def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): """ @@ -570,79 +593,79 @@ class Network: :param trained_eps: :return: """ - with tf.Session() as sess: - difference_in_vals = 0 - self.restore_model() + difference_in_vals = 0 - start_time = time.time() + self.restore_model() - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write( - "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) - sys.stderr.write( - "[TRAIN] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( - eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) + start_time = time.time() - sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) - outcomes = [] - for episode in range(1, episodes + 1): + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write( + "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) - sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) - # TODO decide which player should be here + sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) + outcomes = [] + for episode in range(1, episodes + 1): - player = 1 - prev_board = Board.initial_state - i = 0 - while Board.outcome(prev_board) is None: - i += 1 - self.global_step += 1 + sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) + # TODO decide which player should be here + + player = 1 + prev_board = Board.initial_state + i = 0 + while Board.outcome(prev_board) is None: + i += 1 + self.global_step += 1 - cur_board, cur_board_value = self.make_move(prev_board, - (random.randrange(1, 7), random.randrange(1, 7)), - player) + cur_board, cur_board_value = self.make_move(prev_board, + (random.randrange(1, 7), random.randrange(1, 7)), + player) - difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) + difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) - if self.config['verbose']: - print("Difference in values:", difference_in_vals) - print("Current board value :", cur_board_value) - print("Current board is :\n",cur_board) + if self.config['verbose']: + print("Difference in values:", difference_in_vals) + print("Current board value :", cur_board_value) + print("Current board is :\n",cur_board) - # adjust weights - if Board.outcome(cur_board) is None: - self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) - player *= -1 + # adjust weights + if Board.outcome(cur_board) is None: + self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) + player *= -1 - prev_board = cur_board + prev_board = cur_board - final_board = prev_board - sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) - outcomes.append(Board.outcome(final_board)[1]) - final_score = np.array([Board.outcome(final_board)[1]]) - scaled_final_score = ((final_score + 2) / 4) + final_board = prev_board + sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) + outcomes.append(Board.outcome(final_board)[1]) + final_score = np.array([Board.outcome(final_board)[1]]) + scaled_final_score = ((final_score + 2) / 4) - self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) + self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) - sys.stderr.write("\n") + sys.stderr.write("\n") - if episode % min(save_step_size, episodes) == 0: - sys.stderr.write("[TRAIN] Saving 
model...\n") - self.save_model(episode + trained_eps) + if episode % min(save_step_size, episodes) == 0: + sys.stderr.write("[TRAIN] Saving model...\n") + self.save_model(episode + trained_eps) - if episode % 50 == 0: - print_time_estimate(episode) + if episode % 50 == 0: + print_time_estimate(episode) - sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(episode+trained_eps) + sys.stderr.write("[TRAIN] Saving model for final episode...\n") + self.save_model(episode+trained_eps) - return outcomes, difference_in_vals[0][0] + return outcomes, difference_in_vals[0][0] diff --git a/network_test.py b/network_test.py index a4d8dda..1bcb878 100644 --- a/network_test.py +++ b/network_test.py @@ -9,8 +9,8 @@ from board import Board import main config = main.config.copy() -config['model'] = "eager_testings" -config['force_creation'] = True +config['model'] = "player_testings" +config['ply'] = "1" config['board_representation'] = 'quack-fat' network = Network(config, config['model']) @@ -40,19 +40,21 @@ boards = {initial_state, -board = network.board_trans_func(Board.initial_state, 1) +# board = network.board_trans_func(Board.initial_state, 1) -pair = network.make_move(Board.initial_state, [3,2], 1) +# pair = network.make_move(Board.initial_state, [3,2], 1) -print(pair[1]) +# print(pair[1]) -network.do_backprop(board, 0.9) +# network.do_backprop(board, 0.9) -network.print_variables() +# network.print_variables() -network.save_model(2) +# network.save_model(2) -print(network.calculate_1_ply(Board.initial_state, [3,2], 1)) \ No newline at end of file +# print(network.calculate_1_ply(Board.initial_state, [3,2], 1)) + +network.play_against_network() \ No newline at end of file diff --git a/player.py b/player.py index 596449f..4208cdd 100644 --- a/player.py +++ b/player.py @@ -11,19 +11,55 @@ class Player: def get_sym(self): return self.sym - def make_move(self, board, sym, roll): - print(Board.pretty(board)) - legal_moves = Board.calculate_legal_states(board, sym, roll) - if roll[0] == roll[1]: - print("Example of move: 4/6,6/8,12/14,13/15") - else: - print("Example of move: 4/6,13/17") + def calc_move_sets(self, from_board, roll, player): + board = from_board + sets = [] + total = 0 + for r in roll: + # print("Value of r:",r) + sets.append([Board.calculate_legal_states(board, player, [r,0]), r]) + total += r + sets.append([Board.calculate_legal_states(board, player, [total,0]), total]) + return sets - user_moves = input("Enter your move: ").strip().split(",") - board = Board.apply_moves_to_board(board, sym, user_moves) - while board not in legal_moves: - print("Move is invalid, please enter a new move") - user_moves = input("Enter your move: ").strip().split(",") - board = Board.apply_moves_to_board(board, sym, user_moves) - return board + def tmp_name(self, from_board, to_board, roll, player, total_moves): + sets = self.calc_move_sets(from_board, roll, player) + return_board = from_board + for idx, board_set in enumerate(sets): + + board_set[0] = list(board_set[0]) + print(to_board) + print(board_set) + if to_board in board_set[0]: + total_moves -= board_set[1] + # if it's not the sum of the moves + if idx < 2: + roll[idx] = 0 + else: + roll = [0,0] + return_board = to_board + break + return total_moves, roll, return_board + + def make_human_move(self, board, roll): + total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4 + move = "" + while total_moves != 0: + while True: + print("You have {roll} left!".format(roll=total_moves)) + move = input("Pick a 
move!\n") + pot_move = move.split("/") + if len(pot_move) == 2: + try: + pot_move[0] = int(pot_move[0]) + pot_move[1] = int(pot_move[1]) + move = pot_move + break; + except TypeError: + print("The correct syntax is: 2/5 for a move from index 2 to 5.") + + to_board = Board.apply_moves_to_board(board, self.get_sym(), move) + total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves) + print(Board.pretty(board)) + return board \ No newline at end of file From 00974b0f11228b53166a6a945ec77c7da657aa87 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Mon, 14 May 2018 13:07:48 +0200 Subject: [PATCH 25/29] Added '--play' flag, so you can now play against the ai. --- board.py | 22 ++++++++++------------ main.py | 5 ++++- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/board.py b/board.py index ede4b87..0233cf2 100644 --- a/board.py +++ b/board.py @@ -230,19 +230,17 @@ class Board: #print("boards after first die: ", boards) for die in roll[1:]: - # if die != 0: - if True: - # Calculate boards resulting from second move - nested_boards = [calc_moves(board, die) for board in boards] - #print("nested boards: ", nested_boards) - boards = [board for boards in nested_boards for board in boards] - # What the fuck - #for board in boards: - # print(board) - # print("type__:",type(board)) - # Add resulting unique boards to set of legal boards resulting from roll + # Calculate boards resulting from second move + nested_boards = [calc_moves(board, die) for board in boards] + #print("nested boards: ", nested_boards) + boards = [board for boards in nested_boards for board in boards] + # What the fuck + #for board in boards: + # print(board) + # print("type__:",type(board)) + # Add resulting unique boards to set of legal boards resulting from roll - #print("printing boards from calculate_legal_states: ", boards) + #print("printing boards from calculate_legal_states: ", boards) legal_moves = legal_moves | set(boards) # print("legal moves: ", legal_moves) if len(legal_moves) == 0: diff --git a/main.py b/main.py index a5fbf47..53b0444 100644 --- a/main.py +++ b/main.py @@ -205,7 +205,10 @@ if __name__ == "__main__": if not config['train_perpetually']: break - + elif args.play: + network = Network(config, config['model']) + network.play_against_network() + elif args.eval: network = Network(config, config['model']) for i in range(int(config['repeat_eval'])): From 260c32d9098308aa7fe49eb1a6e8f752d43dfe38 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Tue, 15 May 2018 18:16:44 +0200 Subject: [PATCH 26/29] oiuhhiu --- network.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/network.py b/network.py index 00e405e..f299e50 100644 --- a/network.py +++ b/network.py @@ -312,7 +312,7 @@ class Network: # start = time.time() list_of_moves = [] - + test_list = [] # Prepping of data for idx, board in enumerate(boards): all_board_moves = [] @@ -321,16 +321,41 @@ class Network: for state in all_states: state = np.array(self.board_trans_func(state, player*-1)[0]) all_board_moves.append(state) + test_list.append(state) list_of_moves.append(np.array(all_board_moves)) - # print(time.time() - start) - # start = time.time() + list_of_lengths = [len(board) for board in list_of_moves] - # Running data through networks + start = time.time() + for i in range(len(test_list)): + self.model.predict_on_batch(np.array([state])) + print("Indiviual rolls:", time.time() - start) all_scores = 
[self.model.predict_on_batch(board) for board in list_of_moves] + + + start = time.time() + all_scores_legit = self.model.predict_on_batch(np.array(test_list)) + + split_scores = [] + from_idx = 0 + for length in list_of_lengths: + split_scores.append(all_scores_legit[from_idx:from_idx+length]) + from_idx += length + + transformed_splits = [tf.reduce_mean(scores) for scores in split_scores] + + print(transformed_splits) + + + + + print("All in one:", time.time() - start) + scores_means = [tf.reduce_mean(score) for score in all_scores] + print(scores_means) + transformed_means = [x if player == 1 else (1-x) for x in scores_means] # print(time.time() - start) From a77c13a0a47e433ec3c3d69e53d70da9cd11167b Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Tue, 15 May 2018 19:29:27 +0200 Subject: [PATCH 27/29] 1-ply runs even faster. --- network.py | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/network.py b/network.py index f299e50..9405924 100644 --- a/network.py +++ b/network.py @@ -327,12 +327,6 @@ class Network: list_of_lengths = [len(board) for board in list_of_moves] - start = time.time() - for i in range(len(test_list)): - self.model.predict_on_batch(np.array([state])) - print("Indiviual rolls:", time.time() - start) - all_scores = [self.model.predict_on_batch(board) for board in list_of_moves] - start = time.time() all_scores_legit = self.model.predict_on_batch(np.array(test_list)) @@ -343,23 +337,10 @@ class Network: split_scores.append(all_scores_legit[from_idx:from_idx+length]) from_idx += length - transformed_splits = [tf.reduce_mean(scores) for scores in split_scores] + means_splits = [tf.reduce_mean(scores) for scores in split_scores] + transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits] - print(transformed_splits) - - - - - print("All in one:", time.time() - start) - - scores_means = [tf.reduce_mean(score) for score in all_scores] - - print(scores_means) - - transformed_means = [x if player == 1 else (1-x) for x in scores_means] - - # print(time.time() - start) - return ([scores_means, transformed_means]) + return ([means_splits, transformed_means_splits]) def calc_n_ply(self, n_init, sess, board, player, roll): From 90fad334b917055aab543f03133d3c10ee1751f1 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Tue, 15 May 2018 23:37:35 +0200 Subject: [PATCH 28/29] More optimizations. --- network.py | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/network.py b/network.py index 9405924..d61f458 100644 --- a/network.py +++ b/network.py @@ -292,55 +292,47 @@ class Network: to this function. """ - import time + all_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), + (1, 6), (2, 2), (2, 3), (2, 4), (2, 5), + (2, 6), (3, 3), (3, 4), (3, 5), (3, 6), + (4, 4), (4, 5), (4, 6), (5, 5), (5, 6), + (6, 6) ] - def gen_21_rolls(): - """ - Calculate all possible rolls, [[1,1], [1,2] ..] 
- :return: All possible rolls - """ - a = [] - for x in range(1, 7): - for y in range(1, 7): - if not [x, y] in a and not [y, x] in a: - a.append([x, y]) - - return a - - all_rolls = gen_21_rolls() # start = time.time() - list_of_moves = [] + # print("/"*50) + length_list = [] test_list = [] # Prepping of data - for idx, board in enumerate(boards): - all_board_moves = [] + start= time.time() + for board in boards: + length = 0 for roll in all_rolls: all_states = list(Board.calculate_legal_states(board, player*-1, roll)) for state in all_states: state = np.array(self.board_trans_func(state, player*-1)[0]) - all_board_moves.append(state) test_list.append(state) - list_of_moves.append(np.array(all_board_moves)) - - - list_of_lengths = [len(board) for board in list_of_moves] + length += 1 + length_list.append(length) + # print(time.time() - start) start = time.time() + all_scores_legit = self.model.predict_on_batch(np.array(test_list)) split_scores = [] from_idx = 0 - for length in list_of_lengths: + for length in length_list: split_scores.append(all_scores_legit[from_idx:from_idx+length]) from_idx += length means_splits = [tf.reduce_mean(scores) for scores in split_scores] transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits] + # print(time.time() - start) - return ([means_splits, transformed_means_splits]) + return ([means_sp5lits, transformed_means_splits]) def calc_n_ply(self, n_init, sess, board, player, roll): @@ -570,6 +562,10 @@ class Network: def play_against_network(self): + """ + Allows you to play against a supplied model. + :return: + """ self.restore_model() human_player = Player(-1) cur_player = 1 @@ -593,7 +589,7 @@ class Network: def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): """ - + Train a model to by self-learning. :param episodes: :param save_step_size: :param trained_eps: From 3e379b40c415c674882312ae23ff7371732303cb Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Wed, 16 May 2018 00:20:54 +0200 Subject: [PATCH 29/29] Accidentally added a '5' in the middle of a variable. --- network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network.py b/network.py index d61f458..381197b 100644 --- a/network.py +++ b/network.py @@ -332,7 +332,7 @@ class Network: transformed_means_splits = [x if player == 1 else (1-x) for x in means_splits] # print(time.time() - start) - return ([means_sp5lits, transformed_means_splits]) + return ([means_splits, transformed_means_splits]) def calc_n_ply(self, n_init, sess, board, player, roll):
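
[Editor's note] Patches 26-29 above converge on a batched 1-ply evaluation: instead of
calling the network once per candidate board, every successor state for all 21 distinct
rolls is flattened into a single array, scored with one predict_on_batch call, and the
flat score vector is then split back into per-board chunks whose means become the 1-ply
values. The sketch below only illustrates that pattern in isolation; it is not the
project's network.py code. The helpers successor_states, to_features and
predict_on_batch are stand-ins for Board.calculate_legal_states, the board_trans_func
and model.predict_on_batch used in the patches.

    import numpy as np

    ALL_ROLLS = [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
                 (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
                 (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
                 (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
                 (6, 6)]  # the 21 distinct dice rolls

    def one_ply_values(boards, successor_states, to_features, predict_on_batch, player):
        # Flatten every successor of every candidate board into one list and
        # remember how many successors each candidate board contributed.
        flat_features, counts = [], []
        for board in boards:
            n = 0
            for roll in ALL_ROLLS:
                for succ in successor_states(board, roll):
                    flat_features.append(to_features(succ))
                    n += 1
            counts.append(n)

        # One batched forward pass instead of one network call per candidate board.
        scores = np.asarray(predict_on_batch(np.array(flat_features))).reshape(-1)

        # Split the flat score vector back into per-board chunks and average each chunk.
        values, idx = [], 0
        for n in counts:
            values.append(float(scores[idx:idx + n].mean()))
            idx += n

        # Scores are from player 1's point of view; flip them for player -1.
        return [v if player == 1 else 1 - v for v in values]

    # Minimal usage with stand-in helpers (not the real board representation):
    if __name__ == '__main__':
        boards = [np.zeros(26), np.ones(26)]
        succ = lambda board, roll: [board + roll[0], board + roll[1]]
        feats = lambda board: board[:5]
        net = lambda batch: batch.mean(axis=1, keepdims=True)
        print(one_ply_values(boards, succ, feats, net, player=1))

The gain measured in patch 26 comes from amortising the per-call overhead of
predict_on_batch over all successor states at once; together with the C move generator
described in the chat log of patch 23, this is what brings 1-ply from several seconds
per move down to roughly two seconds per game.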