diff --git a/board.py b/board.py
index 1b018d9..33303c1 100644
--- a/board.py
+++ b/board.py
@@ -31,29 +31,32 @@ class Board:
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        board.append(15 - sum(positives))
+        board.append( 15 - sum(positives))
         board.append(-15 - sum(negatives))
         return tuple(board)
 
+    # quack
     @staticmethod
-    def board_features_to_slimmed_down_own(board, player):
+    def board_features_quack(board, player):
         board = list(board)
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1, -1)
 
+    # quack-fat
     @staticmethod
-    def board_features_to_own(board, player):
+    def board_features_quack_fat(board, player):
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        board.append(15 - sum(positives))
+        board.append( 15 - sum(positives))
         board.append(-15 - sum(negatives))
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1,-1)
 
+    # tesauro
     @staticmethod
-    def board_features_to_tesauro(board, cur_player):
+    def board_features_tesauro(board, cur_player):
         features = []
         for player in [-1,1]:
             sum = 0.0
diff --git a/main.py b/main.py
index b5a8ad0..f6a375e 100644
--- a/main.py
+++ b/main.py
@@ -46,7 +46,8 @@ config = {
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
     'model_storage_path': 'models',
-    'bench_storage_path': 'bench'
+    'bench_storage_path': 'bench',
+    'board_representation': 'quack'
 }
 
 # Create models folder
diff --git a/network.py b/network.py
index f1f1859..d19f23c 100644
--- a/network.py
+++ b/network.py
@@ -10,15 +10,15 @@ from eval import Eval
 
 class Network:
-    hidden_size = 40
-    input_size = 30
-    output_size = 1
-    # Can't remember the best learning_rate, look this up
-    learning_rate = 0.01
-    # board_features_to_own has size 30
-    # board_features_to_tesauro has size 198
-    # board_features_to_slimmed_down_own has size 28
-    board_rep = Board.board_features_to_own
+    # board_features_quack has size 28
+    # board_features_quack_fat has size 30
+    # board_features_tesauro has size 198
+
+    board_reps = {
+        'quack-fat' : (30, Board.board_features_quack_fat),
+        'quack'     : (28, Board.board_features_quack),
+        'tesauro'   : (198, Board.board_features_tesauro)
+    }
 
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
@@ -29,6 +29,15 @@ class Network:
 
         self.name = name
 
+        # Set board representation from config
+        self.input_size, self.board_trans_func = Network.board_reps[
+            self.config['board_representation']
+        ]
+        self.output_size = 1
+        self.hidden_size = 40
+        # Can't remember the best learning_rate, look this up
+        self.learning_rate = 0.01
+
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
@@ -38,19 +47,19 @@ class Network:
             self.episodes_trained = 0
 
         # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        self.x = tf.placeholder('float', [1, self.input_size], name='input')
+        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
 
         xavier_init = tf.contrib.layers.xavier_initializer()
 
-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                               initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                               initializer=xavier_init)
 
-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
+        b_2 = tf.get_variable("b_2", (self.output_size,),
                               initializer=tf.zeros_initializer)
 
@@ -74,7 +83,7 @@ class Network:
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
@@ -148,7 +157,7 @@ class Network:
     def make_move(self, sess, board, roll, player):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
         scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]
@@ -367,7 +376,7 @@ class Network:
 
                     # adjust weights
                     sess.run(self.training_op,
-                             feed_dict={self.x: Network.board_rep(prev_board, player),
+                             feed_dict={self.x: self.board_trans_func(prev_board, player),
                                         self.value_next: cur_board_value})
 
                     player *= -1
@@ -386,7 +395,7 @@ class Network:
             with tf.name_scope("final"):
                 merged = tf.summary.merge_all()
                 summary, _ = sess.run([merged, self.training_op],
-                                      feed_dict={self.x: Network.board_rep(prev_board, player),
+                                      feed_dict={self.x: self.board_trans_func(prev_board, player),
                                                  self.value_next: scaled_final_score.reshape((1, 1))})
                 writer.add_summary(summary, episode + trained_eps)
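
Note: the sketch below is not part of the diff; it only illustrates how the new 'board_representation' config key is meant to select the input size and feature function. The local board_reps table mirrors the Network.board_reps mapping added in network.py (so network.py and TensorFlow need not be imported), example_board is a made-up placeholder, and the 26-entry board length is an inference from the 28/30 feature counts stated in the comments, not something shown in the diff.

    # Usage sketch only: look up (input_size, feature function) from the
    # representation name, exactly as Network.__init__ now does.
    from board import Board

    board_reps = {
        'quack-fat': (30, Board.board_features_quack_fat),
        'quack':     (28, Board.board_features_quack),
        'tesauro':   (198, Board.board_features_tesauro),
    }

    config = {'board_representation': 'quack-fat'}    # same key as in main.py's config dict

    input_size, board_trans_func = board_reps[config['board_representation']]

    example_board = tuple([0] * 26)                   # placeholder board, every point empty (assumed length)
    features = board_trans_func(example_board, 1)     # player +1
    assert features.shape == (1, input_size)          # (1, 30) for 'quack-fat'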