parametric board representation in network

Christoffer Müller Madsen 2018-03-28 12:00:47 +02:00
parent abce56dd40
commit fda2c6e08d
3 changed files with 38 additions and 25 deletions

View File

@@ -35,14 +35,16 @@ class Board:
         board.append(-15 - sum(negatives))
         return tuple(board)

+    # quack
     @staticmethod
-    def board_features_to_slimmed_down_own(board, player):
+    def board_features_quack(board, player):
         board = list(board)
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1, -1)

+    # quack-fat
     @staticmethod
-    def board_features_to_own(board, player):
+    def board_features_quack_fat(board, player):
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
@@ -52,8 +54,9 @@ class Board:
         return np.array(board).reshape(1,-1)

+    # tesauro
     @staticmethod
-    def board_features_to_tesauro(board, cur_player):
+    def board_features_tesauro(board, cur_player):
         features = []
         for player in [-1,1]:
             sum = 0.0
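The three representations differ only in the width of the feature vector they emit per state: quack is the raw 26-entry board plus a two-bit player indicator (28), quack-fat adds two aggregate checker counts on top of that (30), and tesauro is the classic 198-feature TD-Gammon encoding. A minimal shape check, as a sketch only (the module name `board` and the `Board.initial_state` attribute are assumptions, not shown in this diff):

```python
# Sketch: verify each encoding's advertised width on some starting board.
from board import Board  # module name assumed

for encode, width in [(Board.board_features_quack, 28),
                      (Board.board_features_quack_fat, 30),
                      (Board.board_features_tesauro, 198)]:
    assert encode(Board.initial_state, 1).shape == (1, width)
```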

View File

@@ -46,7 +46,8 @@ config = {
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
     'model_storage_path': 'models',
-    'bench_storage_path': 'bench'
+    'bench_storage_path': 'bench',
+    'board_representation': 'quack'
 }

 # Create models folder
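For now the representation is hardcoded to 'quack' in the config; the natural follow-up is to expose it as a command-line flag. A hedged sketch of that wiring (the flag name and parser are hypothetical, not part of this commit):

```python
# Hypothetical argparse wiring; the commit hardcodes 'quack' instead.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--board-rep', default='quack',
                    choices=['quack', 'quack-fat', 'tesauro'],
                    help='board feature encoding fed to the network')
args = parser.parse_args()
config['board_representation'] = args.board_rep
```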

View File

@@ -10,15 +10,15 @@ from eval import Eval

 class Network:
-    hidden_size = 40
-    input_size = 30
-    output_size = 1
-    # Can't remember the best learning_rate, look this up
-    learning_rate = 0.01
-
-    # board_features_to_own has size 30
-    # board_features_to_tesauro has size 198
-    # board_features_to_slimmed_down_own has size 28
-    board_rep = Board.board_features_to_own
+    # board_features_quack has size 28
+    # board_features_quack_fat has size 30
+    # board_features_tesauro has size 198
+    board_reps = {
+        'quack-fat' : (30, Board.board_features_quack_fat),
+        'quack'     : (28, Board.board_features_quack),
+        'tesauro'   : (198, Board.board_features_tesauro)
+    }

     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
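Because the lookup table is an ordinary class attribute mapping names to (input width, feature function) pairs, a new encoding can be plugged in without touching the graph-construction code. Sketch only; the feature function and key below are hypothetical:

```python
# Hypothetical: register an extra 28-wide encoding alongside the built-ins.
Network.board_reps['quack-norm'] = (28, Board.board_features_quack_norm)
config['board_representation'] = 'quack-norm'
```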
@@ -29,6 +29,15 @@ class Network:
         self.name = name

+        # Set board representation from config
+        self.input_size, self.board_trans_func = Network.board_reps[
+            self.config['board_representation']
+        ]
+        self.output_size = 1
+        self.hidden_size = 40
+        # Can't remember the best learning_rate, look this up
+        self.learning_rate = 0.01
+
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):
@@ -38,19 +47,19 @@ class Network:
             self.episodes_trained = 0

         # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        self.x = tf.placeholder('float', [1, self.input_size], name='input')
+        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")

         xavier_init = tf.contrib.layers.xavier_initializer()

-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                               initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                               initializer=xavier_init)

-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
+        b_2 = tf.get_variable("b_2", (self.output_size,),
                               initializer=tf.zeros_initializer)
@@ -74,7 +83,7 @@ class Network:
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_{t+1} - V_t)∇_w V_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
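The loop above applies a TD-style update Δw = α(V_{t+1} − V_t)∇_w V_t directly to each variable via assign_add, rather than going through an optimizer. A plain-NumPy restatement of the same rule (names are illustrative, not from the repo):

```python
import numpy as np

def td_update(w, grad_v, v_next, v_now, alpha=0.01):
    # Δw = α (V_{t+1} − V_t) ∇_w V_t
    return w + alpha * (v_next - v_now) * grad_v

w = np.zeros(3)
w = td_update(w, grad_v=np.array([0.1, -0.2, 0.3]), v_next=0.8, v_now=0.5)
```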
@@ -148,7 +157,7 @@ class Network:
     def make_move(self, sess, board, roll, player):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
         scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]
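Since the network estimates the probability that player +1 wins, player −1 ranks moves by 1 − V rather than by V directly. A toy illustration with made-up scores:

```python
import numpy as np

moves_and_scores = [('move_a', 0.7), ('move_b', 0.4)]  # fabricated values
player = -1
scores = [v if np.sign(player) > 0 else 1 - v for _, v in moves_and_scores]
best_move, best_score = moves_and_scores[int(np.array(scores).argmax())]
# player -1 picks 'move_b': the state where +1's win probability is lowest
```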
@@ -367,7 +376,7 @@ class Network:
             # adjust weights
             sess.run(self.training_op,
-                     feed_dict={self.x: Network.board_rep(prev_board, player),
+                     feed_dict={self.x: self.board_trans_func(prev_board, player),
                                 self.value_next: cur_board_value})

             player *= -1
@@ -386,7 +395,7 @@ class Network:
         with tf.name_scope("final"):
             merged = tf.summary.merge_all()
             summary, _ = sess.run([merged, self.training_op],
-                                  feed_dict={self.x: Network.board_rep(prev_board, player),
+                                  feed_dict={self.x: self.board_trans_func(prev_board, player),
                                              self.value_next: scaled_final_score.reshape((1, 1))})

             writer.add_summary(summary, episode + trained_eps)
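Taken together, switching board encodings is now a config change rather than a code edit. An end-to-end sketch, assuming a Network(config, name) constructor as suggested by the __init__ hunk above:

```python
# Constructor signature assumed from the diff, not confirmed by it.
config['board_representation'] = 'tesauro'
net = Network(config, 'p1')  # builds a 198-40-1 graph instead of 28-40-1
```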