parametric board representation in network
commit fda2c6e08d · parent abce56dd40
board.py (13 lines changed)

@@ -31,29 +31,32 @@ class Board:
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        board.append(15 - sum(positives))
+        board.append( 15 - sum(positives))
         board.append(-15 - sum(negatives))
         return tuple(board)
 
+    # quack
     @staticmethod
-    def board_features_to_slimmed_down_own(board, player):
+    def board_features_quack(board, player):
         board = list(board)
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1, -1)
 
+    # quack-fat
     @staticmethod
-    def board_features_to_own(board, player):
+    def board_features_quack_fat(board, player):
         board = list(board)
         positives = [x if x > 0 else 0 for x in board]
         negatives = [x if x < 0 else 0 for x in board]
-        board.append(15 - sum(positives))
+        board.append( 15 - sum(positives))
         board.append(-15 - sum(negatives))
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
         return np.array(board).reshape(1,-1)
 
 
+    # tesauro
     @staticmethod
-    def board_features_to_tesauro(board, cur_player):
+    def board_features_tesauro(board, cur_player):
         features = []
         for player in [-1,1]:
             sum = 0.0
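
Note: for orientation, a short sketch of what the two renamed feature functions return. It assumes board.py's Board is importable and that a board is a 26-element tuple of signed checker counts (24 points plus two bar slots, an assumption inferred from the feature sizes below); the sample position is hypothetical.

    import numpy as np
    from board import Board  # assumption: board.py exposes the Board class

    # Hypothetical starting position: positive counts belong to player 1,
    # negative counts to player -1; 15 checkers per side.
    board = (0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5,
             -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0)

    print(Board.board_features_quack(board, 1).shape)      # (1, 28): 26 board values + one-hot player
    print(Board.board_features_quack_fat(board, 1).shape)  # (1, 30): as above, plus two borne-off counts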
main.py (3 lines changed)

@@ -46,7 +46,8 @@ config = {
     'start_episode': args.start_episode,
     'train_perpetually': args.train_perpetually,
     'model_storage_path': 'models',
-    'bench_storage_path': 'bench'
+    'bench_storage_path': 'bench',
+    'board_representation': 'quack'
 }
 
 # Create models folder
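
Note: the new 'board_representation' key is exactly what Network reads in the network.py diff below; the lookup it drives is, in one line:

    input_size, board_trans_func = Network.board_reps[config['board_representation']]
    # 'quack' selects (28, Board.board_features_quack)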
network.py (47 lines changed)

@@ -10,15 +10,15 @@ from eval import Eval
 
 
 class Network:
-    hidden_size = 40
-    input_size = 30
-    output_size = 1
-    # Can't remember the best learning_rate, look this up
-    learning_rate = 0.01
-    # board_features_to_own has size 30
-    # board_features_to_tesauro has size 198
-    # board_features_to_slimmed_down_own has size 28
-    board_rep = Board.board_features_to_own
+    # board_features_quack has size 28
+    # board_features_quack_fat has size 30
+    # board_features_tesauro has size 198
+
+    board_reps = {
+        'quack-fat' : (30, Board.board_features_quack_fat),
+        'quack'     : (28, Board.board_features_quack),
+        'tesauro'   : (198, Board.board_features_tesauro)
+    }
 
     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
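
Note: because a representation is now just a (size, function) pair selected by name, adding one takes a single dict entry. A hypothetical sketch (board_features_raw and the 'raw' key are not part of this commit):

    # Hypothetical extra encoding: the raw 26-value board (as assumed above)
    # plus the player sign.
    def board_features_raw(board, player):
        return np.array(list(board) + [np.sign(player)]).reshape(1, -1)

    Network.board_reps['raw'] = (27, board_features_raw)  # 26 board values + 1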
@@ -29,6 +29,15 @@ class Network:
 
         self.name = name
 
+        # Set board representation from config
+        self.input_size, self.board_trans_func = Network.board_reps[
+            self.config['board_representation']
+        ]
+        self.output_size = 1
+        self.hidden_size = 40
+        # Can't remember the best learning_rate, look this up
+        self.learning_rate = 0.01
+
         # Restore trained episode count for model
         episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
         if os.path.isfile(episode_count_path):

@@ -38,19 +47,19 @@ class Network:
         self.episodes_trained = 0
 
         # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        self.x = tf.placeholder('float', [1, self.input_size], name='input')
+        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
 
         xavier_init = tf.contrib.layers.xavier_initializer()
 
-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                               initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                               initializer=xavier_init)
 
-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                               initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
+        b_2 = tf.get_variable("b_2", (self.output_size,),
                               initializer=tf.zeros_initializer)
 
 

@@ -74,7 +83,7 @@ class Network:
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_t+1 - V_t)∇_wV_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
 
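
Note: the changed line is the TD weight update named in the comment, Δw_t = α(V_t+1 - V_t)∇_wV_t. A plain numpy sketch of the same rule, with stand-in values and shapes:

    import numpy as np

    alpha = 0.01                 # self.learning_rate
    V_next, V_cur = 0.73, 0.68   # successive value estimates of the board
    grad = np.ones((28, 40))     # stand-in for one tensor from tf.gradients

    w = np.zeros((28, 40))
    w += alpha * (V_next - V_cur) * grad  # what trainable_var.assign_add applies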
@@ -148,7 +157,7 @@
     def make_move(self, sess, board, roll, player):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
         scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]

@@ -367,7 +376,7 @@
 
         # adjust weights
         sess.run(self.training_op,
-                 feed_dict={self.x: Network.board_rep(prev_board, player),
+                 feed_dict={self.x: self.board_trans_func(prev_board, player),
                             self.value_next: cur_board_value})
 
         player *= -1

@@ -386,7 +395,7 @@
         with tf.name_scope("final"):
             merged = tf.summary.merge_all()
             summary, _ = sess.run([merged, self.training_op],
-                                  feed_dict={self.x: Network.board_rep(prev_board, player),
+                                  feed_dict={self.x: self.board_trans_func(prev_board, player),
                                              self.value_next: scaled_final_score.reshape((1, 1))})
             writer.add_summary(summary, episode + trained_eps)
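
Note: taken together, switching encodings is now a config change only. A hedged end-to-end sketch; the Network constructor call is assumed from the attribute accesses above, since its exact signature is not shown in this diff:

    config['board_representation'] = 'tesauro'
    net = Network(config, 'tesauro-net')  # hypothetical constructor call
    # net.input_size == 198; net.board_trans_func is Board.board_features_tesauro,
    # so net.make_move(sess, board, roll, player) evaluates moves with that encoding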