parametric board representation in network
This commit is contained in:
parent
abce56dd40
commit
fda2c6e08d
9
board.py
9
board.py
|
@ -35,14 +35,16 @@ class Board:
|
||||||
board.append(-15 - sum(negatives))
|
board.append(-15 - sum(negatives))
|
||||||
return tuple(board)
|
return tuple(board)
|
||||||
|
|
||||||
|
# quack
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_to_slimmed_down_own(board, player):
|
def board_features_quack(board, player):
|
||||||
board = list(board)
|
board = list(board)
|
||||||
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
|
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
|
||||||
return np.array(board).reshape(1, -1)
|
return np.array(board).reshape(1, -1)
|
||||||
|
|
||||||
|
# quack-fat
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_to_own(board, player):
|
def board_features_quack_fat(board, player):
|
||||||
board = list(board)
|
board = list(board)
|
||||||
positives = [x if x > 0 else 0 for x in board]
|
positives = [x if x > 0 else 0 for x in board]
|
||||||
negatives = [x if x < 0 else 0 for x in board]
|
negatives = [x if x < 0 else 0 for x in board]
|
||||||
|
@ -52,8 +54,9 @@ class Board:
|
||||||
return np.array(board).reshape(1,-1)
|
return np.array(board).reshape(1,-1)
|
||||||
|
|
||||||
|
|
||||||
|
# tesauro
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def board_features_to_tesauro(board, cur_player):
|
def board_features_tesauro(board, cur_player):
|
||||||
features = []
|
features = []
|
||||||
for player in [-1,1]:
|
for player in [-1,1]:
|
||||||
sum = 0.0
|
sum = 0.0
|
||||||
|
|
3
main.py
3
main.py
|
@ -46,7 +46,8 @@ config = {
|
||||||
'start_episode': args.start_episode,
|
'start_episode': args.start_episode,
|
||||||
'train_perpetually': args.train_perpetually,
|
'train_perpetually': args.train_perpetually,
|
||||||
'model_storage_path': 'models',
|
'model_storage_path': 'models',
|
||||||
'bench_storage_path': 'bench'
|
'bench_storage_path': 'bench',
|
||||||
|
'board_representation': 'quack'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create models folder
|
# Create models folder
|
||||||
|
|
47
network.py
47
network.py
|
@ -10,15 +10,15 @@ from eval import Eval
|
||||||
|
|
||||||
|
|
||||||
class Network:
|
class Network:
|
||||||
hidden_size = 40
|
# board_features_quack has size 28
|
||||||
input_size = 30
|
# board_features_quack_fat has size 30
|
||||||
output_size = 1
|
# board_features_tesauro has size 198
|
||||||
# Can't remember the best learning_rate, look this up
|
|
||||||
learning_rate = 0.01
|
board_reps = {
|
||||||
# board_features_to_own has size 30
|
'quack-fat' : (30, Board.board_features_quack_fat),
|
||||||
# board_features_to_tesauro has size 198
|
'quack' : (28, Board.board_features_quack),
|
||||||
# board_features_to_slimmed_down_own has size 28
|
'tesauro' : (198, Board.board_features_tesauro)
|
||||||
board_rep = Board.board_features_to_own
|
}
|
||||||
|
|
||||||
def custom_tanh(self, x, name=None):
|
def custom_tanh(self, x, name=None):
|
||||||
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
|
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
|
||||||
|
@ -29,6 +29,15 @@ class Network:
|
||||||
|
|
||||||
self.name = name
|
self.name = name
|
||||||
|
|
||||||
|
# Set board representation from config
|
||||||
|
self.input_size, self.board_trans_func = Network.board_reps[
|
||||||
|
self.config['board_representation']
|
||||||
|
]
|
||||||
|
self.output_size = 1
|
||||||
|
self.hidden_size = 40
|
||||||
|
# Can't remember the best learning_rate, look this up
|
||||||
|
self.learning_rate = 0.01
|
||||||
|
|
||||||
# Restore trained episode count for model
|
# Restore trained episode count for model
|
||||||
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
|
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
|
||||||
if os.path.isfile(episode_count_path):
|
if os.path.isfile(episode_count_path):
|
||||||
|
@ -38,19 +47,19 @@ class Network:
|
||||||
self.episodes_trained = 0
|
self.episodes_trained = 0
|
||||||
|
|
||||||
# input = x
|
# input = x
|
||||||
self.x = tf.placeholder('float', [1, Network.input_size], name='input')
|
self.x = tf.placeholder('float', [1, self.input_size], name='input')
|
||||||
self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
|
self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
|
||||||
|
|
||||||
xavier_init = tf.contrib.layers.xavier_initializer()
|
xavier_init = tf.contrib.layers.xavier_initializer()
|
||||||
|
|
||||||
W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
|
W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
|
||||||
initializer=xavier_init)
|
initializer=xavier_init)
|
||||||
W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
|
W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
|
||||||
initializer=xavier_init)
|
initializer=xavier_init)
|
||||||
|
|
||||||
b_1 = tf.get_variable("b_1", (Network.hidden_size,),
|
b_1 = tf.get_variable("b_1", (self.hidden_size,),
|
||||||
initializer=tf.zeros_initializer)
|
initializer=tf.zeros_initializer)
|
||||||
b_2 = tf.get_variable("b_2", (Network.output_size,),
|
b_2 = tf.get_variable("b_2", (self.output_size,),
|
||||||
initializer=tf.zeros_initializer)
|
initializer=tf.zeros_initializer)
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,7 +83,7 @@ class Network:
|
||||||
with tf.variable_scope('apply_gradients'):
|
with tf.variable_scope('apply_gradients'):
|
||||||
for gradient, trainable_var in zip(gradients, trainable_vars):
|
for gradient, trainable_var in zip(gradients, trainable_vars):
|
||||||
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
|
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
|
||||||
backprop_calc = Network.learning_rate * difference_in_values * gradient
|
backprop_calc = self.learning_rate * difference_in_values * gradient
|
||||||
grad_apply = trainable_var.assign_add(backprop_calc)
|
grad_apply = trainable_var.assign_add(backprop_calc)
|
||||||
apply_gradients.append(grad_apply)
|
apply_gradients.append(grad_apply)
|
||||||
|
|
||||||
|
@ -148,7 +157,7 @@ class Network:
|
||||||
def make_move(self, sess, board, roll, player):
|
def make_move(self, sess, board, roll, player):
|
||||||
# print(Board.pretty(board))
|
# print(Board.pretty(board))
|
||||||
legal_moves = Board.calculate_legal_states(board, player, roll)
|
legal_moves = Board.calculate_legal_states(board, player, roll)
|
||||||
moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
|
moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
|
||||||
scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
|
scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
|
||||||
best_score_index = np.array(scores).argmax()
|
best_score_index = np.array(scores).argmax()
|
||||||
best_move_pair = moves_and_scores[best_score_index]
|
best_move_pair = moves_and_scores[best_score_index]
|
||||||
|
@ -367,7 +376,7 @@ class Network:
|
||||||
|
|
||||||
# adjust weights
|
# adjust weights
|
||||||
sess.run(self.training_op,
|
sess.run(self.training_op,
|
||||||
feed_dict={self.x: Network.board_rep(prev_board, player),
|
feed_dict={self.x: self.board_trans_func(prev_board, player),
|
||||||
self.value_next: cur_board_value})
|
self.value_next: cur_board_value})
|
||||||
|
|
||||||
player *= -1
|
player *= -1
|
||||||
|
@ -386,7 +395,7 @@ class Network:
|
||||||
with tf.name_scope("final"):
|
with tf.name_scope("final"):
|
||||||
merged = tf.summary.merge_all()
|
merged = tf.summary.merge_all()
|
||||||
summary, _ = sess.run([merged, self.training_op],
|
summary, _ = sess.run([merged, self.training_op],
|
||||||
feed_dict={self.x: Network.board_rep(prev_board, player),
|
feed_dict={self.x: self.board_trans_func(prev_board, player),
|
||||||
self.value_next: scaled_final_score.reshape((1, 1))})
|
self.value_next: scaled_final_score.reshape((1, 1))})
|
||||||
writer.add_summary(summary, episode + trained_eps)
|
writer.add_summary(summary, episode + trained_eps)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user