No longer use n_ply for move selection; it is far too slow.
Added extra logging: the average difference in values between trainings is now logged. Also fixed a bug with the length of the quack-norm representation, and added a CLI argument, --use-baseline; if set, the baseline model will be used.
This commit is contained in:
parent 1db469709a
commit 1f8485f54e
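For context, a minimal sketch (not part of this commit) of what the newly logged value is. The changes in network.py below accumulate, for every move made during training, the absolute change in the network's value estimate, and main.py divides that total by the number of training episodes before writing it to train.log. The helper name here is hypothetical.

    def average_diff_in_vals(total_value_change, episodes):
        # total_value_change: sum of |V(board after move) - V(board before move)| over the run,
        # i.e. the difference_in_vals figure returned by train_model below
        # episodes: number of episodes in the batch, len(train_outcome) in main.py
        return total_value_change / episodes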
board.py (54 lines changed)
@@ -62,7 +62,9 @@ class Board:
         negatives = [x if x < 0 else 0 for x in board]
         board[0] = board[0] / 2
         board[25] = board[25] / 2
-        board = [board[x] / 15 for x in range(1,25)]
+
+        board = [board[x] if x == 0 or 25 else board[x] / 15 for x in range(0, 26)]
+
         board.append(15 - sum(positives))
         board.append(-15 - sum(negatives))
         board += ([1, 0] if np.sign(player) > 0 else [0, 1])
@@ -100,31 +102,31 @@ class Board:
         return np.array(board_rep).reshape(1,198)
 
 
-    # @staticmethod
-    # def board_features_tesauro(board, cur_player):
-    # features = []
-    # for player in [-1,1]:
-    # sum = 0.0
-    # for board_range in range(1,25):
-    # pin = board[board_range]
-    # #print("PIIIN:",pin)
-    # feature = [0.0]*4
-    # if np.sign(pin) == np.sign(player):
-    # sum += abs(pin)
-    # for i in range(min(abs(pin), 3)):
-    # feature[i] = 1
-    # if (abs(pin) > 3):
-    # feature[3] = (abs(pin)-3)/2
-    # features += feature
-    # #print("SUUUM:",sum)
-    # # Append the amount of men on the bar of the current player divided by 2
-    # features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
-    # # Calculate how many pieces there must be in the home state and divide it by 15
-    # features.append((15 - sum) / 15)
-    # features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
-    # test = np.array(features).reshape(1,-1)
-    # #print("TEST:",test)
-    # return test
+    @staticmethod
+    def board_features_tesauro_wrong(board, cur_player):
+        features = []
+        for player in [-1,1]:
+            sum = 0.0
+            for board_range in range(1,25):
+                pin = board[board_range]
+                #print("PIIIN:",pin)
+                feature = [0.0]*4
+                if np.sign(pin) == np.sign(player):
+                    sum += abs(pin)
+                    for i in range(min(abs(pin), 3)):
+                        feature[i] = 1
+                    if (abs(pin) > 3):
+                        feature[3] = (abs(pin)-3)/2
+                features += feature
+            #print("SUUUM:",sum)
+            # Append the amount of men on the bar of the current player divided by 2
+            features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
+            # Calculate how many pieces there must be in the home state and divide it by 15
+            features.append((15 - sum) / 15)
+        features += ([1,0] if np.sign(cur_player) > 0 else [0,1])
+        test = np.array(features).reshape(1,-1)
+        #print("TEST:",test)
+        return test
 
 
 
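A side note on the quack-norm length fix above: the old comprehension only covered indices 1 to 24, so the vector came out two entries short, while the new one keeps all 26 points. Below is a minimal sketch of the presumable intent, with a hypothetical function name. Note that the guard 'x == 0 or 25' in the committed line is always truthy in Python, so an explicit membership test is the unambiguous way to keep the two bar points unscaled.

    def normalise_quack_board(board):
        # keep the bar points (indices 0 and 25, already halved earlier in the method) as they are,
        # and scale the 24 regular points by 1/15
        return [board[x] if x in (0, 25) else board[x] / 15 for x in range(0, 26)]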
main.py (31 lines changed)
@@ -33,6 +33,8 @@ parser.add_argument('--list-models', action='store_true',
                     help='list all known models')
 parser.add_argument('--force-creation', action='store_true',
                     help='force model creation if model does not exist')
+parser.add_argument('--use-baseline', action='store_true',
+                    help='use the baseline model, note, has size 28')
 
 args = parser.parse_args()
 
@@ -53,8 +55,9 @@ config = {
     'train_perpetually': args.train_perpetually,
     'model_storage_path': 'models',
     'bench_storage_path': 'bench',
-    'board_representation': 'quack',
-    'force_creation': args.force_creation
+    'board_representation': 'quack-fat',
+    'force_creation': args.force_creation,
+    'use_baseline': args.use_baseline
 }
 
 # Create models folder
@@ -72,18 +75,26 @@ if not os.path.isdir(log_path):
 
 
 # Define helper functions
-def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
     format_vars = { 'trained_eps': trained_eps,
-                    'count': len(train_outcome),
-                    'sum': sum(train_outcome),
-                    'mean': sum(train_outcome) / len(train_outcome),
-                    'time': int(time.time())
+                    'count': len(outcome),
+                    'sum': sum(outcome),
+                    'mean': sum(outcome) / len(outcome),
+                    'time': int(time.time()),
+                    'average_diff_in_vals': diff_in_values/len(outcome)
                     }
     with open(log_path, 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
 
 
 def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
+    """
+    :param outcomes:
+    :param average_diff_in_value:
+    :param trained_eps:
+    :param log_path:
+    :return:
+    """
     for outcome in outcomes:
         scores = outcome[1]
         format_vars = { 'trained_eps': trained_eps,
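For reference, a hedged sketch (not part of the commit) of reading the extended train.log format written by log_train_outcome above; each line is now time;trained_eps;count;sum;mean;average_diff_in_vals, separated by semicolons. The file path and function name below are assumptions.

    def read_train_log(path='models/default/logs/train.log'):
        # parse the semicolon-separated fields written by log_train_outcome
        rows = []
        with open(path) as f:
            for line in f:
                time_, eps, count, total, mean, avg_diff = line.strip().split(';')
                rows.append({'time': int(time_),
                             'trained_eps': int(eps),
                             'count': int(count),
                             'sum': float(total),
                             'mean': float(mean),
                             'average_diff_in_vals': float(avg_diff)})
        return rows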
@@ -137,9 +148,9 @@ if __name__ == "__main__":
         network = Network(config, config['model'])
         start_episode = network.episodes_trained
         while True:
-            train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
+            train_outcome, diff_in_values = network.train_model(episodes = episode_count, trained_eps = start_episode)
             start_episode += episode_count
-            log_train_outcome(train_outcome, trained_eps = start_episode)
+            log_train_outcome(train_outcome, diff_in_values, trained_eps = start_episode)
             if config['eval_after_train']:
                 eval_outcomes = network.eval(trained_eps = start_episode)
                 log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
network.py (42 lines changed)
@@ -139,7 +139,7 @@ class Network:
         if os.path.isfile(episode_count_path):
             with open(episode_count_path, 'r') as f:
                 self.config['start_episode'] = int(f.read())
-        elif glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')):
+        elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')):
             checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model")
             latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
             print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
@@ -157,7 +157,7 @@ class Network:
             exit()
 
 
-    #def make_move(self, sess, board, roll, player):
+    def make_move(self, sess, board, roll, player):
         """
         Find the best move given a board, roll and a player, by finding all possible states one can go to
         and then picking the best, by using the network to evaluate each state. The highest score is picked
@@ -169,14 +169,14 @@ class Network:
         :param player: Current player
         :return: A pair of the best state to go to, together with the score of that state
         """
-        # legal_moves = Board.calculate_legal_states(board, player, roll)
-        # moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
-        # scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
-        # best_score_index = np.array(scores).argmax()
-        # best_move_pair = moves_and_scores[best_score_index]
-        # return best_move_pair
+        legal_moves = Board.calculate_legal_states(board, player, roll)
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
+        scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
+        best_score_index = np.array(scores).argmax()
+        best_move_pair = moves_and_scores[best_score_index]
+        return best_move_pair
 
-    def make_move(self, sess, board, roll, player, n = 1):
+    def make_move_n_ply(self, sess, board, roll, player, n = 1):
         best_pair = self.calc_n_ply(n, sess, board, player, roll)
         return best_pair
 
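With the rename above, a hedged usage sketch (not part of the commit): make_move is now the plain 0-ply chooser and returns a (best board, score) pair, while the slower search survives as make_move_n_ply. The variables sess, board, roll and player are assumed to be whatever the calling game loop already has in scope.

    best_board, best_score = network.make_move(sess, board, roll, player)
    # best_board, best_score = network.make_move_n_ply(sess, board, roll, player, n=1)  # the slow path this commit stops using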
@@ -201,13 +201,7 @@ class Network:
         zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
 
         # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
-
-        # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
-        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
-        if player == 1:
-            best_fifteen.reverse()
+        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
 
         best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
 
@@ -228,14 +222,9 @@ class Network:
         zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
 
         # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1))
-
-        # They're sorted from smallest to largest, therefore we wan't to reverse if the current player is 1, since
-        # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
-        if player == 1:
-            sorted_moves_and_scores.reverse()
+        sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
 
         best_boards = [x[0] for x in sorted_moves_and_scores[:10]]
 
         best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
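A short note on the two hunks above: sorted() orders the (move, score) pairs ascending by score, and reverse=player==1 flips the order only for player 1, who maximises; player -1 minimises, so ascending order is already correct for it. A tiny self-contained illustration with made-up scores:

    from operator import itemgetter
    moves_and_scores = [('a', 0.2), ('b', 0.9), ('c', 0.5)]  # toy data
    player = 1
    top = sorted(moves_and_scores, key=itemgetter(1), reverse=player == 1)[:2]
    print(top)  # [('b', 0.9), ('c', 0.5)], best for player 1 first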
@@ -365,7 +354,7 @@ class Network:
         all_rolls = gen_21_rolls()
 
         all_rolls_scores = []
+        count = 0
         # loop over boards
         for a_board in boards:
             a_board_scores = []
@@ -375,7 +364,7 @@ class Network:
 
             # find all states we can get to, given the board and roll and the opposite player
             all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
+            count += len(all_rolls_boards)
             # find scores for each board found above
             spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
                                 for new_board in all_rolls_boards]
@@ -393,6 +382,7 @@ class Network:
             all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
 
         # return all the average scores
+        print(count)
         return all_rolls_scores
 
 
@@ -508,6 +498,7 @@ class Network:
 
     def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
         with tf.Session() as sess:
+            difference_in_vals = 0
             writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
 
             sess.run(tf.global_variables_initializer())
@@ -552,6 +543,7 @@ class Network:
                                                      (random.randrange(1, 7), random.randrange(1, 7)),
                                                      player)
 
+                    difference_in_vals += abs((cur_board_value - self.eval_state(sess, self.board_trans_func(prev_board, player))))
 
 
                    # adjust weights
@@ -590,6 +582,6 @@ class Network:
 
             writer.close()
 
-            return outcomes
+            return outcomes, difference_in_vals
 
 