Functioning network using board representation shamelessly ripped from Tesauro
This commit is contained in:
parent
98c9af72e7
commit
006f791727
25
board.py
25
board.py
|
@ -35,6 +35,31 @@ class Board:
|
|||
board.append(-15 - sum(negatives))
|
||||
return tuple(board)
|
||||
|
||||
@staticmethod
def board_features_to_tesauro(board, cur_player):
    """Encode a board as a Tesauro (TD-Gammon) style feature row.

    For each player, in the order -1 then 1, the encoding holds:

    * four inputs per board point 1..24: the first three are set to 1 for
      the first, second and third man of that player on the point, the
      fourth is ``(men - 3) / 2`` when more than three men are present;
    * the player's men on the bar divided by 2 (``board[0]`` for player
      -1, ``board[25]`` for player 1);
    * the player's men that are neither on a point nor on the bar,
      divided by 15.

    Two final inputs mark the side to move: ``[1, 0]`` when ``cur_player``
    is positive and ``[0, 1]`` otherwise.

    Args:
        board: indexable board of 26 entries; the sign of an entry tells
            which player owns the men on it.
        cur_player: the player whose turn it is (sign is what matters).

    Returns:
        A numpy array of shape ``(1, 198)``.
    """
    features = []
    for player in [-1, 1]:
        men_on_points = 0.0
        for point in range(1, 25):
            pin = board[point]
            feature = [0.0] * 4
            # Only points owned by `player` contribute; empty points have
            # sign 0 and therefore never match.
            if np.sign(pin) == np.sign(player):
                count = abs(pin)
                men_on_points += count
                for i in range(min(count, 3)):
                    feature[i] = 1
                if count > 3:
                    feature[3] = (count - 3) / 2
            features += feature

        # Men on the bar, as a non-negative count like the point features.
        # NOTE(review): assumes board[0] stores player -1's bar men with a
        # negative sign, as the points do -- abs() is a no-op otherwise.
        bar = abs(board[0] if np.sign(player) < 0 else board[25])
        features.append(bar / 2.0)

        # Men borne off: everything that is neither on a point nor on the
        # bar. (Bar men used to be counted as borne off.)
        features.append((15 - men_on_points - bar) / 15)

    # Side-to-move indicator; the two sides must get different encodings.
    features += [1, 0] if np.sign(cur_player) > 0 else [0, 1]
    return np.array(features).reshape(1, -1)
|
||||
|
||||
|
||||
|
||||
|
|
13
eval.py
13
eval.py
|
@ -2,6 +2,7 @@ from board import Board
|
|||
|
||||
import numpy as np
|
||||
import pubeval
|
||||
import dumbeval
|
||||
|
||||
|
||||
class Eval:
|
||||
|
@ -24,4 +25,16 @@ class Eval:
|
|||
|
||||
return best_move_pair
|
||||
|
||||
@staticmethod
def make_dumbeval_move(board, sym, roll):
    """Pick the legal successor state that dumbeval scores highest.

    Every state returned by ``Board.calculate_legal_states`` is converted
    with ``Board.board_features_to_pubeval`` and scored by
    ``dumbeval.eval`` with the race flag fixed to ``False``.

    Returns:
        The ``(state, score)`` pair with the highest score; on ties the
        first such pair in iteration order.
    """
    scored = []
    for candidate in Board.calculate_legal_states(board, sym, roll):
        encoded = Board.board_features_to_pubeval(candidate, sym)
        scored.append((candidate, dumbeval.eval(False, encoded)))

    values = np.array([pair[1] for pair in scored])
    return scored[values.argmax()]
|
||||
|
||||
|
||||
|
|
5
game.py
5
game.py
|
@ -23,18 +23,21 @@ class Game:
|
|||
|
||||
def roll(self):
|
||||
return self.cup.roll()
|
||||
|
||||
'''
|
||||
def best_move_and_score(self):
|
||||
roll = self.roll()
|
||||
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
|
||||
self.board = move_and_val[0]
|
||||
return move_and_val
|
||||
'''
|
||||
|
||||
'''
|
||||
def next_round(self):
|
||||
roll = self.roll()
|
||||
#print(roll)
|
||||
self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
|
||||
return self.board
|
||||
'''
|
||||
|
||||
def board_state(self):
|
||||
return self.board
|
||||
|
|
358
network.py
358
network.py
|
@ -8,15 +8,16 @@ import sys
|
|||
import random
|
||||
from eval import Eval
|
||||
|
||||
|
||||
class Network:
|
||||
hidden_size = 40
|
||||
input_size = 26
|
||||
input_size = 198
|
||||
output_size = 1
|
||||
# Can't remember the best learning_rate, look this up
|
||||
learning_rate = 0.05
|
||||
learning_rate = 0.01
|
||||
|
||||
# TODO: Actually compile tensorflow properly
|
||||
#os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
|
||||
# os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
|
||||
|
||||
def custom_tanh(self, x, name=None):
|
||||
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
|
||||
|
@ -51,8 +52,8 @@ class Network:
|
|||
b_2 = tf.get_variable("b_2", (Network.output_size,),
|
||||
initializer=tf.zeros_initializer)
|
||||
|
||||
normalized_input = tf.nn.l2_normalize(self.x)
|
||||
value_after_input = tf.sigmoid(tf.matmul(normalized_input, W_1) + b_1, name='hidden_layer')
|
||||
|
||||
value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
|
||||
|
||||
self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
|
||||
|
||||
|
@ -112,23 +113,22 @@ class Network:
|
|||
# implement learning_rate * (difference_in_values) * gradients (the
|
||||
# before-mentioned calculation.
|
||||
|
||||
|
||||
# print("Network is evaluating")
|
||||
#print("eval ({})".format(self.name), state, val, sep="\n")
|
||||
return sess.run(self.value, feed_dict={self.x: state})
|
||||
# print("eval ({})".format(self.name), state, val, sep="\n")
|
||||
|
||||
return sess.run(self.value, feed_dict={self.x: state})
|
||||
|
||||
def save_model(self, sess, episode_count):
|
||||
self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'))
|
||||
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
|
||||
print("[NETWK] ({name}) Saving model to:".format(name = self.name),
|
||||
print("[NETWK] ({name}) Saving model to:".format(name=self.name),
|
||||
os.path.join(self.checkpoint_path, 'model.ckpt'))
|
||||
f.write(str(episode_count) + "\n")
|
||||
|
||||
def restore_model(self, sess):
|
||||
if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
|
||||
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
|
||||
print("[NETWK] ({name}) Restoring model from:".format(name = self.name),
|
||||
print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
|
||||
str(latest_checkpoint))
|
||||
self.saver.restore(sess, latest_checkpoint)
|
||||
variables_names = [v.name for v in tf.trainable_variables()]
|
||||
|
@ -144,18 +144,167 @@ class Network:
|
|||
with open(episode_count_path, 'r') as f:
|
||||
self.config['start_episode'] = int(f.read())
|
||||
|
||||
def make_move(self, sess, board, roll):
|
||||
def make_move(self, sess, board, roll, player):
|
||||
# print(Board.pretty(board))
|
||||
legal_moves = Board.calculate_legal_states(board, 1, roll)
|
||||
moves_and_scores = [ (move, self.eval_state(sess, np.array(move).reshape(1,26))) for move in legal_moves ]
|
||||
scores = [ x[1] for x in moves_and_scores ]
|
||||
legal_moves = Board.calculate_legal_states(board, player, roll)
|
||||
moves_and_scores = [(move, self.eval_state(sess, Board.board_features_to_tesauro(move, player))) for move in legal_moves]
|
||||
scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
|
||||
best_score_index = np.array(scores).argmax()
|
||||
best_move_pair = moves_and_scores[best_score_index]
|
||||
#print("Found the best state, being:", np.array(move_scores).argmax())
|
||||
# print("Found the best state, being:", np.array(move_scores).argmax())
|
||||
return best_move_pair
|
||||
|
||||
def eval(self, trained_eps=0):
|
||||
def do_eval(sess, method, episodes=1000, trained_eps=trained_eps):
|
||||
start_time = time.time()
|
||||
|
||||
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
|
||||
def print_time_estimate(eps_completed):
|
||||
cur_time = time.time()
|
||||
time_diff = cur_time - start_time
|
||||
eps_per_sec = eps_completed / time_diff
|
||||
secs_per_ep = time_diff / eps_completed
|
||||
eps_remaining = (episodes - eps_completed)
|
||||
sys.stderr.write(
|
||||
"[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
|
||||
sys.stderr.write(
|
||||
"[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
|
||||
eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
|
||||
|
||||
sys.stderr.write(
|
||||
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
||||
|
||||
if method == 'random':
|
||||
outcomes = []
|
||||
"""for i in range(1, episodes + 1):
|
||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
board = Board.initial_state
|
||||
while Board.outcome(board) is None:
|
||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||
board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0]
|
||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||
board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if i % 50 == 0:
|
||||
print_time_estimate(i)"""
|
||||
return outcomes
|
||||
elif method == 'pubeval':
|
||||
outcomes = []
|
||||
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll),
|
||||
# which can be used to get the best move according to pubeval
|
||||
for i in range(1, episodes + 1):
|
||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
board = Board.initial_state
|
||||
# print("init:", board, sep="\n")
|
||||
while Board.outcome(board) is None:
|
||||
# print("-"*30)
|
||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||
# print(roll)
|
||||
|
||||
# prev_board = tuple(board)
|
||||
board = (self.make_move(sess, board, roll, 1))[0]
|
||||
# print("post p1:", board, sep="\n")
|
||||
|
||||
# print("."*30)
|
||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||
# print(roll)
|
||||
|
||||
# prev_board = tuple(board)
|
||||
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
||||
# print("post pubeval:", board, sep="\n")
|
||||
|
||||
# print("*"*30)
|
||||
# print(board)
|
||||
# print("+"*30)
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if i % 10 == 0:
|
||||
print_time_estimate(i)
|
||||
|
||||
return outcomes
|
||||
|
||||
elif method == 'dumbeval':
|
||||
outcomes = []
|
||||
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll),
|
||||
# which can be used to get the best move according to pubeval
|
||||
for i in range(1, episodes + 1):
|
||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
board = Board.initial_state
|
||||
# print("init:", board, sep="\n")
|
||||
while Board.outcome(board) is None:
|
||||
# print("-"*30)
|
||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||
# print(roll)
|
||||
|
||||
# prev_board = tuple(board)
|
||||
board = (self.make_move(sess, board, roll, 1))[0]
|
||||
# print("post p1:", board, sep="\n")
|
||||
|
||||
# print("."*30)
|
||||
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||
# print(roll)
|
||||
|
||||
# prev_board = tuple(board)
|
||||
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
|
||||
# print("post pubeval:", board, sep="\n")
|
||||
|
||||
# print("*"*30)
|
||||
# print(board)
|
||||
# print("+"*30)
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if i % 10 == 0:
|
||||
print_time_estimate(i)
|
||||
|
||||
return outcomes
|
||||
|
||||
elif method == 'dumbmodel':
|
||||
outcomes = []
|
||||
"""
|
||||
config_prime = self.config.copy()
|
||||
config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
|
||||
eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
|
||||
#print(self.config, "\n", config_prime)
|
||||
outcomes = []
|
||||
for i in range(1, episodes + 1):
|
||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
board = Board.initial_state
|
||||
while Board.outcome(board) is None:
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
board = (self.make_move(board, self.p1.get_sym(), roll))[0]
|
||||
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if i % 50 == 0:
|
||||
print_time_estimate(i)
|
||||
"""
|
||||
return outcomes
|
||||
else:
|
||||
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
||||
return [0]
|
||||
|
||||
with tf.Session() as session:
|
||||
session.run(tf.global_variables_initializer())
|
||||
self.restore_model(session)
|
||||
outcomes = [(method, do_eval(session,
|
||||
method,
|
||||
self.config['episode_count'],
|
||||
trained_eps=trained_eps))
|
||||
for method
|
||||
in self.config['eval_methods']]
|
||||
return outcomes
|
||||
|
||||
def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
|
||||
with tf.Session() as sess:
|
||||
writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
|
||||
|
||||
|
@ -177,21 +326,30 @@ class Network:
|
|||
eps_per_sec = eps_completed / time_diff
|
||||
secs_per_ep = time_diff / eps_completed
|
||||
eps_remaining = (episodes - eps_completed)
|
||||
sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
|
||||
sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
|
||||
|
||||
sys.stderr.write(
|
||||
"[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
|
||||
sys.stderr.write(
|
||||
"[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
|
||||
eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
|
||||
|
||||
sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
|
||||
outcomes = []
|
||||
for episode in range(1, episodes + 1):
|
||||
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
||||
# TODO decide which player should be here
|
||||
|
||||
|
||||
# TEST
|
||||
#if episode % 1000 == 0:
|
||||
# self.config['eval_methods'] = 'dumbeval'
|
||||
# self.config['episodes'] = 300
|
||||
# outcomes = self.eval(trained_eps)
|
||||
# self.log_eval_outcomes(outcomes, trained_eps=self.episodes_trained)
|
||||
|
||||
#player = random.choice([-1, 1])
|
||||
player = 1
|
||||
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
prev_board, _ = self.make_move(sess, Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
|
||||
if player == -1:
|
||||
prev_board = Board.flip(prev_board)
|
||||
prev_board = Board.initial_state
|
||||
|
||||
# find the best move here, make this move, then change turn as the
|
||||
# first thing inside of the while loop and then call
|
||||
|
@ -199,170 +357,66 @@ class Network:
|
|||
|
||||
# i = 0
|
||||
while Board.outcome(prev_board) is None:
|
||||
# print("-"*30)
|
||||
# print(i)
|
||||
# print(roll)
|
||||
# print(Board.pretty(prev_board))
|
||||
# print("/"*30)
|
||||
# i += 1
|
||||
|
||||
player *= -1
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
#print("PREEEV_BOOOOAAARD:",prev_board)
|
||||
cur_board, cur_board_value = self.make_move(sess,
|
||||
prev_board,
|
||||
(random.randrange(1, 7), random.randrange(1, 7)), player)
|
||||
|
||||
cur_board, cur_board_value = self.make_move(sess, Board.flip(prev_board) if player == -1 else prev_board, roll)
|
||||
if player == -1:
|
||||
cur_board = Board.flip(cur_board)
|
||||
|
||||
# print("cur_board_value:", cur_board_value)
|
||||
#print("The current value:",cur_board_value)
|
||||
|
||||
# adjust weights
|
||||
sess.run(self.training_op,
|
||||
feed_dict = { self.x: np.array(prev_board).reshape((1,26)),
|
||||
self.value_next: cur_board_value })
|
||||
feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player),
|
||||
self.value_next: cur_board_value})
|
||||
|
||||
player *= -1
|
||||
|
||||
|
||||
prev_board = cur_board
|
||||
|
||||
final_board = prev_board
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
|
||||
outcomes.append(Board.outcome(final_board)[1])
|
||||
final_score = np.array([ Board.outcome(final_board)[1] ])
|
||||
final_score = np.array([Board.outcome(final_board)[1]])
|
||||
scaled_final_score = ((final_score + 2) / 4)
|
||||
|
||||
#print("The difference in values:", scaled_final_score - cur_board_value)
|
||||
# print("scaled_final_score",scaled_final_score)
|
||||
|
||||
with tf.name_scope("final"):
|
||||
merged = tf.summary.merge_all()
|
||||
summary, _ = sess.run([merged, self.training_op],
|
||||
feed_dict = { self.x: np.array(prev_board).reshape((1,26)),
|
||||
self.value_next: scaled_final_score.reshape((1, 1)) })
|
||||
feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player),
|
||||
self.value_next: scaled_final_score.reshape((1, 1))})
|
||||
writer.add_summary(summary, episode + trained_eps)
|
||||
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if episode % min(save_step_size, episodes) == 0:
|
||||
sys.stderr.write("[TRAIN] Saving model...\n")
|
||||
self.save_model(sess, episode+trained_eps)
|
||||
self.save_model(sess, episode + trained_eps)
|
||||
|
||||
if episode % 50 == 0:
|
||||
print_time_estimate(episode)
|
||||
|
||||
sys.stderr.write("[TRAIN] Saving model for final episode...\n")
|
||||
self.save_model(sess, episode+trained_eps)
|
||||
self.save_model(sess, episode + trained_eps)
|
||||
|
||||
writer.close()
|
||||
|
||||
return outcomes
|
||||
|
||||
|
||||
# take turn, which finds the best state and picks it, based on the current network
|
||||
# save current state
|
||||
# run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn
|
||||
# run training operation (session.run(self.training_op, {x:x, value_next, value_next})),
|
||||
# (something which does the backprop, based on the state after having taken a turn,
|
||||
# found before, and the state we saved in the beginning and from now we'll
|
||||
# save it at the end of the turn
|
||||
|
||||
# save the current state again, so we can continue running backprop based on the "previous" turn.
|
||||
|
||||
# NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
|
||||
# NOTE: We need to make a method so that we can take a single turn or at least
|
||||
# just pick the next best move, so we know how to evaluate according to TD-learning.
|
||||
# Right now, our game just continues in a while loop with nothing to stop it!
|
||||
|
||||
|
||||
|
||||
def eval(self, trained_eps = 0):
|
||||
def do_eval(sess, method, episodes = 1000, trained_eps = 0):
|
||||
start_time = time.time()
|
||||
|
||||
def print_time_estimate(eps_completed):
|
||||
cur_time = time.time()
|
||||
time_diff = cur_time - start_time
|
||||
eps_per_sec = eps_completed / time_diff
|
||||
secs_per_ep = time_diff / eps_completed
|
||||
eps_remaining = (episodes - eps_completed)
|
||||
sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
|
||||
sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
|
||||
|
||||
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
||||
|
||||
if method == 'random':
|
||||
outcomes = []
|
||||
for i in range(1, episodes + 1):
|
||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
board = Board.initial_state
|
||||
while Board.outcome(board) is None:
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0]
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if i % 50 == 0:
|
||||
print_time_estimate(i)
|
||||
return outcomes
|
||||
elif method == 'pubeval':
|
||||
outcomes = []
|
||||
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
|
||||
for i in range(1, episodes + 1):
|
||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
board = Board.initial_state
|
||||
#print("init:", board, sep="\n")
|
||||
while Board.outcome(board) is None:
|
||||
#print("-"*30)
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
#print(roll)
|
||||
|
||||
prev_board = tuple(board)
|
||||
board = (self.make_move(sess, board, roll))[0]
|
||||
#print("post p1:", board, sep="\n")
|
||||
|
||||
#print("."*30)
|
||||
roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
#print(roll)
|
||||
|
||||
prev_board = tuple(board)
|
||||
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
|
||||
#print("post pubeval:", board, sep="\n")
|
||||
|
||||
|
||||
#print("*"*30)
|
||||
#print(board)
|
||||
#print("+"*30)
|
||||
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
outcomes.append(Board.outcome(board)[1])
|
||||
sys.stderr.write("\n")
|
||||
|
||||
if i % 10 == 0:
|
||||
print_time_estimate(i)
|
||||
|
||||
return outcomes
|
||||
# elif method == 'dumbmodel':
|
||||
# config_prime = self.config.copy()
|
||||
# config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
|
||||
# eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
|
||||
# #print(self.config, "\n", config_prime)
|
||||
# outcomes = []
|
||||
# for i in range(1, episodes + 1):
|
||||
# sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||
# board = Board.initial_state
|
||||
# while Board.outcome(board) is None:
|
||||
# roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
# board = (self.make_move(board, self.p1.get_sym(), roll))[0]
|
||||
|
||||
# roll = (random.randrange(1,7), random.randrange(1,7))
|
||||
# board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
|
||||
# sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
|
||||
# outcomes.append(Board.outcome(board)[1])
|
||||
# sys.stderr.write("\n")
|
||||
|
||||
# if i % 50 == 0:
|
||||
# print_time_estimate(i)
|
||||
# return outcomes
|
||||
else:
|
||||
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
||||
return [0]
|
||||
|
||||
with tf.Session() as session:
|
||||
session .run(tf.global_variables_initializer())
|
||||
self.restore_model(session)
|
||||
outcomes = [ (method, do_eval(session,
|
||||
method,
|
||||
self.config['episode_count'],
|
||||
trained_eps = trained_eps))
|
||||
for method
|
||||
in self.config['eval_methods'] ]
|
||||
return outcomes
|
||||
|
|
170
pubeval/dumbeval.c
Normal file
170
pubeval/dumbeval.c
Normal file
|
@ -0,0 +1,170 @@
|
|||
#include <Python.h>
|
||||
|
||||
static PyObject* DumbevalError;
|
||||
|
||||
static float x[122];
|
||||
|
||||
static const float wc[122] = {
|
||||
5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806};
|
||||
|
||||
static const float wr[122] = {
|
||||
-0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0,
|
||||
0.13681, 1.11245, 0.0, 0.0, -0.02781, -2.77982, 0.0, -0.91035, 0.60015,
|
||||
-1.27266, 0.0, 0.0, 0.0, 0.0, -7.26713, -0.19412, -1.05121, 0.27448, -4.94251,
|
||||
-0.06844, 0.37183, -3.66465, -0.8305, 0.09266, 0.07217, 0.0, 0.29906, -1.26062,
|
||||
0.17405, 0.48302, 2.00366, 0.92321, -0.10839, 1.06349, 0.39521, 3.4204,
|
||||
0.00576, 5.35, 3.8539, -0.09308, 0.17253, 0.13978, 0.2701, -0.52728, 0.88296,
|
||||
0.2252, 0.0, 0.0, -0.12707, 3.05454, 0.31202, -0.88035, -0.01351, 0.0,
|
||||
-3.40177, -0.22082, -0.13022, -0.09795, -2.29847, -12.32252, 0.0, -0.13597,
|
||||
0.12039, 0.85631, 0.0, 0.0, -0.3424, 0.24855, 0.20178, 2.30052, 1.5068,
|
||||
0.0, -0.07456, 5.16874, 0.01418, -1.3464, -1.29506, 0.0, 0.0, -1.40375,
|
||||
0.0, -0.11696, 0.05281, -9.67677, 0.05685, -1.09167, 0.0, 0.0, -2.56906,
|
||||
2.19605, 0.0, 0.68178, -0.08471, 0.0, -2.34631, 1.49549, -2.16183, 0.0,
|
||||
1.16242, 1.08744, -0.1716, 0.25236, 0.13246, -0.37646, 0.0, -2.87401,
|
||||
0.74427, 1.07274, -0.01591, -0.14818, -0.06285, 0.08302, -1.03508
|
||||
};
|
||||
|
||||
void setx(int pos[])
{
    /* Fill the input vector x[] (122 entries) from board position pos[]:
       five inputs for each of the 24 board locations, then one input for
       the opponent's bar men and one for the computer's men off. */
    extern float x[];
    int point;

    /* start from an all-zero vector */
    for (point = 0; point < 122; ++point)
        x[point] = 0.0;

    /* board locations 24 down to 1 map to input groups 0..23 */
    for (point = 1; point <= 24; ++point) {
        float *cell = &x[5 * (point - 1)];
        int men = pos[25 - point];

        if (men == -1) {
            cell[0] = 1.0;              /* exactly one opposing man */
        } else if (men == 1) {
            cell[1] = 1.0;              /* exactly one own man */
        } else if (men >= 2) {
            cell[2] = 1.0;              /* two or more own men */
            if (men == 3)
                cell[3] = 1.0;
            else if (men >= 4)
                cell[4] = (float)(men - 3)/2.0;
        }
        /* zero, or two or more opposing men: the group stays all zero */
    }

    /* opponent bar men (pos[0], negated) */
    x[120] = -(float)(pos[0])/2.0;
    /* computer's men off (pos[26]) */
    x[121] = (float)(pos[26])/15.0;
}
|
||||
|
||||
float dumbeval(int race, int pos[])
|
||||
{
|
||||
/* Backgammon move-selection evaluation function
|
||||
for benchmark comparisons. Computes a linear
|
||||
evaluation function: Score = W * X, where X is
|
||||
an input vector encoding the board state (using
|
||||
a raw encoding of the number of men at each location),
|
||||
and W is a weight vector. Separate weight vectors
|
||||
are used for racing positions and contact positions.
|
||||
Makes lots of obvious mistakes, but provides a
|
||||
decent level of play for benchmarking purposes. */
|
||||
|
||||
/* Provided as a public service to the backgammon
|
||||
programming community by Gerry Tesauro, IBM Research.
|
||||
(e-mail: tesauro@watson.ibm.com) */
|
||||
|
||||
/* The following inputs are needed for this routine:
|
||||
|
||||
race is an integer variable which should be set
|
||||
based on the INITIAL position BEFORE the move.
|
||||
Set race=1 if the position is a race (i.e. no contact)
|
||||
and 0 if the position is a contact position.
|
||||
|
||||
pos[] is an integer array of dimension 28 which
|
||||
should represent a legal final board state after
|
||||
the move. Elements 1-24 correspond to board locations
|
||||
1-24 from computer's point of view, i.e. computer's
|
||||
men move in the negative direction from 24 to 1, and
|
||||
opponent's men move in the positive direction from
|
||||
1 to 24. Computer's men are represented by positive
|
||||
integers, and opponent's men are represented by negative
|
||||
integers. Element 25 represents computer's men on the
|
||||
bar (positive integer), and element 0 represents opponent's
|
||||
men on the bar (negative integer). Element 26 represents
|
||||
computer's men off the board (positive integer), and
|
||||
element 27 represents opponent's men off the board
|
||||
(negative integer). */
|
||||
|
||||
/* Also, be sure to call rdwts() at the start of your
|
||||
program to read in the weight values. Happy hacking] */
|
||||
|
||||
int i;
|
||||
float score;
|
||||
|
||||
if(pos[26]==15) return(99999999.);
|
||||
/* all men off, best possible move */
|
||||
|
||||
setx(pos); /* sets input array x[] */
|
||||
score = 0.0;
|
||||
if(race) { /* use race weights */
|
||||
for(i=0;i<122;++i) score += wr[i]*x[i];
|
||||
}
|
||||
else { /* use contact weights */
|
||||
for(i=0;i<122;++i) score += wc[i]*x[i];
|
||||
}
|
||||
return(score);
|
||||
}
|
||||
|
||||
/* Python entry point: dumbeval.eval(race, board) -> float.
 *
 * race  is interpreted as a boolean ("p" format) and passed to dumbeval().
 * board must be a tuple of exactly 28 integers.
 *
 * Returns the evaluation score as a Python float. Returns NULL with an
 * exception set when the arguments do not parse, when the tuple does not
 * have 28 entries (dumbeval.error), or when an entry cannot be converted
 * to an integer.
 */
static PyObject*
dumbeval_eval(PyObject *self, PyObject *args) {
  int race;
  Py_ssize_t numValues;
  Py_ssize_t i;
  long value;
  int board[28];
  float eval_score;

  PyObject* tuple_obj;
  PyObject* val_obj;

  if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj))
    return NULL;

  numValues = PyTuple_Size(tuple_obj);

  if (numValues < 0) return NULL;
  if (numValues != 28) {
    PyErr_SetString(DumbevalError, "Tuple must have 28 entries");
    return NULL;
  }

  /* Iterate over the tuple to retrieve the positions */
  for (i = 0; i < numValues; i++) {
    val_obj = PyTuple_GetItem(tuple_obj, i);  /* borrowed reference */
    if (val_obj == NULL) return NULL;

    /* PyLong_AsLong signals failure with -1 plus a pending exception;
       -1 alone is a legitimate board value, so both must be checked. */
    value = PyLong_AsLong(val_obj);
    if (value == -1 && PyErr_Occurred()) return NULL;

    board[i] = (int)value;
  }

  eval_score = dumbeval(race, board);
  return Py_BuildValue("f", eval_score);
}
|
||||
|
||||
static PyMethodDef dumbeval_methods[] = {
|
||||
{
|
||||
"eval", dumbeval_eval, METH_VARARGS,
|
||||
"Returns evaluation results for the given board position."
|
||||
},
|
||||
{NULL, NULL, 0, NULL}
|
||||
};
|
||||
|
||||
static struct PyModuleDef dumbeval_definition = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"dumbeval",
|
||||
"A Python module that implements Gerald Tesauro's dumbeval function for evaluation backgammon positions.",
|
||||
-1,
|
||||
dumbeval_methods
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC PyInit_dumbeval(void) {
|
||||
PyObject* module;
|
||||
|
||||
module = PyModule_Create(&dumbeval_definition);
|
||||
if (module == NULL)
|
||||
return NULL;
|
||||
|
||||
DumbevalError = PyErr_NewException("dumbeval.error", NULL, NULL);
|
||||
Py_INCREF(DumbevalError);
|
||||
PyModule_AddObject(module, "error", DumbevalError);
|
||||
|
||||
return module;
|
||||
}
|
9
pubeval/setup_dumb.py
Normal file
9
pubeval/setup_dumb.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
# Build script for the ``dumbeval`` C extension.
# NOTE(review): distutils was removed from the standard library in
# Python 3.12 (PEP 632); this script needs an older interpreter or a
# port to another build backend.
from distutils.core import setup, Extension

# The extension is compiled from the single source file dumbeval.c.
dumbeval = Extension(
    'dumbeval',
    sources=['dumbeval.c'],
)

setup(
    name='dumbeval',
    version='0.1',
    description='Dumbeval for Python',
    ext_modules=[dumbeval],
)
|
Loading…
Reference in New Issue
Block a user