From 006f7917279bbee478e22b8504b84b84c47339da Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 27 Mar 2018 02:26:15 +0200 Subject: [PATCH] Functioning network using board representation shamelessly ripped from Tesauro --- board.py | 29 ++- eval.py | 13 ++ game.py | 5 +- network.py | 412 ++++++++++++++++++++++++------------------ pubeval/dumbeval.c | 170 +++++++++++++++++ pubeval/setup_dumb.py | 9 + 6 files changed, 456 insertions(+), 182 deletions(-) create mode 100644 pubeval/dumbeval.c create mode 100644 pubeval/setup_dumb.py diff --git a/board.py b/board.py index bfa7998..a2b205e 100644 --- a/board.py +++ b/board.py @@ -34,8 +34,33 @@ class Board: board.append(15 - sum(positives)) board.append(-15 - sum(negatives)) return tuple(board) - - + + @staticmethod + def board_features_to_tesauro(board, cur_player): + features = [] + for player in [-1,1]: + sum = 0.0 + for board_range in range(1,25): + pin = board[board_range] + #print("PIIIN:",pin) + feature = [0.0]*4 + if np.sign(pin) == np.sign(player): + sum += abs(pin) + for i in range(min(abs(pin), 3)): + feature[i] = 1 + if (abs(pin) > 3): + feature[3] = (abs(pin)-3)/2 + features += feature + #print("SUUUM:",sum) + # Append the amount of men on the bar of the current player divided by 2 + features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0) + # Calculate how many pieces there must be in the home state and divide it by 15 + features.append((15 - sum) / 15) + features += ([1,0] if np.sign(cur_player) > 0 else [1,0]) + test = np.array(features).reshape(1,-1) + #print("TEST:",test) + return test + @staticmethod diff --git a/eval.py b/eval.py index 1d02a4b..7be0098 100644 --- a/eval.py +++ b/eval.py @@ -2,6 +2,7 @@ from board import Board import numpy as np import pubeval +import dumbeval class Eval: @@ -24,4 +25,16 @@ class Eval: return best_move_pair + @staticmethod + def make_dumbeval_move(board, sym, roll): + legal_moves = Board.calculate_legal_states(board, sym, roll) + moves_and_scores = [ ( board, + dumbeval.eval(False, Board.board_features_to_pubeval(board, sym))) + for board + in legal_moves ] + scores = [ x[1] for x in moves_and_scores ] + best_move_pair = moves_and_scores[np.array(scores).argmax()] + + return best_move_pair + diff --git a/game.py b/game.py index 9469b57..443ac41 100644 --- a/game.py +++ b/game.py @@ -23,18 +23,21 @@ class Game: def roll(self): return self.cup.roll() - + ''' def best_move_and_score(self): roll = self.roll() move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll) self.board = move_and_val[0] return move_and_val + ''' + ''' def next_round(self): roll = self.roll() #print(roll) self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0]) return self.board + ''' def board_state(self): return self.board diff --git a/network.py b/network.py index f058d48..8f8ef18 100644 --- a/network.py +++ b/network.py @@ -8,19 +8,20 @@ import sys import random from eval import Eval + class Network: hidden_size = 40 - input_size = 26 + input_size = 198 output_size = 1 # Can't remember the best learning_rate, look this up - learning_rate = 0.05 + learning_rate = 0.01 # TODO: Actually compile tensorflow properly - #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" + # os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) - + def __init__(self, config, name): self.config = config self.checkpoint_path = config['model_path'] @@ -34,13 +35,13 @@ class Network: self.episodes_trained = 
int(f.read()) else: self.episodes_trained = 0 - + # input = x self.x = tf.placeholder('float', [1, Network.input_size], name='input') self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next") xavier_init = tf.contrib.layers.xavier_initializer() - + W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size), initializer=xavier_init) W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size), @@ -51,8 +52,8 @@ class Network: b_2 = tf.get_variable("b_2", (Network.output_size,), initializer=tf.zeros_initializer) - normalized_input = tf.nn.l2_normalize(self.x) - value_after_input = tf.sigmoid(tf.matmul(normalized_input, W_1) + b_1, name='hidden_layer') + + value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') @@ -63,23 +64,23 @@ class Network: # TODO: Alexander thinks that self.value will be computed twice (instead of once) difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - + trainable_vars = tf.trainable_variables() gradients = tf.gradients(self.value, trainable_vars) - + apply_gradients = [] - + with tf.variable_scope('apply_gradients'): for gradient, trainable_var in zip(gradients, trainable_vars): # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t. backprop_calc = Network.learning_rate * difference_in_values * gradient grad_apply = trainable_var.assign_add(backprop_calc) apply_gradients.append(grad_apply) - + self.training_op = tf.group(*apply_gradients, name='training_op') self.saver = tf.train.Saver(max_to_keep=1) - + def eval_state(self, sess, state): # Run state through a network @@ -112,23 +113,22 @@ class Network: # implement learning_rate * (difference_in_values) * gradients (the # before-mentioned calculation. 
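        # Illustrative sketch (not part of the original patch): the op built in __init__
        # above is a plain TD(0) update; for every trainable tensor w it computes
        #     w <- w + learning_rate * (V_{t+1} - V_t) * dV_t/dw
        # which, spelled out with the same TF 1.x calls used above, is roughly:
        #
        #     delta = tf.reshape(tf.subtract(self.value_next, self.value), [])  # V_{t+1} - V_t
        #     updates = [var.assign_add(Network.learning_rate * delta * grad)
        #                for grad, var in zip(tf.gradients(self.value, tf.trainable_variables()),
        #                                     tf.trainable_variables())]
        #     self.training_op = tf.group(*updates)
        #
        # (local names delta/updates are only for illustration; they mirror
        # difference_in_values and apply_gradients in __init__)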
- # print("Network is evaluating") - #print("eval ({})".format(self.name), state, val, sep="\n") - return sess.run(self.value, feed_dict={self.x: state}) + # print("eval ({})".format(self.name), state, val, sep="\n") + return sess.run(self.value, feed_dict={self.x: state}) def save_model(self, sess, episode_count): self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt')) with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: - print("[NETWK] ({name}) Saving model to:".format(name = self.name), + print("[NETWK] ({name}) Saving model to:".format(name=self.name), os.path.join(self.checkpoint_path, 'model.ckpt')) f.write(str(episode_count) + "\n") - + def restore_model(self, sess): if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')): latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) - print("[NETWK] ({name}) Restoring model from:".format(name = self.name), + print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) self.saver.restore(sess, latest_checkpoint) variables_names = [v.name for v in tf.trainable_variables()] @@ -144,24 +144,173 @@ class Network: with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - def make_move(self, sess, board, roll): + def make_move(self, sess, board, roll, player): # print(Board.pretty(board)) - legal_moves = Board.calculate_legal_states(board, 1, roll) - moves_and_scores = [ (move, self.eval_state(sess, np.array(move).reshape(1,26))) for move in legal_moves ] - scores = [ x[1] for x in moves_and_scores ] + legal_moves = Board.calculate_legal_states(board, player, roll) + moves_and_scores = [(move, self.eval_state(sess, Board.board_features_to_tesauro(move, player))) for move in legal_moves] + scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] best_score_index = np.array(scores).argmax() best_move_pair = moves_and_scores[best_score_index] - #print("Found the best state, being:", np.array(move_scores).argmax()) + # print("Found the best state, being:", np.array(move_scores).argmax()) return best_move_pair - - - def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): + + def eval(self, trained_eps=0): + def do_eval(sess, method, episodes=1000, trained_eps=trained_eps): + start_time = time.time() + + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write( + "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) + + sys.stderr.write( + "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) + + if method == 'random': + outcomes = [] + """for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + while Board.outcome(board) is None: + roll = (random.randrange(1, 7), random.randrange(1, 7)) + board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0] + roll = (random.randrange(1, 7), random.randrange(1, 7)) + board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 50 == 0: + print_time_estimate(i)""" + return outcomes + elif method == 'pubeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), + # which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + # print("init:", board, sep="\n") + while Board.outcome(board) is None: + # print("-"*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = (self.make_move(sess, board, roll, 1))[0] + # print("post p1:", board, sep="\n") + + # print("."*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] + # print("post pubeval:", board, sep="\n") + + # print("*"*30) + # print(board) + # print("+"*30) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes + + elif method == 'dumbeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), + # which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + # print("init:", board, sep="\n") + while Board.outcome(board) is None: + # print("-"*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = (self.make_move(sess, board, roll, 1))[0] + # print("post p1:", board, sep="\n") + + # print("."*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26] + # print("post pubeval:", board, sep="\n") + + # print("*"*30) + # print(board) + # print("+"*30) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes + + elif method == 'dumbmodel': + outcomes = [] + """ + config_prime = self.config.copy() + config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel') + eval_bot = Bot(1, config = config_prime, name = "dumbmodel") + #print(self.config, "\n", config_prime) + outcomes = [] + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + while 
Board.outcome(board) is None: + roll = (random.randrange(1,7), random.randrange(1,7)) + board = (self.make_move(board, self.p1.get_sym(), roll))[0] + + roll = (random.randrange(1,7), random.randrange(1,7)) + board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0]) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 50 == 0: + print_time_estimate(i) + """ + return outcomes + else: + sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) + return [0] + + with tf.Session() as session: + session.run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [(method, do_eval(session, + method, + self.config['episode_count'], + trained_eps=trained_eps)) + for method + in self.config['eval_methods']] + return outcomes + + def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): with tf.Session() as sess: writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph) - + sess.run(tf.global_variables_initializer()) self.restore_model(sess) - + variables_names = [v.name for v in tf.trainable_variables()] values = sess.run(variables_names) for k, v in zip(variables_names, values): @@ -172,197 +321,102 @@ class Network: start_time = time.time() def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed eps_remaining = (episodes - eps_completed) - sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) + sys.stderr.write( + "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[TRAIN] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) - sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) outcomes = [] for episode in range(1, episodes + 1): sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) # TODO decide which player should be here + + + # TEST + #if episode % 1000 == 0: + # self.config['eval_methods'] = 'dumbeval' + # self.config['episodes'] = 300 + # outcomes = self.eval(trained_eps) + # self.log_eval_outcomes(outcomes, trained_eps=self.episodes_trained) + + #player = random.choice([-1, 1]) player = 1 - - roll = (random.randrange(1,7), random.randrange(1,7)) - prev_board, _ = self.make_move(sess, Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll) - if player == -1: - prev_board = Board.flip(prev_board) - + + prev_board = Board.initial_state + # find the best move here, make this move, then change turn as the # first thing inside of the while loop and then call # best_move_and_score to get V_t+1 # i = 0 while Board.outcome(prev_board) is None: - # print("-"*30) - # print(i) - # print(roll) - # print(Board.pretty(prev_board)) - # print("/"*30) - # i += 1 + + #print("PREEEV_BOOOOAAARD:",prev_board) + cur_board, cur_board_value = self.make_move(sess, + prev_board, + (random.randrange(1, 7), random.randrange(1, 7)), player) - player *= -1 - roll = (random.randrange(1,7), random.randrange(1,7)) + #print("The current value:",cur_board_value) - cur_board, cur_board_value = self.make_move(sess, Board.flip(prev_board) if player == -1 else prev_board, roll) - if player == -1: - cur_board = Board.flip(cur_board) - - # print("cur_board_value:", cur_board_value) - # adjust weights sess.run(self.training_op, - feed_dict = { self.x: np.array(prev_board).reshape((1,26)), - self.value_next: cur_board_value }) + feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player), + self.value_next: cur_board_value}) + + player *= -1 + + prev_board = cur_board final_board = prev_board sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) outcomes.append(Board.outcome(final_board)[1]) - final_score = np.array([ Board.outcome(final_board)[1] ]) + final_score = np.array([Board.outcome(final_board)[1]]) scaled_final_score = ((final_score + 2) / 4) - + #print("The difference in values:", scaled_final_score - cur_board_value) # print("scaled_final_score",scaled_final_score) with tf.name_scope("final"): merged = tf.summary.merge_all() summary, _ = sess.run([merged, self.training_op], - feed_dict = { self.x: np.array(prev_board).reshape((1,26)), - self.value_next: scaled_final_score.reshape((1, 1)) }) + feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player), + self.value_next: scaled_final_score.reshape((1, 1))}) writer.add_summary(summary, episode + trained_eps) - + sys.stderr.write("\n") - + if episode % min(save_step_size, episodes) == 0: sys.stderr.write("[TRAIN] Saving model...\n") - self.save_model(sess, episode+trained_eps) + self.save_model(sess, episode + trained_eps) if episode % 50 == 0: print_time_estimate(episode) sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(sess, episode+trained_eps) - + self.save_model(sess, episode + trained_eps) + writer.close() - + return outcomes - - # take turn, which finds the best state and picks it, based on the current network - # save current state - # run training operation (session.run(self.training_op, 
{x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn - # save the current state again, so we can continue running backprop based on the "previous" turn. + # take turn, which finds the best state and picks it, based on the current network + # save current state + # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), + # (something which does the backprop, based on the state after having taken a turn, + # found before, and the state we saved in the beginning and from now we'll + # save it at the end of the turn - # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it! - + # save the current state again, so we can continue running backprop based on the "previous" turn. + + # NOTE: We need to make a method so that we can take a single turn or at least + # just pick the next best move, so we know how to evaluate according to TD-learning. + # Right now, our game just continues in a while loop without nothing to stop it! - def eval(self, trained_eps = 0): - def do_eval(sess, method, episodes = 1000, trained_eps = 0): - start_time = time.time() - - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) - - sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) - - if method == 'random': - outcomes = [] - for i in range(1, episodes + 1): - sys.stderr.write("[EVAL ] Episode {}".format(i)) - board = Board.initial_state - while Board.outcome(board) is None: - roll = (random.randrange(1,7), random.randrange(1,7)) - board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0] - roll = (random.randrange(1,7), random.randrange(1,7)) - board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) - sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - outcomes.append(Board.outcome(board)[1]) - sys.stderr.write("\n") - - if i % 50 == 0: - print_time_estimate(i) - return outcomes - elif method == 'pubeval': - outcomes = [] - # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval - for i in range(1, episodes + 1): - sys.stderr.write("[EVAL ] Episode {}".format(i)) - board = Board.initial_state - #print("init:", board, sep="\n") - while Board.outcome(board) is None: - #print("-"*30) - roll = (random.randrange(1,7), random.randrange(1,7)) - #print(roll) - - prev_board = tuple(board) - board = (self.make_move(sess, board, roll))[0] - #print("post p1:", board, sep="\n") - - #print("."*30) - roll = (random.randrange(1,7), random.randrange(1,7)) - #print(roll) - - prev_board = tuple(board) - board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] - #print("post pubeval:", board, sep="\n") - - - #print("*"*30) - #print(board) - #print("+"*30) - sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - outcomes.append(Board.outcome(board)[1]) - sys.stderr.write("\n") - - if i % 10 == 0: - print_time_estimate(i) - - return outcomes - # elif method == 'dumbmodel': - # config_prime = self.config.copy() - # config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel') - # eval_bot = Bot(1, config = config_prime, name = "dumbmodel") - # #print(self.config, "\n", config_prime) - # outcomes = [] - # for i in range(1, episodes + 1): - # sys.stderr.write("[EVAL ] Episode {}".format(i)) - # board = Board.initial_state - # while Board.outcome(board) is None: - # roll = (random.randrange(1,7), random.randrange(1,7)) - # board = (self.make_move(board, self.p1.get_sym(), roll))[0] - - # roll = (random.randrange(1,7), random.randrange(1,7)) - # board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0]) - # sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - # outcomes.append(Board.outcome(board)[1]) - # sys.stderr.write("\n") - - # if i % 50 == 0: - # print_time_estimate(i) - # return outcomes - else: - sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) - return [0] - - with tf.Session() as session: - session .run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [ (method, do_eval(session, - method, - self.config['episode_count'], - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes diff --git a/pubeval/dumbeval.c b/pubeval/dumbeval.c new file mode 100644 index 0000000..f9e6039 --- /dev/null +++ b/pubeval/dumbeval.c @@ -0,0 +1,170 @@ +#include + +static PyObject* DumbevalError; + +static float x[122]; + +static const float 
wc[122] = { +5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806}; + +static const float wr[122] = { +-0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0, +0.13681, 1.11245, 0.0, 0.0, -0.02781, -2.77982, 0.0, -0.91035, 0.60015, +-1.27266, 0.0, 0.0, 0.0, 0.0, -7.26713, -0.19412, -1.05121, 0.27448, -4.94251, + -0.06844, 0.37183, -3.66465, -0.8305, 0.09266, 0.07217, 0.0, 0.29906, -1.26062, +0.17405, 0.48302, 2.00366, 0.92321, -0.10839, 1.06349, 0.39521, 3.4204, +0.00576, 5.35, 3.8539, -0.09308, 0.17253, 0.13978, 0.2701, -0.52728, 0.88296, +0.2252, 0.0, 0.0, -0.12707, 3.05454, 0.31202, -0.88035, -0.01351, 0.0, +-3.40177, -0.22082, -0.13022, -0.09795, -2.29847, -12.32252, 0.0, -0.13597, +0.12039, 0.85631, 0.0, 0.0, -0.3424, 0.24855, 0.20178, 2.30052, 1.5068, +0.0, -0.07456, 5.16874, 0.01418, -1.3464, -1.29506, 0.0, 0.0, -1.40375, +0.0, -0.11696, 0.05281, -9.67677, 0.05685, -1.09167, 0.0, 0.0, -2.56906, +2.19605, 0.0, 0.68178, -0.08471, 0.0, -2.34631, 1.49549, -2.16183, 0.0, +1.16242, 1.08744, -0.1716, 0.25236, 0.13246, -0.37646, 0.0, -2.87401, +0.74427, 1.07274, -0.01591, -0.14818, -0.06285, 0.08302, -1.03508 +}; + +void setx(int pos[]) +{ + /* sets input vector x[] given board position pos[] */ + extern float x[]; + int j, jm1, n; + /* initialize */ + for(j=0;j<122;++j) x[j] = 0.0; + + /* first encode board locations 24-1 */ + for(j=1;j<=24;++j) { + jm1 = j - 1; + n = pos[25-j]; + if(n!=0) { + if(n==-1) x[5*jm1+0] = 1.0; + if(n==1) x[5*jm1+1] = 1.0; + if(n>=2) x[5*jm1+2] = 1.0; + if(n==3) x[5*jm1+3] = 1.0; + if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0; + } + } + /* encode opponent barmen */ + x[120] = -(float)(pos[0])/2.0; + /* encode computer's menoff */ + x[121] = (float)(pos[26])/15.0; +} + +float dumbeval(int race, int pos[]) +{ + /* Backgammon move-selection evaluation function + for benchmark comparisons. Computes a linear + evaluation function: Score = W * X, where X is + an input vector encoding the board state (using + a raw encoding of the number of men at each location), + and W is a weight vector. 
Separate weight vectors + are used for racing positions and contact positions. + Makes lots of obvious mistakes, but provides a + decent level of play for benchmarking purposes. */ + + /* Provided as a public service to the backgammon + programming community by Gerry Tesauro, IBM Research. + (e-mail: tesauro@watson.ibm.com) */ + + /* The following inputs are needed for this routine: + + race is an integer variable which should be set + based on the INITIAL position BEFORE the move. + Set race=1 if the position is a race (i.e. no contact) + and 0 if the position is a contact position. + + pos[] is an integer array of dimension 28 which + should represent a legal final board state after + the move. Elements 1-24 correspond to board locations + 1-24 from computer's point of view, i.e. computer's + men move in the negative direction from 24 to 1, and + opponent's men move in the positive direction from + 1 to 24. Computer's men are represented by positive + integers, and opponent's men are represented by negative + integers. Element 25 represents computer's men on the + bar (positive integer), and element 0 represents opponent's + men on the bar (negative integer). Element 26 represents + computer's men off the board (positive integer), and + element 27 represents opponent's men off the board + (negative integer). */ + + /* Also, be sure to call rdwts() at the start of your + program to read in the weight values. Happy hacking] */ + + int i; + float score; + + if(pos[26]==15) return(99999999.); + /* all men off, best possible move */ + + setx(pos); /* sets input array x[] */ + score = 0.0; + if(race) { /* use race weights */ + for(i=0;i<122;++i) score += wr[i]*x[i]; + } + else { /* use contact weights */ + for(i=0;i<122;++i) score += wc[i]*x[i]; + } + return(score); +} + +static PyObject* +dumbeval_eval(PyObject *self, PyObject *args) { + int race; + long numValues; + int board[28]; + float eval_score; + + PyObject* tuple_obj; + PyObject* val_obj; + + if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj)) + return NULL; + + numValues = PyTuple_Size(tuple_obj); + + if (numValues < 0) return NULL; + if (numValues != 28) { + PyErr_SetString(DumbevalError, "Tuple must have 28 entries"); + return NULL; + } + + // Iterate over tuple to retreive positions + for (int i=0; i
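The new dumbeval module is built like the existing pubeval extension and is called from
Python with a race flag plus a 28-entry board tuple, as Eval.make_dumbeval_move does above.
A minimal sketch of that flow, assuming setup_dumb.py mirrors the existing pubeval build
script (the real 9-line file is not shown in this hunk) and using a hypothetical pos tuple:

    # setup_dumb.py (assumed contents, mirroring the pubeval build script)
    from distutils.core import setup, Extension

    setup(name='dumbeval', version='1.0',
          ext_modules=[Extension('dumbeval', sources=['dumbeval.c'])])

    # Build in place, then call it the way Eval.make_dumbeval_move does:
    #   python setup_dumb.py build_ext --inplace
    import dumbeval

    pos = (0,) * 28                     # hypothetical 28-entry board tuple, encoded as
                                        # described in the dumbeval() comment above
    score = dumbeval.eval(False, pos)   # False selects the contact weights wc[],
                                        # True selects the race weights wr[]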