commit c248ca0452

Merge branch 'fuck_git' into 'rework-1'

# Conflicts:
#   network.py
board.py | 40

@@ -34,8 +34,44 @@ class Board:
         board.append(15 - sum(positives))
         board.append(-15 - sum(negatives))
         return tuple(board)

+    @staticmethod
+    def board_features_to_own(board, player):
+        board = list(board)
+        positives = [x if x > 0 else 0 for x in board]
+        negatives = [x if x < 0 else 0 for x in board]
+        board.append(15 - sum(positives))
+        board.append(-15 - sum(negatives))
+        # One-hot encoding of whose turn it is. (The original commit had
+        # [1, 0] in both branches, which was clearly a copy-paste bug.)
+        board += ([1, 0] if np.sign(player) > 0 else [0, 1])
+        return np.array(board).reshape(1, -1)
+
+    @staticmethod
+    def board_features_to_tesauro(board, cur_player):
+        features = []
+        for player in [-1, 1]:
+            sum = 0.0
+            for board_range in range(1, 25):
+                pin = board[board_range]
+                #print("PIIIN:",pin)
+                feature = [0.0] * 4
+                if np.sign(pin) == np.sign(player):
+                    sum += abs(pin)
+                    for i in range(min(abs(pin), 3)):
+                        feature[i] = 1
+                    if abs(pin) > 3:
+                        feature[3] = (abs(pin) - 3) / 2
+                features += feature
+            #print("SUUUM:",sum)
+            # Append the number of men on the bar for this player, divided by 2
+            features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0)
+            # Number of men no longer on the board points (on the bar or borne off), divided by 15
+            features.append((15 - sum) / 15)
+        features += ([1, 0] if np.sign(cur_player) > 0 else [0, 1])
+        test = np.array(features).reshape(1, -1)
+        #print("TEST:",test)
+        return test
+
     @staticmethod
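The two encodings added above explain the `input_size` bump from 26 to 30 in network.py further down: `board_features_to_own` extends the 26-cell board with two borne-off counts and a two-bit turn indicator, while `board_features_to_tesauro` produces the classic 198-element Tesauro/TD-Gammon encoding. A quick sanity-check sketch (not part of the commit; assumes the project's 26-cell board layout with the bars at indices 0 and 25):

    from board import Board

    board = Board.initial_state  # 26 entries: opponent bar, points 1-24, own bar
    print(Board.board_features_to_own(board, 1).shape)      # (1, 30)
    print(Board.board_features_to_tesauro(board, 1).shape)  # (1, 198): 2 * (24*4 + 2) + 2
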
eval.py | 13

@@ -2,6 +2,7 @@ from board import Board
 import numpy as np
 import pubeval
+import dumbeval


 class Eval:
@@ -24,4 +25,16 @@ class Eval:

         return best_move_pair

+    @staticmethod
+    def make_dumbeval_move(board, sym, roll):
+        legal_moves = Board.calculate_legal_states(board, sym, roll)
+        moves_and_scores = [(board,
+                             dumbeval.eval(False, Board.board_features_to_pubeval(board, sym)))
+                            for board
+                            in legal_moves]
+        scores = [x[1] for x in moves_and_scores]
+        best_move_pair = moves_and_scores[np.array(scores).argmax()]
+
+        return best_move_pair
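Note that `make_dumbeval_move` always passes `race=False` to the extension, so dumbeval's race weights are never consulted. A hedged usage sketch (not part of the commit; the (3, 1) roll is made up and the compiled dumbeval module must be importable):

    from board import Board
    from eval import Eval

    best_board, best_score = Eval.make_dumbeval_move(Board.initial_state, 1, (3, 1))
    print(best_score)  # dumbeval's linear score for the chosen successor state
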
game.py | 5

@@ -23,18 +23,21 @@ class Game:

     def roll(self):
         return self.cup.roll()

+    '''
     def best_move_and_score(self):
         roll = self.roll()
         move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
         self.board = move_and_val[0]
         return move_and_val
+    '''

+    '''
     def next_round(self):
         roll = self.roll()
         #print(roll)
         self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
         return self.board
+    '''

     def board_state(self):
         return self.board
network.py | 371

@@ -8,19 +8,18 @@ import sys
 import random
 from eval import Eval


 class Network:
     hidden_size = 40
-    input_size = 26
+    input_size = 30
     output_size = 1
     # Can't remember the best learning_rate, look this up
     learning_rate = 0.01
+    board_rep = Board.board_features_to_own

-    # TODO: Actually compile tensorflow properly
-    #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"

     def custom_tanh(self, x, name=None):
         return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

     def __init__(self, config, name):
         self.config = config
         self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
@@ -34,13 +33,13 @@ class Network:
             self.episodes_trained = int(f.read())
         else:
             self.episodes_trained = 0

         # input = x
         self.x = tf.placeholder('float', [1, Network.input_size], name='input')
         self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")

         xavier_init = tf.contrib.layers.xavier_initializer()

         W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
                               initializer=xavier_init)
         W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
@@ -51,8 +50,8 @@ class Network:
         b_2 = tf.get_variable("b_2", (Network.output_size,),
                               initializer=tf.zeros_initializer)

-        normalized_input = tf.nn.l2_normalize(self.x)
-        value_after_input = tf.sigmoid(tf.matmul(normalized_input, W_1) + b_1, name='hidden_layer')
+        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')

         self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
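With this change the raw feature vector feeds the sigmoid hidden layer directly; the L2 normalization of the input is dropped. A NumPy sketch of the forward pass this graph defines (not part of the commit; the weights are random stand-ins for the TensorFlow variables of the same names):

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    rng = np.random.default_rng(0)
    W_1, b_1 = rng.normal(size=(30, 40)), np.zeros(40)  # input -> hidden
    W_2, b_2 = rng.normal(size=(40, 1)), np.zeros(1)    # hidden -> output

    x = rng.normal(size=(1, 30))         # one vector from Network.board_rep
    hidden = sigmoid(x @ W_1 + b_1)
    value = sigmoid(hidden @ W_2 + b_2)  # estimated value in (0, 1)
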
@@ -63,23 +62,23 @@ class Network:
         # TODO: Alexander thinks that self.value will be computed twice (instead of once)
         difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
         tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))

         trainable_vars = tf.trainable_variables()
         gradients = tf.gradients(self.value, trainable_vars)

         apply_gradients = []

         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
                 # Hopefully this is Δw_t = α(V_t+1 - V_t)∇_w V_t.
                 backprop_calc = Network.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)

         self.training_op = tf.group(*apply_gradients, name='training_op')

         self.saver = tf.train.Saver(max_to_keep=1)

     def eval_state(self, sess, state):
         # Run state through a network
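The loop above hand-rolls the TD(0) update Δw_t = α(V_{t+1} - V_t)∇_w V_t rather than using a built-in optimizer; `difference_in_values` is the TD error and `assign_add` applies the scaled gradient. A scalar sketch of the same rule (not part of the commit; all numbers are made up):

    alpha = 0.01              # Network.learning_rate
    v_t, v_next = 0.42, 0.55  # value of current state, value of next state
    grad_w = 0.3              # dV_t/dw for a single weight
    w = 1.0
    w += alpha * (v_next - v_t) * grad_w  # what trainable_var.assign_add computes
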
@@ -112,23 +111,22 @@ class Network:
         # implement learning_rate * (difference_in_values) * gradients (the
         # before-mentioned calculation.

         # print("Network is evaluating")
-        #print("eval ({})".format(self.name), state, val, sep="\n")
-        return sess.run(self.value, feed_dict={self.x: state})
-
+        # print("eval ({})".format(self.name), state, val, sep="\n")

+        return sess.run(self.value, feed_dict={self.x: state})

     def save_model(self, sess, episode_count):
         self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'))
         with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
-            print("[NETWK] ({name}) Saving model to:".format(name = self.name),
+            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                   os.path.join(self.checkpoint_path, 'model.ckpt'))
             f.write(str(episode_count) + "\n")

     def restore_model(self, sess):
         if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
             latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
-            print("[NETWK] ({name}) Restoring model from:".format(name = self.name),
+            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                   str(latest_checkpoint))
             self.saver.restore(sess, latest_checkpoint)
             variables_names = [v.name for v in tf.trainable_variables()]
@@ -144,24 +142,173 @@ class Network:
             with open(episode_count_path, 'r') as f:
                 self.config['start_episode'] = int(f.read())

-    def make_move(self, sess, board, roll):
+    def make_move(self, sess, board, roll, player):
         # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, 1, roll)
-        moves_and_scores = [ (move, self.eval_state(sess, np.array(move).reshape(1,26))) for move in legal_moves ]
-        scores = [ x[1] for x in moves_and_scores ]
+        legal_moves = Board.calculate_legal_states(board, player, roll)
+        moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves]
+        scores = [x[1] if np.sign(player) > 0 else 1 - x[1] for x in moves_and_scores]
         best_score_index = np.array(scores).argmax()
         best_move_pair = moves_and_scores[best_score_index]
-        #print("Found the best state, being:", np.array(move_scores).argmax())
+        # print("Found the best state, being:", np.array(move_scores).argmax())
         return best_move_pair

-    def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
+    def eval(self, trained_eps=0):
+        def do_eval(sess, method, episodes=1000, trained_eps=trained_eps):
+            start_time = time.time()
+
+            def print_time_estimate(eps_completed):
+                cur_time = time.time()
+                time_diff = cur_time - start_time
+                eps_per_sec = eps_completed / time_diff
+                secs_per_ep = time_diff / eps_completed
+                eps_remaining = (episodes - eps_completed)
+                sys.stderr.write(
+                    "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+                sys.stderr.write(
+                    "[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
+                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
+
+            sys.stderr.write(
+                "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
+
+            if method == 'random':
+                outcomes = []
+                """for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+                        board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0]
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+                        board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 50 == 0:
+                        print_time_estimate(i)"""
+                return outcomes
+            elif method == 'pubeval':
+                outcomes = []
+                # Evaluation against pubeval; make_pubeval_move(board, sym, roll)
+                # returns the best move according to pubeval
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    # print("init:", board, sep="\n")
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+                        board = (self.make_move(sess, board, roll, 1))[0]
+                        # print("post p1:", board, sep="\n")
+
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
+                        # print("post pubeval:", board, sep="\n")
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 10 == 0:
+                        print_time_estimate(i)
+
+                return outcomes
+
+            elif method == 'dumbeval':
+                outcomes = []
+                # Same loop as for pubeval, but the -1 player's moves come from
+                # make_dumbeval_move(board, sym, roll)
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+                        board = (self.make_move(sess, board, roll, 1))[0]
+
+                        roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+                        board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 10 == 0:
+                        print_time_estimate(i)
+
+                return outcomes
+
+            elif method == 'dumbmodel':
+                outcomes = []
+                """
+                config_prime = self.config.copy()
+                config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
+                eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
+                #print(self.config, "\n", config_prime)
+                outcomes = []
+                for i in range(1, episodes + 1):
+                    sys.stderr.write("[EVAL ] Episode {}".format(i))
+                    board = Board.initial_state
+                    while Board.outcome(board) is None:
+                        roll = (random.randrange(1,7), random.randrange(1,7))
+                        board = (self.make_move(board, self.p1.get_sym(), roll))[0]
+
+                        roll = (random.randrange(1,7), random.randrange(1,7))
+                        board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
+                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                    outcomes.append(Board.outcome(board)[1])
+                    sys.stderr.write("\n")
+
+                    if i % 50 == 0:
+                        print_time_estimate(i)
+                """
+                return outcomes
+            else:
+                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
+                return [0]
+
+        with tf.Session() as session:
+            session.run(tf.global_variables_initializer())
+            self.restore_model(session)
+            outcomes = [(method, do_eval(session,
+                                         method,
+                                         self.config['episode_count'],
+                                         trained_eps=trained_eps))
+                        for method
+                        in self.config['eval_methods']]
+            return outcomes
+
+    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
         with tf.Session() as sess:
             writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)

             sess.run(tf.global_variables_initializer())
             self.restore_model(sess)

             variables_names = [v.name for v in tf.trainable_variables()]
             values = sess.run(variables_names)
             for k, v in zip(variables_names, values):
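`make_move` now serves both players with a single network: player 1 picks the successor with the highest predicted value V, while player -1 picks the one with the highest 1 - V, i.e. the lowest V. A small sketch of the scoring rule (not part of the commit; the values are made up):

    import numpy as np

    values = [0.2, 0.7, 0.5]  # network outputs for three legal successor states
    player = -1
    scores = [v if np.sign(player) > 0 else 1 - v for v in values]
    print(int(np.array(scores).argmax()))  # 0: the lowest V is best for player -1
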
@@ -172,75 +319,70 @@ class Network:
             start_time = time.time()

             def print_time_estimate(eps_completed):
                 cur_time = time.time()
                 time_diff = cur_time - start_time
                 eps_per_sec = eps_completed / time_diff
                 secs_per_ep = time_diff / eps_completed
                 eps_remaining = (episodes - eps_completed)
-                sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-                sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
+                sys.stderr.write(
+                    "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
+                sys.stderr.write(
+                    "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
+                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))

             sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
             outcomes = []
             for episode in range(1, episodes + 1):
                 sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
                 # TODO decide which player should be here
                 player = 1

-                roll = (random.randrange(1,7), random.randrange(1,7))
-                prev_board, _ = self.make_move(sess, Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
-                if player == -1:
-                    prev_board = Board.flip(prev_board)
+                prev_board = Board.initial_state

                 # find the best move here, make this move, then change turn as the
                 # first thing inside of the while loop and then call
                 # best_move_and_score to get V_t+1

                 while Board.outcome(prev_board) is None:
-                    # print("-"*30)
-                    # print(roll)
-                    # print(Board.pretty(prev_board))
-                    # print("/"*30)
-
-                    player *= -1
-                    roll = (random.randrange(1,7), random.randrange(1,7))
-
-                    cur_board, cur_board_value = self.make_move(sess, Board.flip(prev_board) if player == -1 else prev_board, roll)
-                    if player == -1:
-                        cur_board = Board.flip(cur_board)
-
-                    # print("cur_board_value:", cur_board_value)
+                    #print("PREEEV_BOOOOAAARD:",prev_board)
+                    cur_board, cur_board_value = self.make_move(sess,
+                                                                prev_board,
+                                                                (random.randrange(1, 7), random.randrange(1, 7)),
+                                                                player)
+                    #print("The current value:",cur_board_value)

                     # adjust weights
                     sess.run(self.training_op,
-                             feed_dict = { self.x: np.array(prev_board).reshape((1,26)),
-                                           self.value_next: cur_board_value })
+                             feed_dict={self.x: Network.board_rep(prev_board, player),
+                                        self.value_next: cur_board_value})
+
+                    player *= -1

                     prev_board = cur_board

                 final_board = prev_board
                 sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
                 outcomes.append(Board.outcome(final_board)[1])
-                final_score = np.array([ Board.outcome(final_board)[1] ])
+                final_score = np.array([Board.outcome(final_board)[1]])
                 scaled_final_score = ((final_score + 2) / 4)
+                #print("The difference in values:", scaled_final_score - cur_board_value)
                 # print("scaled_final_score",scaled_final_score)

                 with tf.name_scope("final"):
                     merged = tf.summary.merge_all()
                     summary, _ = sess.run([merged, self.training_op],
-                                          feed_dict = { self.x: np.array(prev_board).reshape((1,26)),
-                                                        self.value_next: scaled_final_score.reshape((1, 1)) })
+                                          feed_dict={self.x: Network.board_rep(prev_board, player),
+                                                     self.value_next: scaled_final_score.reshape((1, 1))})
                     writer.add_summary(summary, episode + trained_eps)

                 sys.stderr.write("\n")

                 if episode % min(save_step_size, episodes) == 0:
                     sys.stderr.write("[TRAIN] Saving model...\n")
-                    self.save_model(sess, episode+trained_eps)
+                    self.save_model(sess, episode + trained_eps)

                 if episode % 50 == 0:
                     print_time_estimate(episode)
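`scaled_final_score` maps the game outcome, apparently ranging over [-2, 2] given the (x + 2) / 4 scaling (gammon loss to gammon win), onto the network's sigmoid output range [0, 1] so it can serve as the final TD target:

    for outcome in (-2, -1, 1, 2):
        print(outcome, (outcome + 2) / 4)  # -2 -> 0.0, -1 -> 0.25, 1 -> 0.75, 2 -> 1.0
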
@@ -266,95 +408,18 @@ class Network:
-        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
-            start_time = time.time()
-
-            def print_time_estimate(eps_completed):
-                cur_time = time.time()
-                time_diff = cur_time - start_time
-                eps_per_sec = eps_completed / time_diff
-                secs_per_ep = time_diff / eps_completed
-                eps_remaining = (episodes - eps_completed)
-                sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
-                sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
-
-            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
-
-            if method == 'random':
-                outcomes = []
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    board = Board.initial_state
-                    while Board.outcome(board) is None:
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-                        board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0]
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-                        board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
-                    outcomes.append(Board.outcome(board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 50 == 0:
-                        print_time_estimate(i)
-                return outcomes
-            elif method == 'pubeval':
-                outcomes = []
-                # Evaluation against pubeval; make_pubeval_move(board, sym, roll)
-                # returns the best move according to pubeval
-                for i in range(1, episodes + 1):
-                    sys.stderr.write("[EVAL ] Episode {}".format(i))
-                    board = Board.initial_state
-                    while Board.outcome(board) is None:
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-
-                        prev_board = tuple(board)
-                        board = (self.make_move(sess, board, roll))[0]
-
-                        roll = (random.randrange(1,7), random.randrange(1,7))
-
-                        prev_board = tuple(board)
-                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
-                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
-                    outcomes.append(Board.outcome(board)[1])
-                    sys.stderr.write("\n")
-
-                    if i % 10 == 0:
-                        print_time_estimate(i)
-
-                return outcomes
-            # elif method == 'dumbmodel':
-            #     config_prime = self.config.copy()
-            #     config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
-            #     eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
-            #     #print(self.config, "\n", config_prime)
-            #     outcomes = []
-            #     for i in range(1, episodes + 1):
-            #         sys.stderr.write("[EVAL ] Episode {}".format(i))
-            #         board = Board.initial_state
-            #         while Board.outcome(board) is None:
-            #             roll = (random.randrange(1,7), random.randrange(1,7))
-            #             board = (self.make_move(board, self.p1.get_sym(), roll))[0]
-
-            #             roll = (random.randrange(1,7), random.randrange(1,7))
-            #             board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
-            #         sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
-            #         outcomes.append(Board.outcome(board)[1])
-            #         sys.stderr.write("\n")
-
-            #         if i % 50 == 0:
-            #             print_time_estimate(i)
-            #     return outcomes
-            else:
-                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
-                return [0]
+            writer.close()
+
+            return outcomes
+
+    # take turn, which finds the best state and picks it, based on the current network
+    # save current state
+    # run training operation (session.run(self.training_op, {x:x, value_next, value_next})),
+    # (something which does the backprop, based on the state after having taken a turn,
+    # found before, and the state we saved in the beginning and from now we'll
+    # save it at the end of the turn
+
+    # save the current state again, so we can continue running backprop based on the "previous" turn.

         if tf_session == None:
pubeval/dumbeval.c | 199 (new file)

@@ -0,0 +1,199 @@
+#include <Python.h>
+
+static PyObject* DumbevalError;
+
+static float x[122];
+
+static const float wc[122] = {
+    5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806};
+
+/* An alternative set of contact weights, kept commented out in the source:
+   1.5790816238841092, 1.6374860177130541, -1.7131823639980923, -0.9286186784962336, -1.0732080528763888,
+   -0.33851674519289876, 1.5798155080270462, 2.3161915581553414, 1.5625330782392322, 0.9397141260075461,
+   0.8386342522957442, 1.2380864901133144, -2.803703105809909, -1.6033863837759044, -1.9297462408169208,
+   2.804924084193149, 0.9270839975087402, 0.9877927467766145, -1.0075116465703597, -0.9456578829797895,
+   -2.592017567014881, 0.6309857231907587, 2.04590249003744, -0.7982917574924828, -1.4539868823698936,
+   1.0841407450630234, 0.45211788236898887, -1.2713606178159307, 0.8688872440724307, -0.6732738151904405,
+   2.2362742485632294, -0.6581729637609781, -1.7948051663967473, 2.1883788452643564, 2.1598171424723214,
+   0.40802272166662146, -0.9708789129385202, -0.28407011999124165, 1.132858480655588, 0.35009713673111253,
+   2.396877030228498, -2.9621397724422653, 1.607067798976531, 1.0644990486021744, 0.31954763526104113,
+   1.3044736141405133, -2.7454899725805606, -2.7379143210889545, -1.803990720175892, 0.46979843403681576,
+   -1.7142750941084806, -0.8151527229519924, -2.009462889335147, -0.3918389579023729, -1.2877598286852634,
+   2.555703689627613, 0.9185193346378826, -2.4440956502956404, -1.5557875467629176, 1.6171292628313898,
+   -0.7350519162308693, 2.9185129503030653, -0.02369662637182124, 0.9957404325370858, -0.6504711593915609,
+   2.6190546093943468, -0.36103491516117003, -0.5988376927918715, 0.16399156134136383, 0.3254074568551131,
+   -1.5638349190057885, 0.8561543642997189, -0.0880209333042492, 1.323918411026094, -0.9498883976797834,
+   2.3050169940592458, -2.859322940360703, 2.1798224505428836, 0.03769734441005257, 2.806706515762855,
+   -0.514728418369482, -2.7130236727731454, 1.343193402901159, -1.542350700154035, 1.1197565339573625,
+   -1.4498511795864624, 1.3472224178544003, 0.7044576479382245, -2.284211306571646, -1.7289596273930532,
+   -1.7276292685923906, -0.1945401442950634, 2.0338744133468643, 2.001064062247366, 1.9649901287717713,
+   1.5235253273336475, 0.40016636047698606, -1.3276206938801058, 0.8496121993449899, 1.054662320349336,
+   -1.1897996492934584, 0.49610727347392025, -1.8539475848522708, 0.4713599305742626, -2.8424352653158573,
+   -2.526691049928613, 2.1369664337786274, 1.0616438676464632, 1.9487914860665452, 2.822108017102477,
+   -0.3393405083020449, 2.787144781914554, -2.401723402781605, -1.1675562811241997, -1.1542961327714207,
+   0.18253192955355502, -2.418436664206371, 0.7423935287565309, 2.9903418274144666, -1.3503112004693552,
+   -2.649146174480099, -0.5447080156947952
+*/
+
+static const float wr[122] = {
+    -0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0,
+    0.13681, 1.11245, 0.0, 0.0, -0.02781, -2.77982, 0.0, -0.91035, 0.60015,
+    -1.27266, 0.0, 0.0, 0.0, 0.0, -7.26713, -0.19412, -1.05121, 0.27448, -4.94251,
+    -0.06844, 0.37183, -3.66465, -0.8305, 0.09266, 0.07217, 0.0, 0.29906, -1.26062,
+    0.17405, 0.48302, 2.00366, 0.92321, -0.10839, 1.06349, 0.39521, 3.4204,
+    0.00576, 5.35, 3.8539, -0.09308, 0.17253, 0.13978, 0.2701, -0.52728, 0.88296,
+    0.2252, 0.0, 0.0, -0.12707, 3.05454, 0.31202, -0.88035, -0.01351, 0.0,
+    -3.40177, -0.22082, -0.13022, -0.09795, -2.29847, -12.32252, 0.0, -0.13597,
+    0.12039, 0.85631, 0.0, 0.0, -0.3424, 0.24855, 0.20178, 2.30052, 1.5068,
+    0.0, -0.07456, 5.16874, 0.01418, -1.3464, -1.29506, 0.0, 0.0, -1.40375,
+    0.0, -0.11696, 0.05281, -9.67677, 0.05685, -1.09167, 0.0, 0.0, -2.56906,
+    2.19605, 0.0, 0.68178, -0.08471, 0.0, -2.34631, 1.49549, -2.16183, 0.0,
+    1.16242, 1.08744, -0.1716, 0.25236, 0.13246, -0.37646, 0.0, -2.87401,
+    0.74427, 1.07274, -0.01591, -0.14818, -0.06285, 0.08302, -1.03508
+};
+
+void setx(int pos[])
+{
+    /* sets input vector x[] given board position pos[] */
+    extern float x[];
+    int j, jm1, n;
+
+    /* initialize */
+    for(j=0;j<122;++j) x[j] = 0.0;
+
+    /* first encode board locations 24-1 */
+    for(j=1;j<=24;++j) {
+        jm1 = j - 1;
+        n = pos[25-j];
+        if(n!=0) {
+            if(n==-1) x[5*jm1+0] = 1.0;
+            if(n==1)  x[5*jm1+1] = 1.0;
+            if(n>=2)  x[5*jm1+2] = 1.0;
+            if(n==3)  x[5*jm1+3] = 1.0;
+            if(n>=4)  x[5*jm1+4] = (float)(n-3)/2.0;
+        }
+    }
+    /* encode opponent barmen */
+    x[120] = -(float)(pos[0])/2.0;
+    /* encode computer's menoff */
+    x[121] = (float)(pos[26])/15.0;
+}
+
+float dumbeval(int race, int pos[])
+{
+    /* Backgammon move-selection evaluation function
+       for benchmark comparisons. Computes a linear
+       evaluation function: Score = W * X, where X is
+       an input vector encoding the board state (using
+       a raw encoding of the number of men at each location),
+       and W is a weight vector. Separate weight vectors
+       are used for racing positions and contact positions.
+       Makes lots of obvious mistakes, but provides a
+       decent level of play for benchmarking purposes. */
+
+    /* Provided as a public service to the backgammon
+       programming community by Gerry Tesauro, IBM Research.
+       (e-mail: tesauro@watson.ibm.com) */
+
+    /* The following inputs are needed for this routine:
+
+       race is an integer variable which should be set
+       based on the INITIAL position BEFORE the move.
+       Set race=1 if the position is a race (i.e. no contact)
+       and 0 if the position is a contact position.
+
+       pos[] is an integer array of dimension 28 which
+       should represent a legal final board state after
+       the move. Elements 1-24 correspond to board locations
+       1-24 from the computer's point of view, i.e. the computer's
+       men move in the negative direction from 24 to 1, and the
+       opponent's men move in the positive direction from
+       1 to 24. The computer's men are represented by positive
+       integers, and the opponent's men are represented by negative
+       integers. Element 25 represents the computer's men on the
+       bar (positive integer), and element 0 represents the opponent's
+       men on the bar (negative integer). Element 26 represents the
+       computer's men off the board (positive integer), and
+       element 27 represents the opponent's men off the board
+       (negative integer). */
+
+    /* Also, be sure to call rdwts() at the start of your
+       program to read in the weight values. Happy hacking] */
+
+    int i;
+    float score;
+
+    if(pos[26]==15) return(99999999.);
+    /* all men off, best possible move */
+
+    setx(pos); /* sets input array x[] */
+    score = 0.0;
+    if(race) { /* use race weights */
+        for(i=0;i<122;++i) score += wr[i]*x[i];
+    }
+    else { /* use contact weights */
+        for(i=0;i<122;++i) score += wc[i]*x[i];
+    }
+    return(score);
+}
+
+static PyObject*
+dumbeval_eval(PyObject *self, PyObject *args) {
+    int race;
+    long numValues;
+    int board[28];
+    float eval_score;
+
+    PyObject* tuple_obj;
+    PyObject* val_obj;
+
+    if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj))
+        return NULL;
+
+    numValues = PyTuple_Size(tuple_obj);
+
+    if (numValues < 0) return NULL;
+    if (numValues != 28) {
+        PyErr_SetString(DumbevalError, "Tuple must have 28 entries");
+        return NULL;
+    }
+
+    // Iterate over the tuple to retrieve positions
+    for (int i=0; i<numValues; i++) {
+        val_obj = PyTuple_GetItem(tuple_obj, i);
+        board[i] = PyLong_AsLong(val_obj);
+    }
+
+    eval_score = dumbeval(race, board);
+    return Py_BuildValue("f", eval_score);
+}
+
+static PyMethodDef dumbeval_methods[] = {
+    {
+        "eval", dumbeval_eval, METH_VARARGS,
+        "Returns evaluation results for the given board position."
+    },
+    {NULL, NULL, 0, NULL}
+};
+
+static struct PyModuleDef dumbeval_definition = {
+    PyModuleDef_HEAD_INIT,
+    "dumbeval",
+    "A Python module that implements Gerald Tesauro's dumbeval function for evaluating backgammon positions.",
+    -1,
+    dumbeval_methods
+};
+
+PyMODINIT_FUNC PyInit_dumbeval(void) {
+    PyObject* module;
+
+    module = PyModule_Create(&dumbeval_definition);
+    if (module == NULL)
+        return NULL;
+
+    DumbevalError = PyErr_NewException("dumbeval.error", NULL, NULL);
+    Py_INCREF(DumbevalError);
+    PyModule_AddObject(module, "error", DumbevalError);
+
+    return module;
+}
pubeval/setup_dumb.py | 9 (new file)

@@ -0,0 +1,9 @@
+from distutils.core import setup, Extension
+
+dumbeval = Extension('dumbeval',
+                     sources = ['dumbeval.c'])
+
+setup (name = 'dumbeval',
+       version = '0.1',
+       description = 'Dumbeval for Python',
+       ext_modules = [dumbeval])
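setup_dumb.py mirrors the existing pubeval extension build. A hedged sketch of building and calling the module (not part of the commit; the build command is the standard distutils invocation, and the all-zero position tuple is only a placeholder):

    # In pubeval/: python setup_dumb.py build_ext --inplace
    import dumbeval

    # 28-entry position tuple in pubeval's layout: opponent bar, points 1-24,
    # computer bar, then men off the board for both sides.
    pos = tuple([0] * 28)
    print(dumbeval.eval(False, pos))  # race=False selects the contact weights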