From 98c9af72e7a0c9332f31ab7c88987851630eca1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Thu, 22 Mar 2018 15:30:47 +0100 Subject: [PATCH 01/17] rework network --- main.py | 46 +++++------ network.py | 230 +++++++++++++++++++++++++++++------------------------ plot.py | 2 +- 3 files changed, 151 insertions(+), 127 deletions(-) diff --git a/main.py b/main.py index 8d42e2b..bc8de09 100644 --- a/main.py +++ b/main.py @@ -102,29 +102,29 @@ if args.list_models: sys.stderr.write(" {name}: {eps_trained}\n".format(name = model[0], eps_trained = model[1])) exit() - -# Set up network -from network import Network -network = Network(config, config['model']) -eps = config['start_episode'] -# Set up variables -episode_count = config['episode_count'] +if __name__ == "__main__": + # Set up network + from network import Network + network = Network(config, config['model']) + start_episode = network.episodes_trained + + # Set up variables + episode_count = config['episode_count'] -if args.train: - while True: - train_outcome = network.train_model(episodes = episode_count, trained_eps = eps) - eps += episode_count - log_train_outcome(train_outcome, trained_eps = eps) - if config['eval_after_train']: - eval_outcomes = network.eval(trained_eps = eps) - log_eval_outcomes(eval_outcomes, trained_eps = eps) - if not config['train_perpetually']: - break -elif args.eval: - eps = config['start_episode'] - outcomes = network.eval() - log_eval_outcomes(outcomes, trained_eps = eps) -#elif args.play: -# g.play(episodes = episode_count) + if args.train: + while True: + train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode) + start_episode += episode_count + log_train_outcome(train_outcome, trained_eps = start_episode) + if config['eval_after_train']: + eval_outcomes = network.eval(trained_eps = start_episode) + log_eval_outcomes(eval_outcomes, trained_eps = start_episode) + if not config['train_perpetually']: + break + elif args.eval: + outcomes = network.eval() + log_eval_outcomes(outcomes, trained_eps = start_episode) + # elif args.play: + # g.play(episodes = episode_count) diff --git a/network.py b/network.py index 62b1d17..f058d48 100644 --- a/network.py +++ b/network.py @@ -13,7 +13,7 @@ class Network: input_size = 26 output_size = 1 # Can't remember the best learning_rate, look this up - learning_rate = 0.1 + learning_rate = 0.05 # TODO: Actually compile tensorflow properly #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" @@ -23,12 +23,20 @@ class Network: def __init__(self, config, name): self.config = config - self.session = tf.Session() self.checkpoint_path = config['model_path'] + self.name = name + + # Restore trained episode count for model + episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") + if os.path.isfile(episode_count_path): + with open(episode_count_path, 'r') as f: + self.episodes_trained = int(f.read()) + else: + self.episodes_trained = 0 # input = x - self.x = tf.placeholder('float', [1, Network.input_size], name='x') + self.x = tf.placeholder('float', [1, Network.input_size], name='input') self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next") xavier_init = tf.contrib.layers.xavier_initializer() @@ -43,20 +51,22 @@ class Network: b_2 = tf.get_variable("b_2", (Network.output_size,), initializer=tf.zeros_initializer) - value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') + normalized_input = tf.nn.l2_normalize(self.x) + value_after_input = 
tf.sigmoid(tf.matmul(normalized_input, W_1) + b_1, name='hidden_layer') - self.value = self.custom_tanh(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') + self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') # tf.reduce_sum basically finds the sum of its input, so this gives the # difference between the two values, in case they should be lists, which # they might be if our input changes # TODO: Alexander thinks that self.value will be computed twice (instead of once) - difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference') + difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), []) + tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) trainable_vars = tf.trainable_variables() gradients = tf.gradients(self.value, trainable_vars) - + apply_gradients = [] with tf.variable_scope('apply_gradients'): @@ -67,13 +77,10 @@ class Network: apply_gradients.append(grad_apply) self.training_op = tf.group(*apply_gradients, name='training_op') - - self.saver = tf.train.Saver(max_to_keep=1) - self.session.run(tf.global_variables_initializer()) - self.restore_model() + self.saver = tf.train.Saver(max_to_keep=1) - def eval_state(self, state): + def eval_state(self, sess, state): # Run state through a network # Remember to create placeholders for everything because wtf tensorflow @@ -107,25 +114,25 @@ class Network: # print("Network is evaluating") - val = self.session.run(self.value, feed_dict={self.x: state}) #print("eval ({})".format(self.name), state, val, sep="\n") - return val + return sess.run(self.value, feed_dict={self.x: state}) - def save_model(self, episode_count): - self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt')) + + def save_model(self, sess, episode_count): + self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt')) with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: print("[NETWK] ({name}) Saving model to:".format(name = self.name), os.path.join(self.checkpoint_path, 'model.ckpt')) f.write(str(episode_count) + "\n") - def restore_model(self): + def restore_model(self, sess): if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')): latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) print("[NETWK] ({name}) Restoring model from:".format(name = self.name), str(latest_checkpoint)) - self.saver.restore(self.session, latest_checkpoint) + self.saver.restore(sess, latest_checkpoint) variables_names = [v.name for v in tf.trainable_variables()] - values = self.session.run(variables_names) + values = sess.run(variables_names) for k, v in zip(variables_names, values): print("Variable: ", k) print("Shape: ", v.shape) @@ -137,26 +144,10 @@ class Network: with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - # Have a circular dependency, #fuck, need to rewrite something - def adjust_weights(self, board, v_next): -# print("lol") - board = np.array(board).reshape((1,26)) - self.session.run(self.training_op, feed_dict = { self.x: board, - self.value_next: v_next }) - - - # while game isn't done: - #x_next = g.next_move() - #value_next = network.eval_state(x_next) - #self.session.run(self.training_op, feed_dict={self.x: x, self.value_next: value_next}) - #x = x_next - - - - def make_move(self, board, roll): + def make_move(self, sess, board, roll): # print(Board.pretty(board)) legal_moves = Board.calculate_legal_states(board, 1, 
roll) - moves_and_scores = [ (move, self.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] + moves_and_scores = [ (move, self.eval_state(sess, np.array(move).reshape(1,26))) for move in legal_moves ] scores = [ x[1] for x in moves_and_scores ] best_score_index = np.array(scores).argmax() best_move_pair = moves_and_scores[best_score_index] @@ -165,73 +156,101 @@ class Network: def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): - start_time = time.time() + with tf.Session() as sess: + writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph) + + sess.run(tf.global_variables_initializer()) + self.restore_model(sess) + + variables_names = [v.name for v in tf.trainable_variables()] + values = sess.run(variables_names) + for k, v in zip(variables_names, values): + print("Variable: ", k) + print("Shape: ", v.shape) + print(v) - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) + start_time = time.time() + + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) + sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) - sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) - outcomes = [] - for episode in range(1, episodes + 1): - sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) - # TODO decide which player should be here - player = 1 + sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) + outcomes = [] + for episode in range(1, episodes + 1): + sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) + # TODO decide which player should be here + player = 1 - roll = (random.randrange(1,7), random.randrange(1,7)) - prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll) - if player == -1: - prev_board = Board.flip(prev_board) - - # find the best move here, make this move, then change turn as the - # first thing inside of the while loop and then call - # best_move_and_score to get V_t+1 - - # i = 0 - while Board.outcome(prev_board) is None: - # print("-"*30) - # print(i) - # print(roll) - # print(Board.pretty(prev_board)) - # print("/"*30) - # i += 1 - - player *= -1 roll = (random.randrange(1,7), random.randrange(1,7)) - - cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll) + prev_board, _ = self.make_move(sess, Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll) if player == -1: - cur_board = Board.flip(cur_board) - - self.adjust_weights(prev_board, cur_board_value) - - prev_board = cur_board - - final_board = prev_board - sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) - outcomes.append(Board.outcome(final_board)[1]) - final_score = np.array([ Board.outcome(final_board)[1] ]) - self.adjust_weights(prev_board, final_score.reshape((1, 1))) - - sys.stderr.write("\n") + prev_board = Board.flip(prev_board) - if episode % min(save_step_size, episodes) == 0: - sys.stderr.write("[TRAIN] Saving model...\n") - self.save_model(episode+trained_eps) + # find the best move here, make this move, then change turn as the + # first thing inside of the while loop and then call + # best_move_and_score to get V_t+1 - if episode % 50 == 0: - print_time_estimate(episode) + # i = 0 + while Board.outcome(prev_board) is None: + # print("-"*30) + # print(i) + # print(roll) + # print(Board.pretty(prev_board)) + # print("/"*30) + # i += 1 + + player *= -1 + roll = (random.randrange(1,7), random.randrange(1,7)) - sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(episode+trained_eps) + cur_board, cur_board_value = self.make_move(sess, Board.flip(prev_board) if player == -1 else prev_board, roll) + if player == -1: + cur_board = Board.flip(cur_board) + + # print("cur_board_value:", cur_board_value) + + # adjust weights + sess.run(self.training_op, + feed_dict = { self.x: np.array(prev_board).reshape((1,26)), + self.value_next: cur_board_value }) + prev_board = cur_board + + final_board = prev_board + sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) + outcomes.append(Board.outcome(final_board)[1]) + final_score = np.array([ Board.outcome(final_board)[1] ]) + scaled_final_score = ((final_score + 2) / 4) + + # print("scaled_final_score",scaled_final_score) + + with tf.name_scope("final"): + merged = tf.summary.merge_all() + summary, _ = sess.run([merged, 
self.training_op], + feed_dict = { self.x: np.array(prev_board).reshape((1,26)), + self.value_next: scaled_final_score.reshape((1, 1)) }) + writer.add_summary(summary, episode + trained_eps) + + sys.stderr.write("\n") + + if episode % min(save_step_size, episodes) == 0: + sys.stderr.write("[TRAIN] Saving model...\n") + self.save_model(sess, episode+trained_eps) + + if episode % 50 == 0: + print_time_estimate(episode) + + sys.stderr.write("[TRAIN] Saving model for final episode...\n") + self.save_model(sess, episode+trained_eps) + + writer.close() - return outcomes + return outcomes # take turn, which finds the best state and picks it, based on the current network @@ -244,7 +263,7 @@ class Network: def eval(self, trained_eps = 0): - def do_eval(method, episodes = 1000, trained_eps = 0): + def do_eval(sess, method, episodes = 1000, trained_eps = 0): start_time = time.time() def print_time_estimate(eps_completed): @@ -265,7 +284,7 @@ class Network: board = Board.initial_state while Board.outcome(board) is None: roll = (random.randrange(1,7), random.randrange(1,7)) - board = (self.p1.make_move(board, self.p1.get_sym(), roll))[0] + board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0] roll = (random.randrange(1,7), random.randrange(1,7)) board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) @@ -288,7 +307,7 @@ class Network: #print(roll) prev_board = tuple(board) - board = (self.make_move(board, roll))[0] + board = (self.make_move(sess, board, roll))[0] #print("post p1:", board, sep="\n") #print("."*30) @@ -336,9 +355,14 @@ class Network: else: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0] - - return [ (method, do_eval(method, - self.config['episode_count'], - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] + + with tf.Session() as session: + session .run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [ (method, do_eval(session, + method, + self.config['episode_count'], + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes diff --git a/plot.py b/plot.py index 8261cde..5a94f51 100644 --- a/plot.py +++ b/plot.py @@ -44,7 +44,7 @@ if __name__ == '__main__': plt.show() while True: - df = dataframes('default')['eval'] + df = dataframes('a')['eval'] print(df) From 1f1e806306eb0aded61c2582f416b55655145d94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Mon, 26 Mar 2018 15:55:48 +0200 Subject: [PATCH 02/17] fix errant whitespace --- network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network.py b/network.py index f058d48..d32c6b9 100644 --- a/network.py +++ b/network.py @@ -357,7 +357,7 @@ class Network: return [0] with tf.Session() as session: - session .run(tf.global_variables_initializer()) + session.run(tf.global_variables_initializer()) self.restore_model(session) outcomes = [ (method, do_eval(session, method, From 4c43bf19a3f38bf424922631157ea09f3902ea34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Mon, 26 Mar 2018 16:45:26 +0200 Subject: [PATCH 03/17] Add evaluation variance benchmark To do a benchmark for `pubeval`, run `python3 main.py --bench-eval-scores --eval-methods pubeval` Logs will be placed in directory `bench` Use `plot_bench(data_path)` in `plot.py` for plotting --- main.py | 159 +++++++++++++++++++++++++++++++++++++---------------- network.py 
| 28 +++++++--- plot.py | 12 ++++ 3 files changed, 143 insertions(+), 56 deletions(-) diff --git a/main.py b/main.py index bc8de09..b5a8ad0 100644 --- a/main.py +++ b/main.py @@ -3,38 +3,6 @@ import sys import os import time -model_storage_path = 'models' - -# Create models folder -if not os.path.exists(model_storage_path): - os.makedirs(model_storage_path) - -# Define helper functions -def log_train_outcome(outcome, trained_eps = 0): - format_vars = { 'trained_eps': trained_eps, - 'count': len(train_outcome), - 'sum': sum(train_outcome), - 'mean': sum(train_outcome) / len(train_outcome), - 'time': int(time.time()) - } - with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f: - f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n") - - -def log_eval_outcomes(outcomes, trained_eps = 0): - for outcome in outcomes: - scores = outcome[1] - format_vars = { 'trained_eps': trained_eps, - 'method': outcome[0], - 'count': len(scores), - 'sum': sum(scores), - 'mean': sum(scores) / len(scores), - 'time': int(time.time()) - } - with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f: - f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n") - - # Parse command line arguments parser = argparse.ArgumentParser(description="Backgammon games") parser.add_argument('--episodes', action='store', dest='episode_count', @@ -47,13 +15,15 @@ parser.add_argument('--eval-methods', action='store', default=['random'], nargs='*', help='specifies evaluation methods') parser.add_argument('--eval', action='store_true', - help='whether to evaluate the neural network with a random choice bot') + help='evaluate the neural network with a random choice bot') +parser.add_argument('--bench-eval-scores', action='store_true', + help='benchmark scores of evaluation measures. 
episode counts and model specified as options are ignored.') parser.add_argument('--train', action='store_true', - help='whether to train the neural network') + help='train the neural network') parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train', - help='whether to evaluate after each training session') + help='evaluate after each training session') parser.add_argument('--play', action='store_true', - help='whether to play with the neural network') + help='play with the neural network') parser.add_argument('--start-episode', action='store', dest='start_episode', type=int, default=0, help='episode count to start at; purely for display purposes') @@ -66,27 +36,73 @@ args = parser.parse_args() config = { 'model': args.model, - 'model_path': os.path.join(model_storage_path, args.model), 'episode_count': args.episode_count, 'eval_methods': args.eval_methods, 'train': args.train, 'play': args.play, 'eval': args.eval, + 'bench_eval_scores': args.bench_eval_scores, 'eval_after_train': args.eval_after_train, 'start_episode': args.start_episode, 'train_perpetually': args.train_perpetually, - 'model_storage_path': model_storage_path + 'model_storage_path': 'models', + 'bench_storage_path': 'bench' } +# Create models folder +if not os.path.exists(config['model_storage_path']): + os.makedirs(config['model_storage_path']) + +model_path = lambda: os.path.join(config['model_storage_path'], config['model']) + # Make sure directories exist -model_path = os.path.join(config['model_path']) -log_path = os.path.join(model_path, 'logs') -if not os.path.isdir(model_path): - os.mkdir(model_path) +log_path = os.path.join(model_path(), 'logs') +if not os.path.isdir(model_path()): + os.mkdir(model_path()) if not os.path.isdir(log_path): os.mkdir(log_path) + + + +# Define helper functions +def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")): + format_vars = { 'trained_eps': trained_eps, + 'count': len(train_outcome), + 'sum': sum(train_outcome), + 'mean': sum(train_outcome) / len(train_outcome), + 'time': int(time.time()) + } + with open(log_path, 'a+') as f: + f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n") +def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")): + for outcome in outcomes: + scores = outcome[1] + format_vars = { 'trained_eps': trained_eps, + 'method': outcome[0], + 'count': len(scores), + 'sum': sum(scores), + 'mean': sum(scores) / len(scores), + 'time': int(time.time()) + } + with open(log_path, 'a+') as f: + f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n") + +def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0): + for outcome in outcomes: + scores = outcome[1] + format_vars = { 'trained_eps': trained_eps, + 'method': outcome[0], + 'count': len(scores), + 'sum': sum(scores), + 'mean': sum(scores) / len(scores), + 'time': time, + 'index': index, + } + with open(log_path, 'a+') as f: + f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n") + # Do actions specified by command-line if args.list_models: def get_eps_trained(folder): @@ -94,7 +110,7 @@ if args.list_models: return int(f.read()) model_folders = [ f.path for f - in os.scandir(model_storage_path) + in os.scandir(config['model_storage_path']) if f.is_dir() ] models = [ (folder, get_eps_trained(folder)) for folder in model_folders ] sys.stderr.write("Found {} 
model(s)\n".format(len(models))) @@ -106,13 +122,13 @@ if args.list_models: if __name__ == "__main__": # Set up network from network import Network - network = Network(config, config['model']) - start_episode = network.episodes_trained # Set up variables episode_count = config['episode_count'] if args.train: + network = Network(config, config['model']) + start_episode = network.episodes_trained while True: train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode) start_episode += episode_count @@ -122,9 +138,58 @@ if __name__ == "__main__": log_eval_outcomes(eval_outcomes, trained_eps = start_episode) if not config['train_perpetually']: break + + elif args.eval: - outcomes = network.eval() + network = Network(config, config['model']) + start_episode = network.episodes_trained + # Evaluation measures are described in `config` + outcomes = network.eval(config['episode_count']) log_eval_outcomes(outcomes, trained_eps = start_episode) # elif args.play: # g.play(episodes = episode_count) - + + + elif args.bench_eval_scores: + # Make sure benchmark directory exists + if not os.path.isdir(config['bench_storage_path']): + os.mkdir(config['bench_storage_path']) + + config = config.copy() + config['model'] = 'bench' + + network = Network(config, config['model']) + start_episode = network.episodes_trained + + if start_episode == 0: + print("Model not trained! Beware of using non-existing models!") + exit() + + sample_count = 20 + episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000, + 10000, 20000] + + def do_eval(sess): + for eval_method in config['eval_methods']: + result_path = os.path.join(config['bench_storage_path'], + eval_method) + "-{}.log".format(int(time.time())) + for n in episode_counts: + for i in range(sample_count): + start_time = time.time() + # Evaluation measure to be benchmarked are described in `config` + outcomes = network.eval(episode_count = n, + tf_session = sess) + time_diff = time.time() - start_time + log_bench_eval_outcomes(outcomes, + time = time_diff, + index = i, + trained_eps = start_episode, + log_path = result_path) + + # CMM: oh no + import tensorflow as tf + with tf.Session() as session: + network.restore_model(session) + do_eval(session) + + diff --git a/network.py b/network.py index d32c6b9..d9a9f52 100644 --- a/network.py +++ b/network.py @@ -13,7 +13,7 @@ class Network: input_size = 26 output_size = 1 # Can't remember the best learning_rate, look this up - learning_rate = 0.05 + learning_rate = 0.01 # TODO: Actually compile tensorflow properly #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" @@ -23,7 +23,7 @@ class Network: def __init__(self, config, name): self.config = config - self.checkpoint_path = config['model_path'] + self.checkpoint_path = os.path.join(config['model_storage_path'], config['model']) self.name = name @@ -262,7 +262,7 @@ class Network: - def eval(self, trained_eps = 0): + def eval(self, episode_count, trained_eps = 0, tf_session = None): def do_eval(sess, method, episodes = 1000, trained_eps = 0): start_time = time.time() @@ -356,13 +356,23 @@ class Network: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0] - with tf.Session() as session: - session.run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [ (method, do_eval(session, + + if tf_session == None: + with tf.Session(): + session.run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [ (method, do_eval(session, + method, + episode_count, + trained_eps = 
trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes + else: + outcomes = [ (method, do_eval(tf_session, method, - self.config['episode_count'], + episode_count, trained_eps = trained_eps)) for method in self.config['eval_methods'] ] - return outcomes + return outcomes diff --git a/plot.py b/plot.py index 5a94f51..c820c55 100644 --- a/plot.py +++ b/plot.py @@ -9,9 +9,21 @@ import matplotlib.dates as mdates train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean'] eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean'] +bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean'] model_path = 'models' +def plot_bench(data_path): + df = pd.read_csv(data_path, sep=";", + names=bench_headers, index_col=[0,1,2]) + for method_label in df.index.levels[0]: + cur_df = df.loc[method_label] + plot = df[['mean']].loc['pubeval'].unstack().T.plot.box() + plot.set_title("Evaluation variance, {}".format(method_label)) + plot.set_xlabel("Sample count") + plot.set_ylabel("Mean score") + plt.show(plot.figure) + del cur_df, plot def dataframes(model_name): def df_timestamp_to_datetime(df): From 9b2bbfb4d1a855f4fbcc4f3bed5f93929a0d7aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Mon, 26 Mar 2018 17:06:12 +0200 Subject: [PATCH 04/17] print variances when plotting evaluation variance benchmark --- plot.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/plot.py b/plot.py index c820c55..5957854 100644 --- a/plot.py +++ b/plot.py @@ -17,13 +17,18 @@ def plot_bench(data_path): df = pd.read_csv(data_path, sep=";", names=bench_headers, index_col=[0,1,2]) for method_label in df.index.levels[0]: - cur_df = df.loc[method_label] - plot = df[['mean']].loc['pubeval'].unstack().T.plot.box() + df_prime = df[['mean']].loc[method_label].unstack().T + plot = df_prime.plot.box() plot.set_title("Evaluation variance, {}".format(method_label)) plot.set_xlabel("Sample count") plot.set_ylabel("Mean score") plt.show(plot.figure) - del cur_df, plot + + # for later use: + variances = df_prime.var() + print(variances) + + del df_prime, plot, variances def dataframes(model_name): def df_timestamp_to_datetime(df): From 006f7917279bbee478e22b8504b84b84c47339da Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 27 Mar 2018 02:26:15 +0200 Subject: [PATCH 05/17] Functioning network using board representation shamelessly ripped from Tesauro --- board.py | 29 ++- eval.py | 13 ++ game.py | 5 +- network.py | 412 ++++++++++++++++++++++++------------------ pubeval/dumbeval.c | 170 +++++++++++++++++ pubeval/setup_dumb.py | 9 + 6 files changed, 456 insertions(+), 182 deletions(-) create mode 100644 pubeval/dumbeval.c create mode 100644 pubeval/setup_dumb.py diff --git a/board.py b/board.py index bfa7998..a2b205e 100644 --- a/board.py +++ b/board.py @@ -34,8 +34,33 @@ class Board: board.append(15 - sum(positives)) board.append(-15 - sum(negatives)) return tuple(board) - - + + @staticmethod + def board_features_to_tesauro(board, cur_player): + features = [] + for player in [-1,1]: + sum = 0.0 + for board_range in range(1,25): + pin = board[board_range] + #print("PIIIN:",pin) + feature = [0.0]*4 + if np.sign(pin) == np.sign(player): + sum += abs(pin) + for i in range(min(abs(pin), 3)): + feature[i] = 1 + if (abs(pin) > 3): + feature[3] = (abs(pin)-3)/2 + features += feature + #print("SUUUM:",sum) + # Append the amount of men on the bar of the current player divided by 2 + features.append((board[0] if 
np.sign(player) < 0 else board[25]) / 2.0) + # Calculate how many pieces there must be in the home state and divide it by 15 + features.append((15 - sum) / 15) + features += ([1,0] if np.sign(cur_player) > 0 else [1,0]) + test = np.array(features).reshape(1,-1) + #print("TEST:",test) + return test + @staticmethod diff --git a/eval.py b/eval.py index 1d02a4b..7be0098 100644 --- a/eval.py +++ b/eval.py @@ -2,6 +2,7 @@ from board import Board import numpy as np import pubeval +import dumbeval class Eval: @@ -24,4 +25,16 @@ class Eval: return best_move_pair + @staticmethod + def make_dumbeval_move(board, sym, roll): + legal_moves = Board.calculate_legal_states(board, sym, roll) + moves_and_scores = [ ( board, + dumbeval.eval(False, Board.board_features_to_pubeval(board, sym))) + for board + in legal_moves ] + scores = [ x[1] for x in moves_and_scores ] + best_move_pair = moves_and_scores[np.array(scores).argmax()] + + return best_move_pair + diff --git a/game.py b/game.py index 9469b57..443ac41 100644 --- a/game.py +++ b/game.py @@ -23,18 +23,21 @@ class Game: def roll(self): return self.cup.roll() - + ''' def best_move_and_score(self): roll = self.roll() move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll) self.board = move_and_val[0] return move_and_val + ''' + ''' def next_round(self): roll = self.roll() #print(roll) self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0]) return self.board + ''' def board_state(self): return self.board diff --git a/network.py b/network.py index f058d48..8f8ef18 100644 --- a/network.py +++ b/network.py @@ -8,19 +8,20 @@ import sys import random from eval import Eval + class Network: hidden_size = 40 - input_size = 26 + input_size = 198 output_size = 1 # Can't remember the best learning_rate, look this up - learning_rate = 0.05 + learning_rate = 0.01 # TODO: Actually compile tensorflow properly - #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" + # os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) - + def __init__(self, config, name): self.config = config self.checkpoint_path = config['model_path'] @@ -34,13 +35,13 @@ class Network: self.episodes_trained = int(f.read()) else: self.episodes_trained = 0 - + # input = x self.x = tf.placeholder('float', [1, Network.input_size], name='input') self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next") xavier_init = tf.contrib.layers.xavier_initializer() - + W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size), initializer=xavier_init) W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size), @@ -51,8 +52,8 @@ class Network: b_2 = tf.get_variable("b_2", (Network.output_size,), initializer=tf.zeros_initializer) - normalized_input = tf.nn.l2_normalize(self.x) - value_after_input = tf.sigmoid(tf.matmul(normalized_input, W_1) + b_1, name='hidden_layer') + + value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer') self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') @@ -63,23 +64,23 @@ class Network: # TODO: Alexander thinks that self.value will be computed twice (instead of once) difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), []) tf.summary.scalar("difference_in_values", tf.abs(difference_in_values)) - + trainable_vars = tf.trainable_variables() gradients = tf.gradients(self.value, trainable_vars) - + apply_gradients 
= [] - + with tf.variable_scope('apply_gradients'): for gradient, trainable_var in zip(gradients, trainable_vars): # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t. backprop_calc = Network.learning_rate * difference_in_values * gradient grad_apply = trainable_var.assign_add(backprop_calc) apply_gradients.append(grad_apply) - + self.training_op = tf.group(*apply_gradients, name='training_op') self.saver = tf.train.Saver(max_to_keep=1) - + def eval_state(self, sess, state): # Run state through a network @@ -112,23 +113,22 @@ class Network: # implement learning_rate * (difference_in_values) * gradients (the # before-mentioned calculation. - # print("Network is evaluating") - #print("eval ({})".format(self.name), state, val, sep="\n") - return sess.run(self.value, feed_dict={self.x: state}) + # print("eval ({})".format(self.name), state, val, sep="\n") + return sess.run(self.value, feed_dict={self.x: state}) def save_model(self, sess, episode_count): self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt')) with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: - print("[NETWK] ({name}) Saving model to:".format(name = self.name), + print("[NETWK] ({name}) Saving model to:".format(name=self.name), os.path.join(self.checkpoint_path, 'model.ckpt')) f.write(str(episode_count) + "\n") - + def restore_model(self, sess): if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')): latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) - print("[NETWK] ({name}) Restoring model from:".format(name = self.name), + print("[NETWK] ({name}) Restoring model from:".format(name=self.name), str(latest_checkpoint)) self.saver.restore(sess, latest_checkpoint) variables_names = [v.name for v in tf.trainable_variables()] @@ -144,24 +144,173 @@ class Network: with open(episode_count_path, 'r') as f: self.config['start_episode'] = int(f.read()) - def make_move(self, sess, board, roll): + def make_move(self, sess, board, roll, player): # print(Board.pretty(board)) - legal_moves = Board.calculate_legal_states(board, 1, roll) - moves_and_scores = [ (move, self.eval_state(sess, np.array(move).reshape(1,26))) for move in legal_moves ] - scores = [ x[1] for x in moves_and_scores ] + legal_moves = Board.calculate_legal_states(board, player, roll) + moves_and_scores = [(move, self.eval_state(sess, Board.board_features_to_tesauro(move, player))) for move in legal_moves] + scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] best_score_index = np.array(scores).argmax() best_move_pair = moves_and_scores[best_score_index] - #print("Found the best state, being:", np.array(move_scores).argmax()) + # print("Found the best state, being:", np.array(move_scores).argmax()) return best_move_pair - - - def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): + + def eval(self, trained_eps=0): + def do_eval(sess, method, episodes=1000, trained_eps=trained_eps): + start_time = time.time() + + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write( + "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) + + sys.stderr.write( + "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) + + if method == 'random': + outcomes = [] + """for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + while Board.outcome(board) is None: + roll = (random.randrange(1, 7), random.randrange(1, 7)) + board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0] + roll = (random.randrange(1, 7), random.randrange(1, 7)) + board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 50 == 0: + print_time_estimate(i)""" + return outcomes + elif method == 'pubeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), + # which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + # print("init:", board, sep="\n") + while Board.outcome(board) is None: + # print("-"*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = (self.make_move(sess, board, roll, 1))[0] + # print("post p1:", board, sep="\n") + + # print("."*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] + # print("post pubeval:", board, sep="\n") + + # print("*"*30) + # print(board) + # print("+"*30) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes + + elif method == 'dumbeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), + # which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + # print("init:", board, sep="\n") + while Board.outcome(board) is None: + # print("-"*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = (self.make_move(sess, board, roll, 1))[0] + # print("post p1:", board, sep="\n") + + # print("."*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26] + # print("post pubeval:", board, sep="\n") + + # print("*"*30) + # print(board) + # print("+"*30) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes + + elif method == 'dumbmodel': + outcomes = [] + """ + config_prime = self.config.copy() + config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel') + eval_bot = Bot(1, config = config_prime, name = "dumbmodel") + #print(self.config, "\n", config_prime) + outcomes = [] + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + while 
Board.outcome(board) is None: + roll = (random.randrange(1,7), random.randrange(1,7)) + board = (self.make_move(board, self.p1.get_sym(), roll))[0] + + roll = (random.randrange(1,7), random.randrange(1,7)) + board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0]) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 50 == 0: + print_time_estimate(i) + """ + return outcomes + else: + sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) + return [0] + + with tf.Session() as session: + session.run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [(method, do_eval(session, + method, + self.config['episode_count'], + trained_eps=trained_eps)) + for method + in self.config['eval_methods']] + return outcomes + + def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): with tf.Session() as sess: writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph) - + sess.run(tf.global_variables_initializer()) self.restore_model(sess) - + variables_names = [v.name for v in tf.trainable_variables()] values = sess.run(variables_names) for k, v in zip(variables_names, values): @@ -172,197 +321,102 @@ class Network: start_time = time.time() def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed eps_remaining = (episodes - eps_completed) - sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) + sys.stderr.write( + "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[TRAIN] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) - sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) outcomes = [] for episode in range(1, episodes + 1): sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) # TODO decide which player should be here + + + # TEST + #if episode % 1000 == 0: + # self.config['eval_methods'] = 'dumbeval' + # self.config['episodes'] = 300 + # outcomes = self.eval(trained_eps) + # self.log_eval_outcomes(outcomes, trained_eps=self.episodes_trained) + + #player = random.choice([-1, 1]) player = 1 - - roll = (random.randrange(1,7), random.randrange(1,7)) - prev_board, _ = self.make_move(sess, Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll) - if player == -1: - prev_board = Board.flip(prev_board) - + + prev_board = Board.initial_state + # find the best move here, make this move, then change turn as the # first thing inside of the while loop and then call # best_move_and_score to get V_t+1 # i = 0 while Board.outcome(prev_board) is None: - # print("-"*30) - # print(i) - # print(roll) - # print(Board.pretty(prev_board)) - # print("/"*30) - # i += 1 + + #print("PREEEV_BOOOOAAARD:",prev_board) + cur_board, cur_board_value = self.make_move(sess, + prev_board, + (random.randrange(1, 7), random.randrange(1, 7)), player) - player *= -1 - roll = (random.randrange(1,7), random.randrange(1,7)) + #print("The current value:",cur_board_value) - cur_board, cur_board_value = self.make_move(sess, Board.flip(prev_board) if player == -1 else prev_board, roll) - if player == -1: - cur_board = Board.flip(cur_board) - - # print("cur_board_value:", cur_board_value) - # adjust weights sess.run(self.training_op, - feed_dict = { self.x: np.array(prev_board).reshape((1,26)), - self.value_next: cur_board_value }) + feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player), + self.value_next: cur_board_value}) + + player *= -1 + + prev_board = cur_board final_board = prev_board sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) outcomes.append(Board.outcome(final_board)[1]) - final_score = np.array([ Board.outcome(final_board)[1] ]) + final_score = np.array([Board.outcome(final_board)[1]]) scaled_final_score = ((final_score + 2) / 4) - + #print("The difference in values:", scaled_final_score - cur_board_value) # print("scaled_final_score",scaled_final_score) with tf.name_scope("final"): merged = tf.summary.merge_all() summary, _ = sess.run([merged, self.training_op], - feed_dict = { self.x: np.array(prev_board).reshape((1,26)), - self.value_next: scaled_final_score.reshape((1, 1)) }) + feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player), + self.value_next: scaled_final_score.reshape((1, 1))}) writer.add_summary(summary, episode + trained_eps) - + sys.stderr.write("\n") - + if episode % min(save_step_size, episodes) == 0: sys.stderr.write("[TRAIN] Saving model...\n") - self.save_model(sess, episode+trained_eps) + self.save_model(sess, episode + trained_eps) if episode % 50 == 0: print_time_estimate(episode) sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(sess, episode+trained_eps) - + self.save_model(sess, episode + trained_eps) + writer.close() - + return outcomes - - # take turn, which finds the best state and picks it, based on the current network - # save current state - # run training operation (session.run(self.training_op, 
{x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn - # save the current state again, so we can continue running backprop based on the "previous" turn. + # take turn, which finds the best state and picks it, based on the current network + # save current state + # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), + # (something which does the backprop, based on the state after having taken a turn, + # found before, and the state we saved in the beginning and from now we'll + # save it at the end of the turn - # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it! - + # save the current state again, so we can continue running backprop based on the "previous" turn. + + # NOTE: We need to make a method so that we can take a single turn or at least + # just pick the next best move, so we know how to evaluate according to TD-learning. + # Right now, our game just continues in a while loop without nothing to stop it! - def eval(self, trained_eps = 0): - def do_eval(sess, method, episodes = 1000, trained_eps = 0): - start_time = time.time() - - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) - - sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) - - if method == 'random': - outcomes = [] - for i in range(1, episodes + 1): - sys.stderr.write("[EVAL ] Episode {}".format(i)) - board = Board.initial_state - while Board.outcome(board) is None: - roll = (random.randrange(1,7), random.randrange(1,7)) - board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0] - roll = (random.randrange(1,7), random.randrange(1,7)) - board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) - sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - outcomes.append(Board.outcome(board)[1]) - sys.stderr.write("\n") - - if i % 50 == 0: - print_time_estimate(i) - return outcomes - elif method == 'pubeval': - outcomes = [] - # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval - for i in range(1, episodes + 1): - sys.stderr.write("[EVAL ] Episode {}".format(i)) - board = Board.initial_state - #print("init:", board, sep="\n") - while Board.outcome(board) is None: - #print("-"*30) - roll = (random.randrange(1,7), random.randrange(1,7)) - #print(roll) - - prev_board = tuple(board) - board = (self.make_move(sess, board, roll))[0] - #print("post p1:", board, sep="\n") - - #print("."*30) - roll = (random.randrange(1,7), random.randrange(1,7)) - #print(roll) - - prev_board = tuple(board) - board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] - #print("post pubeval:", board, sep="\n") - - - #print("*"*30) - #print(board) - #print("+"*30) - sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - outcomes.append(Board.outcome(board)[1]) - sys.stderr.write("\n") - - if i % 10 == 0: - print_time_estimate(i) - - return outcomes - # elif method == 'dumbmodel': - # config_prime = self.config.copy() - # config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel') - # eval_bot = Bot(1, config = config_prime, name = "dumbmodel") - # #print(self.config, "\n", config_prime) - # outcomes = [] - # for i in range(1, episodes + 1): - # sys.stderr.write("[EVAL ] Episode {}".format(i)) - # board = Board.initial_state - # while Board.outcome(board) is None: - # roll = (random.randrange(1,7), random.randrange(1,7)) - # board = (self.make_move(board, self.p1.get_sym(), roll))[0] - - # roll = (random.randrange(1,7), random.randrange(1,7)) - # board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0]) - # sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - # outcomes.append(Board.outcome(board)[1]) - # sys.stderr.write("\n") - - # if i % 50 == 0: - # print_time_estimate(i) - # return outcomes - else: - sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) - return [0] - - with tf.Session() as session: - session .run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [ (method, do_eval(session, - method, - self.config['episode_count'], - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes diff --git a/pubeval/dumbeval.c b/pubeval/dumbeval.c new file mode 100644 index 0000000..f9e6039 --- /dev/null +++ b/pubeval/dumbeval.c @@ -0,0 +1,170 @@ +#include + +static PyObject* DumbevalError; + +static float x[122]; + +static const float 
wc[122] = { +5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806}; + +static const float wr[122] = { +-0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0, +0.13681, 1.11245, 0.0, 0.0, -0.02781, -2.77982, 0.0, -0.91035, 0.60015, +-1.27266, 0.0, 0.0, 0.0, 0.0, -7.26713, -0.19412, -1.05121, 0.27448, -4.94251, + -0.06844, 0.37183, -3.66465, -0.8305, 0.09266, 0.07217, 0.0, 0.29906, -1.26062, +0.17405, 0.48302, 2.00366, 0.92321, -0.10839, 1.06349, 0.39521, 3.4204, +0.00576, 5.35, 3.8539, -0.09308, 0.17253, 0.13978, 0.2701, -0.52728, 0.88296, +0.2252, 0.0, 0.0, -0.12707, 3.05454, 0.31202, -0.88035, -0.01351, 0.0, +-3.40177, -0.22082, -0.13022, -0.09795, -2.29847, -12.32252, 0.0, -0.13597, +0.12039, 0.85631, 0.0, 0.0, -0.3424, 0.24855, 0.20178, 2.30052, 1.5068, +0.0, -0.07456, 5.16874, 0.01418, -1.3464, -1.29506, 0.0, 0.0, -1.40375, +0.0, -0.11696, 0.05281, -9.67677, 0.05685, -1.09167, 0.0, 0.0, -2.56906, +2.19605, 0.0, 0.68178, -0.08471, 0.0, -2.34631, 1.49549, -2.16183, 0.0, +1.16242, 1.08744, -0.1716, 0.25236, 0.13246, -0.37646, 0.0, -2.87401, +0.74427, 1.07274, -0.01591, -0.14818, -0.06285, 0.08302, -1.03508 +}; + +void setx(int pos[]) +{ + /* sets input vector x[] given board position pos[] */ + extern float x[]; + int j, jm1, n; + /* initialize */ + for(j=0;j<122;++j) x[j] = 0.0; + + /* first encode board locations 24-1 */ + for(j=1;j<=24;++j) { + jm1 = j - 1; + n = pos[25-j]; + if(n!=0) { + if(n==-1) x[5*jm1+0] = 1.0; + if(n==1) x[5*jm1+1] = 1.0; + if(n>=2) x[5*jm1+2] = 1.0; + if(n==3) x[5*jm1+3] = 1.0; + if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0; + } + } + /* encode opponent barmen */ + x[120] = -(float)(pos[0])/2.0; + /* encode computer's menoff */ + x[121] = (float)(pos[26])/15.0; +} + +float dumbeval(int race, int pos[]) +{ + /* Backgammon move-selection evaluation function + for benchmark comparisons. Computes a linear + evaluation function: Score = W * X, where X is + an input vector encoding the board state (using + a raw encoding of the number of men at each location), + and W is a weight vector. 
Separate weight vectors + are used for racing positions and contact positions. + Makes lots of obvious mistakes, but provides a + decent level of play for benchmarking purposes. */ + + /* Provided as a public service to the backgammon + programming community by Gerry Tesauro, IBM Research. + (e-mail: tesauro@watson.ibm.com) */ + + /* The following inputs are needed for this routine: + + race is an integer variable which should be set + based on the INITIAL position BEFORE the move. + Set race=1 if the position is a race (i.e. no contact) + and 0 if the position is a contact position. + + pos[] is an integer array of dimension 28 which + should represent a legal final board state after + the move. Elements 1-24 correspond to board locations + 1-24 from computer's point of view, i.e. computer's + men move in the negative direction from 24 to 1, and + opponent's men move in the positive direction from + 1 to 24. Computer's men are represented by positive + integers, and opponent's men are represented by negative + integers. Element 25 represents computer's men on the + bar (positive integer), and element 0 represents opponent's + men on the bar (negative integer). Element 26 represents + computer's men off the board (positive integer), and + element 27 represents opponent's men off the board + (negative integer). */ + + /* Also, be sure to call rdwts() at the start of your + program to read in the weight values. Happy hacking] */ + + int i; + float score; + + if(pos[26]==15) return(99999999.); + /* all men off, best possible move */ + + setx(pos); /* sets input array x[] */ + score = 0.0; + if(race) { /* use race weights */ + for(i=0;i<122;++i) score += wr[i]*x[i]; + } + else { /* use contact weights */ + for(i=0;i<122;++i) score += wc[i]*x[i]; + } + return(score); +} + +static PyObject* +dumbeval_eval(PyObject *self, PyObject *args) { + int race; + long numValues; + int board[28]; + float eval_score; + + PyObject* tuple_obj; + PyObject* val_obj; + + if (! 
PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj)) + return NULL; + + numValues = PyTuple_Size(tuple_obj); + + if (numValues < 0) return NULL; + if (numValues != 28) { + PyErr_SetString(DumbevalError, "Tuple must have 28 entries"); + return NULL; + } + + // Iterate over tuple to retreive positions + for (int i=0; i Date: Tue, 27 Mar 2018 02:41:58 +0200 Subject: [PATCH 06/17] Initialized weights completely randomly for dumbeval --- pubeval/dumbeval.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/pubeval/dumbeval.c b/pubeval/dumbeval.c index f9e6039..0c33ccc 100644 --- a/pubeval/dumbeval.c +++ b/pubeval/dumbeval.c @@ -5,7 +5,32 @@ static PyObject* DumbevalError; static float x[122]; static const float wc[122] = { -5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806}; +1.5790816238841092, 1.6374860177130541, -1.7131823639980923, -0.9286186784962336, -1.0732080528763888, + -0.33851674519289876, 1.5798155080270462, 2.3161915581553414, 1.5625330782392322, 0.9397141260075461, +0.8386342522957442, 1.2380864901133144, -2.803703105809909, -1.6033863837759044, -1.9297462408169208, +2.804924084193149, 0.9270839975087402, 0.9877927467766145, -1.0075116465703597, -0.9456578829797895, +-2.592017567014881, 0.6309857231907587, 2.04590249003744, -0.7982917574924828, -1.4539868823698936, +1.0841407450630234, 0.45211788236898887, -1.2713606178159307, 0.8688872440724307, -0.6732738151904405, +2.2362742485632294, -0.6581729637609781, -1.7948051663967473, 2.1883788452643564, 2.1598171424723214, +0.40802272166662146, -0.9708789129385202, -0.28407011999124165, 1.132858480655588, 0.35009713673111253, +2.396877030228498, -2.9621397724422653, 1.607067798976531, 1.0644990486021744, 0.31954763526104113, +1.3044736141405133, -2.7454899725805606, -2.7379143210889545, -1.803990720175892, 0.46979843403681576, +-1.7142750941084806, -0.8151527229519924, -2.009462889335147, -0.3918389579023729, -1.2877598286852634, +2.555703689627613, 0.9185193346378826, -2.4440956502956404, -1.5557875467629176, 1.6171292628313898, +-0.7350519162308693, 
2.9185129503030653, -0.02369662637182124, 0.9957404325370858, -0.6504711593915609, + 2.6190546093943468, -0.36103491516117003, -0.5988376927918715, 0.16399156134136383, 0.3254074568551131, +-1.5638349190057885, 0.8561543642997189, -0.0880209333042492, 1.323918411026094, -0.9498883976797834, +2.3050169940592458, -2.859322940360703, 2.1798224505428836, 0.03769734441005257, 2.806706515762855, +-0.514728418369482, -2.7130236727731454, 1.343193402901159, -1.542350700154035, 1.1197565339573625, +-1.4498511795864624, 1.3472224178544003, 0.7044576479382245, -2.284211306571646, -1.7289596273930532, +-1.7276292685923906, -0.1945401442950634, 2.0338744133468643, 2.001064062247366, 1.9649901287717713, + 1.5235253273336475, 0.40016636047698606, -1.3276206938801058, 0.8496121993449899, 1.054662320349336, +-1.1897996492934584, 0.49610727347392025, -1.8539475848522708, 0.4713599305742626, -2.8424352653158573, +-2.526691049928613, 2.1369664337786274, 1.0616438676464632, 1.9487914860665452, 2.822108017102477, +-0.3393405083020449, 2.787144781914554, -2.401723402781605, -1.1675562811241997, -1.1542961327714207, +0.18253192955355502, -2.418436664206371, 0.7423935287565309, 2.9903418274144666, -1.3503112004693552, +-2.649146174480099, -0.5447080156947952 +}; static const float wr[122] = { -0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0, From f43108c239bb2160bf0bb2ed83fd0b19f335ea4a Mon Sep 17 00:00:00 2001 From: alex Date: Tue, 27 Mar 2018 04:06:08 +0200 Subject: [PATCH 07/17] Training using slightly revamped version of our own board rep. Not sure if works yet. --- board.py | 11 +++++++++++ network.py | 27 +++++---------------------- pubeval/dumbeval.c | 6 +++++- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/board.py b/board.py index a2b205e..767ae45 100644 --- a/board.py +++ b/board.py @@ -35,6 +35,17 @@ class Board: board.append(-15 - sum(negatives)) return tuple(board) + @staticmethod + def board_features_to_own(board, player): + board = list(board) + positives = [x if x > 0 else 0 for x in board] + negatives = [x if x < 0 else 0 for x in board] + board.append(15 - sum(positives)) + board.append(-15 - sum(negatives)) + board += ([1, 0] if np.sign(player) > 0 else [1, 0]) + return np.array(board).reshape(1,-1) + + @staticmethod def board_features_to_tesauro(board, cur_player): features = [] diff --git a/network.py b/network.py index 8f8ef18..1dc4b62 100644 --- a/network.py +++ b/network.py @@ -11,13 +11,11 @@ from eval import Eval class Network: hidden_size = 40 - input_size = 198 + input_size = 30 output_size = 1 # Can't remember the best learning_rate, look this up learning_rate = 0.01 - - # TODO: Actually compile tensorflow properly - # os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" + board_rep = Board.board_features_to_own def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) @@ -147,7 +145,7 @@ class Network: def make_move(self, sess, board, roll, player): # print(Board.pretty(board)) legal_moves = Board.calculate_legal_states(board, player, roll) - moves_and_scores = [(move, self.eval_state(sess, Board.board_features_to_tesauro(move, player))) for move in legal_moves] + moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves] scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] best_score_index = np.array(scores).argmax() best_move_pair = moves_and_scores[best_score_index] @@ -338,15 +336,6 @@ class Network: sys.stderr.write("[TRAIN] Episode 
{}".format(episode + trained_eps)) # TODO decide which player should be here - - # TEST - #if episode % 1000 == 0: - # self.config['eval_methods'] = 'dumbeval' - # self.config['episodes'] = 300 - # outcomes = self.eval(trained_eps) - # self.log_eval_outcomes(outcomes, trained_eps=self.episodes_trained) - - #player = random.choice([-1, 1]) player = 1 prev_board = Board.initial_state @@ -355,7 +344,6 @@ class Network: # first thing inside of the while loop and then call # best_move_and_score to get V_t+1 - # i = 0 while Board.outcome(prev_board) is None: #print("PREEEV_BOOOOAAARD:",prev_board) @@ -367,7 +355,7 @@ class Network: # adjust weights sess.run(self.training_op, - feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player), + feed_dict={self.x: Network.board_rep(prev_board, player), self.value_next: cur_board_value}) player *= -1 @@ -386,7 +374,7 @@ class Network: with tf.name_scope("final"): merged = tf.summary.merge_all() summary, _ = sess.run([merged, self.training_op], - feed_dict={self.x: Board.board_features_to_tesauro(prev_board, player), + feed_dict={self.x: Network.board_rep(prev_board, player), self.value_next: scaled_final_score.reshape((1, 1))}) writer.add_summary(summary, episode + trained_eps) @@ -415,8 +403,3 @@ class Network: # save the current state again, so we can continue running backprop based on the "previous" turn. - # NOTE: We need to make a method so that we can take a single turn or at least - # just pick the next best move, so we know how to evaluate according to TD-learning. - # Right now, our game just continues in a while loop without nothing to stop it! - - diff --git a/pubeval/dumbeval.c b/pubeval/dumbeval.c index 0c33ccc..6e29de3 100644 --- a/pubeval/dumbeval.c +++ b/pubeval/dumbeval.c @@ -5,6 +5,10 @@ static PyObject* DumbevalError; static float x[122]; static const float wc[122] = { +5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806}; + + +/* 1.5790816238841092, 1.6374860177130541, -1.7131823639980923, -0.9286186784962336, -1.0732080528763888, -0.33851674519289876, 1.5798155080270462, 2.3161915581553414, 
1.5625330782392322, 0.9397141260075461, 0.8386342522957442, 1.2380864901133144, -2.803703105809909, -1.6033863837759044, -1.9297462408169208, @@ -30,7 +34,7 @@ static const float wc[122] = { -0.3393405083020449, 2.787144781914554, -2.401723402781605, -1.1675562811241997, -1.1542961327714207, 0.18253192955355502, -2.418436664206371, 0.7423935287565309, 2.9903418274144666, -1.3503112004693552, -2.649146174480099, -0.5447080156947952 -}; +};*/ static const float wr[122] = { -0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0, From 0eac5434d65db4a91b2a6a401421b7257c36aee4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Tue, 27 Mar 2018 11:55:32 +0200 Subject: [PATCH 08/17] update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 08bc86a..03ee050 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,6 @@ venv.bak/ README.* !README.org models/ +.DS_Store +bench/ + From 8822af81e6b6d892c8d9c8e522161ab7402bd937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Tue, 27 Mar 2018 12:23:15 +0200 Subject: [PATCH 09/17] move dumbeval code to separate directory --- dumbeval/.gitignore | 1 + {pubeval => dumbeval}/dumbeval.c | 0 pubeval/setup_dumb.py => dumbeval/setup.py | 0 3 files changed, 1 insertion(+) create mode 100644 dumbeval/.gitignore rename {pubeval => dumbeval}/dumbeval.c (100%) rename pubeval/setup_dumb.py => dumbeval/setup.py (100%) diff --git a/dumbeval/.gitignore b/dumbeval/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/dumbeval/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/pubeval/dumbeval.c b/dumbeval/dumbeval.c similarity index 100% rename from pubeval/dumbeval.c rename to dumbeval/dumbeval.c diff --git a/pubeval/setup_dumb.py b/dumbeval/setup.py similarity index 100% rename from pubeval/setup_dumb.py rename to dumbeval/setup.py From 28b82e8228030fe58322b0b2f3ae9c433fcab29e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Tue, 27 Mar 2018 12:57:06 +0200 Subject: [PATCH 10/17] update dumbeval weights --- dumbeval/dumbeval.c | 87 +++++++++++++++++++++------------------------ dumbeval/weights.py | 14 ++++++++ 2 files changed, 55 insertions(+), 46 deletions(-) create mode 100644 dumbeval/weights.py diff --git a/dumbeval/dumbeval.c b/dumbeval/dumbeval.c index 6e29de3..4d2579a 100644 --- a/dumbeval/dumbeval.c +++ b/dumbeval/dumbeval.c @@ -4,54 +4,49 @@ static PyObject* DumbevalError; static float x[122]; + +/* With apologies to Gerry Tesauro */ + +/* Weights generated by weights.py */ static const float wc[122] = { -5.6477, 6.316649999999999, 7.05515, 6.65315, 9.3171, 17.9777, 2.0235499999999993, 5.1129500000000005, 7.599200000000001, 9.68525, 3.1762, 8.05335, 16.153499999999998, 8.02445, 10.55345, 15.489600000000001, 10.525199999999998, 16.438850000000002, 12.27405, 9.6362, 12.7152, 13.2859, 1.6932499999999995, 26.79045, 10.521899999999999, 6.79635, 5.28135, 6.2059, 10.2306, 10.5485, 3.6000500000000004, 4.07825, 6.951700000000001, 4.413749999999999, 11.271450000000002, 12.9361, 11.087299999999999, 13.10085, 10.411999999999999, 8.084050000000001, 12.4893, 5.96055, 4.69195, 18.9482, 9.0946, 9.1954, 6.2592, 16.180300000000003, 8.3376, 23.24915, 14.32525, -2.6699000000000006, 19.156, 5.81445, 4.7214, 7.63055, 7.039, 5.88075, 2.00765, 14.596800000000002, 11.5208, -3.79, -3.8541000000000003, 5.358499999999999, 14.4516, 2.49015, 11.284799999999999, 14.1066, 16.2306, 5.82875, 9.34505, 
16.13685, 8.1893, 2.93145, 7.83185, 12.86765, 6.90115, 20.07255, 8.93355, -0.12434999999999974, 12.0587, 11.83985, 6.34155, 7.1963, 10.571200000000001, 22.38365, 6.50745, 8.94595, 12.0434, 10.79885, 14.055800000000001, 0.022100000000000453, 10.39255, 4.088850000000001, 3.6421499999999996, 38.1298, 6.8957, 0.9804999999999997, 5.9599, 13.16055, 11.55305, 10.65015, 4.6673, 15.770999999999999, 27.700050000000005, 4.4329, 12.6349, 7.037800000000001, 3.4897, 18.91945, 10.239899999999999, 5.4625, 10.29705, 10.492799999999999, 8.850900000000001, -10.575999999999999, 10.6893, 15.30845, 17.8083, 31.88275, 11.225000000000001, 4.4806}; - - -/* -1.5790816238841092, 1.6374860177130541, -1.7131823639980923, -0.9286186784962336, -1.0732080528763888, - -0.33851674519289876, 1.5798155080270462, 2.3161915581553414, 1.5625330782392322, 0.9397141260075461, -0.8386342522957442, 1.2380864901133144, -2.803703105809909, -1.6033863837759044, -1.9297462408169208, -2.804924084193149, 0.9270839975087402, 0.9877927467766145, -1.0075116465703597, -0.9456578829797895, --2.592017567014881, 0.6309857231907587, 2.04590249003744, -0.7982917574924828, -1.4539868823698936, -1.0841407450630234, 0.45211788236898887, -1.2713606178159307, 0.8688872440724307, -0.6732738151904405, -2.2362742485632294, -0.6581729637609781, -1.7948051663967473, 2.1883788452643564, 2.1598171424723214, -0.40802272166662146, -0.9708789129385202, -0.28407011999124165, 1.132858480655588, 0.35009713673111253, -2.396877030228498, -2.9621397724422653, 1.607067798976531, 1.0644990486021744, 0.31954763526104113, -1.3044736141405133, -2.7454899725805606, -2.7379143210889545, -1.803990720175892, 0.46979843403681576, --1.7142750941084806, -0.8151527229519924, -2.009462889335147, -0.3918389579023729, -1.2877598286852634, -2.555703689627613, 0.9185193346378826, -2.4440956502956404, -1.5557875467629176, 1.6171292628313898, --0.7350519162308693, 2.9185129503030653, -0.02369662637182124, 0.9957404325370858, -0.6504711593915609, - 2.6190546093943468, -0.36103491516117003, -0.5988376927918715, 0.16399156134136383, 0.3254074568551131, --1.5638349190057885, 0.8561543642997189, -0.0880209333042492, 1.323918411026094, -0.9498883976797834, -2.3050169940592458, -2.859322940360703, 2.1798224505428836, 0.03769734441005257, 2.806706515762855, --0.514728418369482, -2.7130236727731454, 1.343193402901159, -1.542350700154035, 1.1197565339573625, --1.4498511795864624, 1.3472224178544003, 0.7044576479382245, -2.284211306571646, -1.7289596273930532, --1.7276292685923906, -0.1945401442950634, 2.0338744133468643, 2.001064062247366, 1.9649901287717713, - 1.5235253273336475, 0.40016636047698606, -1.3276206938801058, 0.8496121993449899, 1.054662320349336, --1.1897996492934584, 0.49610727347392025, -1.8539475848522708, 0.4713599305742626, -2.8424352653158573, --2.526691049928613, 2.1369664337786274, 1.0616438676464632, 1.9487914860665452, 2.822108017102477, --0.3393405083020449, 2.787144781914554, -2.401723402781605, -1.1675562811241997, -1.1542961327714207, -0.18253192955355502, -2.418436664206371, 0.7423935287565309, 2.9903418274144666, -1.3503112004693552, --2.649146174480099, -0.5447080156947952 -};*/ +-1.91222, 1.45979, 0.40657, -1.39159, 3.64558, -0.45381, -0.03157, + 0.14539, 0.80232, 0.87558, 2.36202, -2.01887, -0.88918, 2.65871, + -1.31587, 1.07476, 0.30491, -1.32892, 0.38018, -0.30714, -1.16178, + 0.71481, -1.01334, -0.44373, 0.51255, -0.17171, -0.88886, 0.02071, + -0.53279, -0.22139, -1.02436, 0.17948, 0.95697, 0.49272, 0.31848, + -0.58293, 0.14484, 0.22063, 1.0336 , -1.90554, 
1.10291, -2.05589, + -0.16964, -0.82442, 1.27217, -1.24968, -0.90372, 0.05546, 0.2535 , + -0.03533, -0.31773, 0.43704, 0.21699, 0.10519, 2.12775, -0.48196, + -0.08445, -0.13156, -0.68362, 0.64765, 0.32537, 0.79493, 1.94577, + -0.63827, 0.97057, -0.46039, 1.51801, -0.62955, -0.43632, 0.25876, + -0.46623, -0.46963, 1.3532 , -0.07362, -1.53211, 0.69676, -0.92407, + 0.07153, 0.67173, 0.27661, -0.51579, -0.49019, 1.06603, -0.97673, + -1.21231, -1.54966, -0.07795, 0.32697, 0.02873, 1.38703, 0.41725, + 0.78326, -0.7257 , 0.54165, 1.38882, 0.27304, 1.0739 , 0.74654, + 1.35561, 1.18697, 1.09146, 0.17552, -0.30773, 0.27812, -1.674 , + -0.31073, -0.40745, 0.51546, -1.10875, 2.0081 , -1.27931, -1.16321, + 0.95652, 0.7487 , -0.2347 , 0.20324, -0.41417, 0.05929, 0.72632, + -1.15223, 1.2745 , -0.15947 }; static const float wr[122] = { --0.7856, -0.50352, 0.12392, -1.00316, -2.46556, -0.1627, 0.18966, 0.0043, 0.0, -0.13681, 1.11245, 0.0, 0.0, -0.02781, -2.77982, 0.0, -0.91035, 0.60015, --1.27266, 0.0, 0.0, 0.0, 0.0, -7.26713, -0.19412, -1.05121, 0.27448, -4.94251, - -0.06844, 0.37183, -3.66465, -0.8305, 0.09266, 0.07217, 0.0, 0.29906, -1.26062, -0.17405, 0.48302, 2.00366, 0.92321, -0.10839, 1.06349, 0.39521, 3.4204, -0.00576, 5.35, 3.8539, -0.09308, 0.17253, 0.13978, 0.2701, -0.52728, 0.88296, -0.2252, 0.0, 0.0, -0.12707, 3.05454, 0.31202, -0.88035, -0.01351, 0.0, --3.40177, -0.22082, -0.13022, -0.09795, -2.29847, -12.32252, 0.0, -0.13597, -0.12039, 0.85631, 0.0, 0.0, -0.3424, 0.24855, 0.20178, 2.30052, 1.5068, -0.0, -0.07456, 5.16874, 0.01418, -1.3464, -1.29506, 0.0, 0.0, -1.40375, -0.0, -0.11696, 0.05281, -9.67677, 0.05685, -1.09167, 0.0, 0.0, -2.56906, -2.19605, 0.0, 0.68178, -0.08471, 0.0, -2.34631, 1.49549, -2.16183, 0.0, -1.16242, 1.08744, -0.1716, 0.25236, 0.13246, -0.37646, 0.0, -2.87401, -0.74427, 1.07274, -0.01591, -0.14818, -0.06285, 0.08302, -1.03508 -}; + 0.13119, -0.13164, -1.2736 , 1.06352, -1.34749, -1.03086, -0.27417, + -0.27762, 0.79454, -1.12623, 2.1134 , -0.7003 , 0.26056, -1.13518, + -1.64548, -1.30828, -0.96589, -0.36258, -1.14323, -0.2006 , -1.00307, + 0.57739, -0.62693, 0.29721, -0.36996, -0.17462, 0.96704, 0.08902, + 1.4337 , -0.47107, 0.82156, 0.14988, 1.74034, 1.13313, -0.32083, + -0.00048, -0.86622, 1.12808, 0.99875, 0.8049 , -0.16841, -0.42677, + -1.9409 , -0.53565, -0.83708, 0.69603, 0.32079, 0.56942, 0.67965, + 1.49328, -1.65885, 0.96284, 0.63196, -0.27504, 0.39174, 0.71225, + -0.3614 , 0.88761, 1.12882, 0.77764, 1.02618, -0.20245, -0.39245, + -1.56799, 1.04888, -1.20858, -0.24361, -1.85157, -0.16912, 0.50512, + -2.93122, 0.70477, -0.93066, 1.74867, 0.23963, -0.00699, -1.27183, + -0.30604, 1.71039, 0.82202, -1.36734, -1.08352, -1.25054, 0.49436, + -1.5037 , -0.73143, 0.74189, 0.32365, 0.30539, -0.72169, 0.41088, + -1.56632, -0.63526, 0.58779, -0.05653, 0.76713, -1.40898, -0.33683, + 1.86802, 0.59773, 1.28668, -0.65817, 2.46829, -0.09331, 2.9034 , + 1.04809, 0.73222, -0.44372, 0.53044, -1.9274 , -1.57183, -1.14068, + 1.26036, -0.9296 , 0.06662, -0.26572, -0.30862, 0.72915, 0.98977, + 0.63513, -1.43917, -0.12523 }; void setx(int pos[]) { @@ -179,7 +174,7 @@ static PyMethodDef dumbeval_methods[] = { static struct PyModuleDef dumbeval_definition = { PyModuleDef_HEAD_INIT, "dumbeval", - "A Python module that implements Gerald Tesauro's dumbeval function for evaluation backgammon positions.", + "A Python module that implements Gerald Tesauro's pubeval function for evaluation backgammon positions with badly initialized weights.", -1, dumbeval_methods }; diff --git 
a/dumbeval/weights.py b/dumbeval/weights.py new file mode 100644 index 0000000..bf02340 --- /dev/null +++ b/dumbeval/weights.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +import numpy as np +import re + +re.DOTALL = True + +np.set_printoptions(precision=5, suppress=True, threshold=np.nan) +def random_array_string(): + return re.sub(r'^\[(.*)\]$(?s)', r'{\n\1 };', np.array2string(np.random.normal(0,1,122), separator=', ')) + +print("/* Weights generated by weights.py */") +print("static const float wc[122] =", random_array_string()) +print() +print("static const float wr[122] =", random_array_string()) From 26540062225dcc8a5399b5f74baf926f4b1e4d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Tue, 27 Mar 2018 13:02:36 +0200 Subject: [PATCH 11/17] fix wrongful mergings --- network.py | 75 ++++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 53 deletions(-) diff --git a/network.py b/network.py index 6358761..6ceefbe 100644 --- a/network.py +++ b/network.py @@ -152,8 +152,8 @@ class Network: # print("Found the best state, being:", np.array(move_scores).argmax()) return best_move_pair - def eval(self, trained_eps=0): - def do_eval(sess, method, episodes=1000, trained_eps=trained_eps): + def eval(self, episode_count, trained_eps = 0, tf_session = None): + def do_eval(sess, method, episodes = 1000, trained_eps = 0): start_time = time.time() def print_time_estimate(eps_completed): @@ -290,17 +290,26 @@ class Network: else: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0] - - with tf.Session() as session: - session.run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [(method, do_eval(session, - method, - self.config['episode_count'], - trained_eps=trained_eps)) - for method - in self.config['eval_methods']] - return outcomes + + if tf_session == None: + with tf.Session(): + session.run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [ (method, do_eval(session, + method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes + else: + outcomes = [ (method, do_eval(tf_session, + method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): with tf.Session() as sess: @@ -401,43 +410,3 @@ class Network: # save the current state again, so we can continue running backprop based on the "previous" turn. # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it! - - - - def eval(self, episode_count, trained_eps = 0, tf_session = None): - def do_eval(sess, method, episodes = 1000, trained_eps = 0): - start_time = time.time() - - writer.close() - - return outcomes - - # take turn, which finds the best state and picks it, based on the current network - # save current state - # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), - # (something which does the backprop, based on the state after having taken a turn, - # found before, and the state we saved in the beginning and from now we'll - # save it at the end of the turn - - # save the current state again, so we can continue running backprop based on the "previous" turn. 
- - - if tf_session == None: - with tf.Session(): - session.run(tf.global_variables_initializer()) - self.restore_model(session) - outcomes = [ (method, do_eval(session, - method, - episode_count, - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes - else: - outcomes = [ (method, do_eval(tf_session, - method, - episode_count, - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] - return outcomes From 785ae6a5be445e4511d389d3b4ec780094cad276 Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 28 Mar 2018 00:16:50 +0200 Subject: [PATCH 12/17] Fixed wrongful appending of current player to board rep --- board.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/board.py b/board.py index 767ae45..35038ed 100644 --- a/board.py +++ b/board.py @@ -42,7 +42,7 @@ class Board: negatives = [x if x < 0 else 0 for x in board] board.append(15 - sum(positives)) board.append(-15 - sum(negatives)) - board += ([1, 0] if np.sign(player) > 0 else [1, 0]) + board += ([1, 0] if np.sign(player) > 0 else [0, 1]) return np.array(board).reshape(1,-1) @@ -67,7 +67,7 @@ class Board: features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0) # Calculate how many pieces there must be in the home state and divide it by 15 features.append((15 - sum) / 15) - features += ([1,0] if np.sign(cur_player) > 0 else [1,0]) + features += ([1,0] if np.sign(cur_player) > 0 else [0,1]) test = np.array(features).reshape(1,-1) #print("TEST:",test) return test From 95b12a6c35e94e01efffe54fc6d8204960994d51 Mon Sep 17 00:00:00 2001 From: alex Date: Wed, 28 Mar 2018 00:33:39 +0200 Subject: [PATCH 13/17] Added another board_rep --- board.py | 6 ++++++ network.py | 3 +++ 2 files changed, 9 insertions(+) diff --git a/board.py b/board.py index 35038ed..1b018d9 100644 --- a/board.py +++ b/board.py @@ -35,6 +35,12 @@ class Board: board.append(-15 - sum(negatives)) return tuple(board) + @staticmethod + def board_features_to_slimmed_down_own(board, player): + board = list(board) + board += ([1, 0] if np.sign(player) > 0 else [0, 1]) + return np.array(board).reshape(1, -1) + @staticmethod def board_features_to_own(board, player): board = list(board) diff --git a/network.py b/network.py index 6ceefbe..082f322 100644 --- a/network.py +++ b/network.py @@ -15,6 +15,9 @@ class Network: output_size = 1 # Can't remember the best learning_rate, look this up learning_rate = 0.01 + # board_features_to_own has size 30 + # board_features_to_tesauro has size 198 + # board_features_to_slimmed_down_own has size 28 board_rep = Board.board_features_to_own def custom_tanh(self, x, name=None): From abce56dd4009d946ff9f792f0d4e3987dac4db0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Tue, 27 Mar 2018 23:13:59 +0000 Subject: [PATCH 14/17] fix typo --- network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/network.py b/network.py index 082f322..f1f1859 100644 --- a/network.py +++ b/network.py @@ -295,7 +295,7 @@ class Network: return [0] if tf_session == None: - with tf.Session(): + with tf.Session() as session: session.run(tf.global_variables_initializer()) self.restore_model(session) outcomes = [ (method, do_eval(session, From fda2c6e08d39eca1e6c618db9bcba9fe5092f15c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Wed, 28 Mar 2018 12:00:47 +0200 Subject: [PATCH 15/17] parametric board representation in network --- board.py | 13 ++++++++----- main.py | 3 ++- network.py | 47 
++++++++++++++++++++++++++++------------------- 3 files changed, 38 insertions(+), 25 deletions(-) diff --git a/board.py b/board.py index 1b018d9..33303c1 100644 --- a/board.py +++ b/board.py @@ -31,29 +31,32 @@ class Board: board = list(board) positives = [x if x > 0 else 0 for x in board] negatives = [x if x < 0 else 0 for x in board] - board.append(15 - sum(positives)) + board.append( 15 - sum(positives)) board.append(-15 - sum(negatives)) return tuple(board) + # quack @staticmethod - def board_features_to_slimmed_down_own(board, player): + def board_features_quack(board, player): board = list(board) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) return np.array(board).reshape(1, -1) + # quack-fat @staticmethod - def board_features_to_own(board, player): + def board_features_quack_fat(board, player): board = list(board) positives = [x if x > 0 else 0 for x in board] negatives = [x if x < 0 else 0 for x in board] - board.append(15 - sum(positives)) + board.append( 15 - sum(positives)) board.append(-15 - sum(negatives)) board += ([1, 0] if np.sign(player) > 0 else [0, 1]) return np.array(board).reshape(1,-1) + # tesauro @staticmethod - def board_features_to_tesauro(board, cur_player): + def board_features_tesauro(board, cur_player): features = [] for player in [-1,1]: sum = 0.0 diff --git a/main.py b/main.py index b5a8ad0..f6a375e 100644 --- a/main.py +++ b/main.py @@ -46,7 +46,8 @@ config = { 'start_episode': args.start_episode, 'train_perpetually': args.train_perpetually, 'model_storage_path': 'models', - 'bench_storage_path': 'bench' + 'bench_storage_path': 'bench', + 'board_representation': 'quack' } # Create models folder diff --git a/network.py b/network.py index f1f1859..d19f23c 100644 --- a/network.py +++ b/network.py @@ -10,15 +10,15 @@ from eval import Eval class Network: - hidden_size = 40 - input_size = 30 - output_size = 1 - # Can't remember the best learning_rate, look this up - learning_rate = 0.01 - # board_features_to_own has size 30 - # board_features_to_tesauro has size 198 - # board_features_to_slimmed_down_own has size 28 - board_rep = Board.board_features_to_own + # board_features_quack has size 28 + # board_features_quack_fat has size 30 + # board_features_tesauro has size 198 + + board_reps = { + 'quack-fat' : (30, Board.board_features_quack_fat), + 'quack' : (28, Board.board_features_quack), + 'tesauro' : (198, Board.board_features_tesauro) + } def custom_tanh(self, x, name=None): return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) @@ -29,6 +29,15 @@ class Network: self.name = name + # Set board representation from config + self.input_size, self.board_trans_func = Network.board_reps[ + self.config['board_representation'] + ] + self.output_size = 1 + self.hidden_size = 40 + # Can't remember the best learning_rate, look this up + self.learning_rate = 0.01 + # Restore trained episode count for model episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained") if os.path.isfile(episode_count_path): @@ -38,19 +47,19 @@ class Network: self.episodes_trained = 0 # input = x - self.x = tf.placeholder('float', [1, Network.input_size], name='input') - self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next") + self.x = tf.placeholder('float', [1, self.input_size], name='input') + self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next") xavier_init = tf.contrib.layers.xavier_initializer() - W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size), + W_1 = tf.get_variable("w_1", 
(self.input_size, self.hidden_size), initializer=xavier_init) - W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size), + W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size), initializer=xavier_init) - b_1 = tf.get_variable("b_1", (Network.hidden_size,), + b_1 = tf.get_variable("b_1", (self.hidden_size,), initializer=tf.zeros_initializer) - b_2 = tf.get_variable("b_2", (Network.output_size,), + b_2 = tf.get_variable("b_2", (self.output_size,), initializer=tf.zeros_initializer) @@ -74,7 +83,7 @@ class Network: with tf.variable_scope('apply_gradients'): for gradient, trainable_var in zip(gradients, trainable_vars): # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t. - backprop_calc = Network.learning_rate * difference_in_values * gradient + backprop_calc = self.learning_rate * difference_in_values * gradient grad_apply = trainable_var.assign_add(backprop_calc) apply_gradients.append(grad_apply) @@ -148,7 +157,7 @@ class Network: def make_move(self, sess, board, roll, player): # print(Board.pretty(board)) legal_moves = Board.calculate_legal_states(board, player, roll) - moves_and_scores = [(move, self.eval_state(sess, Network.board_rep(move, player))) for move in legal_moves] + moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves] scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores] best_score_index = np.array(scores).argmax() best_move_pair = moves_and_scores[best_score_index] @@ -367,7 +376,7 @@ class Network: # adjust weights sess.run(self.training_op, - feed_dict={self.x: Network.board_rep(prev_board, player), + feed_dict={self.x: self.board_trans_func(prev_board, player), self.value_next: cur_board_value}) player *= -1 @@ -386,7 +395,7 @@ class Network: with tf.name_scope("final"): merged = tf.summary.merge_all() summary, _ = sess.run([merged, self.training_op], - feed_dict={self.x: Network.board_rep(prev_board, player), + feed_dict={self.x: self.board_trans_func(prev_board, player), self.value_next: scaled_final_score.reshape((1, 1))}) writer.add_summary(summary, episode + trained_eps) From 17f5b62e9b3fc3e9662f41a8b69ff94ccb506f8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Wed, 28 Mar 2018 14:36:52 +0200 Subject: [PATCH 16/17] proper Tesauro board representation --- board.py | 53 +++++----- network.py | 6 +- test.py | 306 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 338 insertions(+), 27 deletions(-) diff --git a/board.py b/board.py index 33303c1..2136e47 100644 --- a/board.py +++ b/board.py @@ -57,30 +57,33 @@ class Board: # tesauro @staticmethod def board_features_tesauro(board, cur_player): - features = [] - for player in [-1,1]: - sum = 0.0 - for board_range in range(1,25): - pin = board[board_range] - #print("PIIIN:",pin) - feature = [0.0]*4 - if np.sign(pin) == np.sign(player): - sum += abs(pin) - for i in range(min(abs(pin), 3)): - feature[i] = 1 - if (abs(pin) > 3): - feature[3] = (abs(pin)-3)/2 - features += feature - #print("SUUUM:",sum) - # Append the amount of men on the bar of the current player divided by 2 - features.append((board[0] if np.sign(player) < 0 else board[25]) / 2.0) - # Calculate how many pieces there must be in the home state and divide it by 15 - features.append((15 - sum) / 15) - features += ([1,0] if np.sign(cur_player) > 0 else [0,1]) - test = np.array(features).reshape(1,-1) - #print("TEST:",test) - return test + def ordinary_trans(val, player): + abs_val = val * player + if 
abs_val <= 0: return (0,0,0,0) + elif abs_val == 1: return (1,0,0,0) + elif abs_val == 2: return (1,1,0,0) + elif abs_val == 3: return (1,1,1,0) + else: return (1,1,1, (abs_val - 3) / 2) + def bar_trans(board, player): + if player == 1: return (abs(board[0]/2),) + elif player == -1: return (abs(board[25]/2),) + + # def ordinary_trans_board(board, player): + # return np.array( + # [ordinary_trans(x, player) for x in board[1:25]] + # ).flatten() + + board_rep = [] + for player in [1,-1]: + for x in board[1:25]: + board_rep += ordinary_trans(x, player) + board_rep += bar_trans(board, player) + board_rep += (15 - Board.num_of_checkers_for_player(board, player),) + + board_rep += ([1,0] if cur_player == 1 else [0,1]) + + return np.array(board_rep).reshape(1,198) @staticmethod @@ -295,9 +298,9 @@ class Board: return """ 13 14 15 16 17 18 19 20 21 22 23 24 +--------------------------------------------------------------------------+ -| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO| +| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO| |---|---|---|---|---|---|------------|---|---|---|---|---|---| | -| {13}| {14}| {15}| {16}| {17}| {18}| bar 1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO| +| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO| +--------------------------------------------------------------------------+ 12 11 10 9 8 7 6 5 4 3 2 1 """.format(*temp) diff --git a/network.py b/network.py index d19f23c..2722f6a 100644 --- a/network.py +++ b/network.py @@ -365,13 +365,15 @@ class Network: # first thing inside of the while loop and then call # best_move_and_score to get V_t+1 + i = 0 while Board.outcome(prev_board) is None: + i += 1 #print("PREEEV_BOOOOAAARD:",prev_board) cur_board, cur_board_value = self.make_move(sess, prev_board, (random.randrange(1, 7), random.randrange(1, 7)), player) - + #print("The current value:",cur_board_value) # adjust weights @@ -385,7 +387,7 @@ class Network: prev_board = cur_board final_board = prev_board - sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) + sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) outcomes.append(Board.outcome(final_board)[1]) final_score = np.array([Board.outcome(final_board)[1]]) scaled_final_score = ((final_score + 2) / 4) diff --git a/test.py b/test.py index efc243e..6c9c130 100644 --- a/test.py +++ b/test.py @@ -613,6 +613,312 @@ class TestBoardFlip(unittest.TestCase): -2) self.assertEqual(Board.flip(Board.flip(board)), board) + + def test_tesauro_initial(self): + board = Board.initial_state + + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, + + 0.0, + 0, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + def test_tesauro_bars(self): + board = list(Board.initial_state) + board[1] = 0 + board[0] = 2 + board[24] = 0 + board[25] = 
-2 + + board = tuple(board) + + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1.0, + 0, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + + def test_tesauro_home(self): + board = list(Board.initial_state) + + board[1] = 0 + board[24] = 0 + + board = tuple(board) + + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 2, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 2, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + + def test_tesauro_black_player(self): + board = Board.initial_state + + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, + + 0.0, + 0, + + 0, + 1 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, -1) == + np.array(expected).reshape(1, 198)).all()) + if __name__ == '__main__': unittest.main() From 8764fadd6af82f02e348e912fc9540293d6dd50b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Wed, 28 Mar 2018 15:32:22 +0200 Subject: [PATCH 17/17] train-evaluate-save --- bin/train-evaluate-save | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100755 bin/train-evaluate-save diff --git a/bin/train-evaluate-save b/bin/train-evaluate-save new file mode 100755 index 0000000..00b6411 --- /dev/null +++ b/bin/train-evaluate-save @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby +def save(model_name) + require 'date' + + models_dir = 'models' + model_path = File.join(models_dir, model_name) + if not File.exists? model_path then + return false + end + + episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i + + puts "Found model #{model_name} with episodes #{episode_count} trained!" 
+ + file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz" + save_path = File.join(models_dir, 'saves', file_name) + puts "Saving to #{save_path}" + + system("tar", "-cvzf", save_path, "-C", models_dir, model_name) + + return true +end + +def train(model, episodes) + system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s) +end + +def evaluate(model, episodes, method) + system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method) +end + +model = ARGV[0] + +if model.nil? then raise "no model specified" end + +while true do + save model + train model, 1000 + save model + train model, 1000 + 3.times do + evaluate model, 250, "pubeval" + end + 3.times do + evaluate model, 250, "dumbeval" + end +end
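

A quick sanity check on the input sizes registered in Network.board_reps (PATCH 15/17) and exercised by the new tests (PATCH 16/17): 'quack' is the raw 26-entry board (24 points plus the two bar entries) with a two-element current-player indicator appended; 'quack-fat' additionally appends the two borne-off checker counts; 'tesauro' uses four features per point per player plus a bar count and an off count for each player, plus the player indicator. The sketch below only illustrates that arithmetic — it is not part of the patches themselves.

# Illustrative sketch (not from the patches): where the Network.board_reps sizes come from.
quack     = 26 + 2                    # 24 points + 2 bars, plus one-hot current player   -> 28
quack_fat = 26 + 2 + 2                # ... plus the two borne-off checker counts         -> 30
tesauro   = 2 * (24 * 4 + 1 + 1) + 2  # per player: 4 features/point, bar, off; + player  -> 198
assert (quack, quack_fat, tesauro) == (28, 30, 198)

These are the same sizes that test.py checks implicitly by comparing the Tesauro features against expected arrays reshaped to (1, 198), and that main.py now selects through the 'board_representation' config key.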