Merge branch 'rework-1' into 'master'

Rework 1 See merge request Pownie/backgammon!4
2018-03-28 13:32:58 +00:00 · 2018-03-28 13:32:58 +00:00 · 3bcb7c5df9
commit 3bcb7c5df9
parent b7e6dd10af 8764fadd6a
13 changed files with 1104 additions and 301 deletions
--- a/.gitignore
+++ b/.gitignore
@ -169,3 +169,6 @@ venv.bak/
 README.*
 !README.org
 models/
 .DS_Store
 bench/
--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@ -0,0 +1,47 @@
 #!/usr/bin/env ruby
 # Archives the model directory `models/<model_name>` as a timestamped,
 # gzipped tarball inside `models/saves/`.
 #
 # @param model_name [String] name of the model directory inside `models/`
 # @return [Boolean] false when the model directory does not exist; true once
 #   an archive attempt has been made (a tar failure is reported on stderr)
 def save(model_name)
  # Required here because the script has no separate require section.
  require 'fileutils'
  models_dir = 'models'
  model_path = File.join(models_dir, model_name)
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  return false unless File.exist?(model_path)
  episode_count = File.read(File.join(model_path, 'episodes_trained')).to_i
  puts "Found model #{model_name} with episodes #{episode_count} trained!"
  timestamp = Time.now.strftime('%Y%m%d-%H%M%S')
  file_name = "model-#{model_name}-#{episode_count}-#{timestamp}.tar.gz"
  saves_dir = File.join(models_dir, 'saves')
  # tar does not create the archive's parent directory, so make sure it exists.
  FileUtils.mkdir_p(saves_dir)
  save_path = File.join(saves_dir, file_name)
  puts "Saving to #{save_path}"
  archived = system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
  # Surface a failed archive instead of silently pretending it worked.
  warn "tar failed to create #{save_path}" unless archived
  true
 end
 # Runs `main.py --train` for the given model.
 #
 # @param model [String] value passed to `--model`
 # @param episodes [Integer] value passed to `--episodes`
 # @return [Boolean, nil] whatever Kernel#system reports for the python3 call
 def train(model, episodes)
  command = ["python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s]
  system(*command)
 end
 # Runs `main.py --eval` for the given model with one evaluation method.
 #
 # @param model [String] value passed to `--model`
 # @param episodes [Integer] value passed to `--episodes`
 # @param method [String] value passed to `--eval-methods`
 # @return [Boolean, nil] whatever Kernel#system reports for the python3 call
 def evaluate(model, episodes, method)
  command = ["python3", "main.py", "--eval", "--model", model]
  command += ["--episodes", episodes.to_s, "--eval-methods", method]
  system(*command)
 end
 # The model name is the first command-line argument.
 model = ARGV.first
 raise "no model specified" if model.nil?
 # Runs until interrupted: two save/train rounds of 1000 episodes each,
 # then three 250-episode evaluations per evaluation method.
 loop do
  2.times do
    save(model)
    train(model, 1000)
  end
  ["pubeval", "dumbeval"].each do |eval_method|
    3.times do
      evaluate(model, 250, eval_method)
    end
  end
 end
--- a/board.py
+++ b/board.py
@ -35,7 +35,55 @@ class Board:
        board.append(-15 - sum(negatives))
        return tuple(board)
    # quack
    @staticmethod
    def board_features_quack(board, player):
        """Return the board followed by a two-value turn marker as a (1, N) array.

        The marker is ``[1, 0]`` when ``player`` is positive and ``[0, 1]``
        otherwise.
        """
        turn_marker = [1, 0] if np.sign(player) > 0 else [0, 1]
        features = list(board) + turn_marker
        return np.array(features).reshape(1, -1)
    # quack-fat
    @staticmethod
    def board_features_quack_fat(board, player):
        """Return the board plus two remainder counts and a turn marker, shape (1, N).

        Appended after the board values, in order:
        ``15 - (sum of positive entries)``, ``-15 - (sum of negative entries)``,
        then ``[1, 0]`` for a positive ``player`` or ``[0, 1]`` otherwise.
        """
        cells = list(board)
        positive_total = sum(x for x in cells if x > 0)
        negative_total = sum(x for x in cells if x < 0)
        cells.extend([15 - positive_total, -15 - negative_total])
        cells.extend([1, 0] if np.sign(player) > 0 else [0, 1])
        return np.array(cells).reshape(1, -1)
    # tesauro
    @staticmethod
    def board_features_tesauro(board, cur_player):
        """Encode ``board`` as a (1, 198) feature array.

        For each player in the order 1, -1 the encoding holds four values per
        point for ``board[1:25]``, one bar value, and
        ``15 - Board.num_of_checkers_for_player(board, player)``
        (presumably the number of checkers borne off; confirm against that
        helper). A final two-value marker is ``[1, 0]`` when ``cur_player``
        is 1 and ``[0, 1]`` otherwise.
        """
        # Encodings for one, two and three checkers of the given player.
        unary_codes = {1: (1, 0, 0, 0), 2: (1, 1, 0, 0), 3: (1, 1, 1, 0)}

        def ordinary_trans(val, player):
            # Multiplying by the player's sign makes that player's checkers positive.
            count = val * player
            if count <= 0:
                return (0, 0, 0, 0)
            return unary_codes.get(count, (1, 1, 1, (count - 3) / 2))

        def bar_trans(board, player):
            # Player 1's bar is index 0, player -1's bar is index 25.
            if player == 1:
                return (abs(board[0] / 2),)
            if player == -1:
                return (abs(board[25] / 2),)

        features = []
        for player in (1, -1):
            for point in board[1:25]:
                features.extend(ordinary_trans(point, player))
            features.extend(bar_trans(board, player))
            features.append(15 - Board.num_of_checkers_for_player(board, player))
        features.extend([1, 0] if cur_player == 1 else [0, 1])
        return np.array(features).reshape(1, 198)
    @staticmethod
@ -250,9 +298,9 @@ class Board:
        return """
  13  14  15  16  17  18               19  20  21  22  23  24
 +--------------------------------------------------------------------------+
-| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
 |---|---|---|---|---|---|------------|---|---|---|---|---|---|             |
-| {13}| {14}| {15}| {16}| {17}| {18}| bar  1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end  1: TODO|
+| {12}| {11}| {10}| {9}| {8}| {7}| bar  1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end  1: TODO|
 +--------------------------------------------------------------------------+
  12  11  10   9   8   7                6   5   4   3   2   1 
 """.format(*temp)
--- a/dumbeval/.gitignore
+++ b/dumbeval/.gitignore
@ -0,0 +1 @@
 build/
--- a/dumbeval/dumbeval.c
+++ b/dumbeval/dumbeval.c
@ -0,0 +1,194 @@
 #include <Python.h>
 static PyObject* DumbevalError;
 static float x[122];
 /* With apologies to Gerry Tesauro */
 /* Weights generated by weights.py */
 static const float wc[122] = {
 -1.91222,  1.45979,  0.40657, -1.39159,  3.64558, -0.45381, -0.03157,
  0.14539,  0.80232,  0.87558,  2.36202, -2.01887, -0.88918,  2.65871,
 -1.31587,  1.07476,  0.30491, -1.32892,  0.38018, -0.30714, -1.16178,
  0.71481, -1.01334, -0.44373,  0.51255, -0.17171, -0.88886,  0.02071,
 -0.53279, -0.22139, -1.02436,  0.17948,  0.95697,  0.49272,  0.31848,
 -0.58293,  0.14484,  0.22063,  1.0336 , -1.90554,  1.10291, -2.05589,
 -0.16964, -0.82442,  1.27217, -1.24968, -0.90372,  0.05546,  0.2535 ,
 -0.03533, -0.31773,  0.43704,  0.21699,  0.10519,  2.12775, -0.48196,
 -0.08445, -0.13156, -0.68362,  0.64765,  0.32537,  0.79493,  1.94577,
 -0.63827,  0.97057, -0.46039,  1.51801, -0.62955, -0.43632,  0.25876,
 -0.46623, -0.46963,  1.3532 , -0.07362, -1.53211,  0.69676, -0.92407,
  0.07153,  0.67173,  0.27661, -0.51579, -0.49019,  1.06603, -0.97673,
 -1.21231, -1.54966, -0.07795,  0.32697,  0.02873,  1.38703,  0.41725,
  0.78326, -0.7257 ,  0.54165,  1.38882,  0.27304,  1.0739 ,  0.74654,
  1.35561,  1.18697,  1.09146,  0.17552, -0.30773,  0.27812, -1.674  ,
 -0.31073, -0.40745,  0.51546, -1.10875,  2.0081 , -1.27931, -1.16321,
  0.95652,  0.7487 , -0.2347 ,  0.20324, -0.41417,  0.05929,  0.72632,
 -1.15223,  1.2745 , -0.15947 };
 static const float wr[122] = {
 0.13119, -0.13164, -1.2736 ,  1.06352, -1.34749, -1.03086, -0.27417,
 -0.27762,  0.79454, -1.12623,  2.1134 , -0.7003 ,  0.26056, -1.13518,
 -1.64548, -1.30828, -0.96589, -0.36258, -1.14323, -0.2006 , -1.00307,
  0.57739, -0.62693,  0.29721, -0.36996, -0.17462,  0.96704,  0.08902,
  1.4337 , -0.47107,  0.82156,  0.14988,  1.74034,  1.13313, -0.32083,
 -0.00048, -0.86622,  1.12808,  0.99875,  0.8049 , -0.16841, -0.42677,
 -1.9409 , -0.53565, -0.83708,  0.69603,  0.32079,  0.56942,  0.67965,
  1.49328, -1.65885,  0.96284,  0.63196, -0.27504,  0.39174,  0.71225,
 -0.3614 ,  0.88761,  1.12882,  0.77764,  1.02618, -0.20245, -0.39245,
 -1.56799,  1.04888, -1.20858, -0.24361, -1.85157, -0.16912,  0.50512,
 -2.93122,  0.70477, -0.93066,  1.74867,  0.23963, -0.00699, -1.27183,
 -0.30604,  1.71039,  0.82202, -1.36734, -1.08352, -1.25054,  0.49436,
 -1.5037 , -0.73143,  0.74189,  0.32365,  0.30539, -0.72169,  0.41088,
 -1.56632, -0.63526,  0.58779, -0.05653,  0.76713, -1.40898, -0.33683,
  1.86802,  0.59773,  1.28668, -0.65817,  2.46829, -0.09331,  2.9034 ,
  1.04809,  0.73222, -0.44372,  0.53044, -1.9274 , -1.57183, -1.14068,
  1.26036, -0.9296 ,  0.06662, -0.26572, -0.30862,  0.72915,  0.98977,
  0.63513, -1.43917, -0.12523 };
 void setx(int pos[])
 {
        /* Fills the input vector x[] from board position pos[]:
           five features per board location in x[0..119], the opponent's
           bar count in x[120] and the computer's men off in x[121]. */
        extern float x[];
        int k, point;
        /* start from an all-zero vector */
        for(k=0;k<122;++k) x[k] = 0.0;
        /* board locations 24 down to 1; location 24 fills x[0..4] */
        for(point=24;point>=1;--point) {
            float *feat = &x[5*(24-point)];
            int n = pos[point];
            if(n==-1) feat[0] = 1.0;
            else if(n==1) feat[1] = 1.0;
            else if(n>=2) {
                feat[2] = 1.0;
                if(n==3) feat[3] = 1.0;
                else if(n>=4) feat[4] = (float)(n-3)/2.0;
            }
        }
        /* opponent barmen (stored as a negative count in pos[0]) */
        x[120] = -(float)(pos[0])/2.0;
        /* computer's men off */
        x[121] = (float)(pos[26])/15.0;
 }
 float dumbeval(int race, int pos[])
 {
        /* Backgammon move-selection evaluation function
           for benchmark comparisons.  Computes a linear
           evaluation function:  Score = W * X, where X is
           an input vector encoding the board state (using
           a raw encoding of the number of men at each location),
           and W is a weight vector.  Separate weight vectors
           are used for racing positions and contact positions. */
        /* Based on the evaluation routine provided as a public
           service to the backgammon programming community by
           Gerry Tesauro, IBM Research.
           (e-mail: tesauro@watson.ibm.com)                     */
        /* Inputs:
           race   should be set based on the INITIAL position
           BEFORE the move: race=1 if the position is a race
           (i.e. no contact) and 0 if it is a contact position.
           pos[]  is an integer array of dimension 28 which
           should represent a legal final board state after
           the move. Elements 1-24 correspond to board locations
           1-24 from computer's point of view, i.e. computer's
           men move in the negative direction from 24 to 1, and
           opponent's men move in the positive direction from
           1 to 24. Computer's men are represented by positive
           integers, and opponent's men are represented by negative
           integers. Element 25 represents computer's men on the
           bar (positive integer), and element 0 represents opponent's
           men on the bar (negative integer). Element 26 represents
           computer's men off the board (positive integer), and
           element 27 represents opponent's men off the board
           (negative integer).                                  */
        /* The weights are the compiled-in tables wr[] (race) and
           wc[] (contact) defined in this file. */
        const float *weights;
        float total;
        int k;
        /* all men off, best possible move */
        if(pos[26]==15) return(99999999.);
        setx(pos); /* sets input array x[] */
        /* race weights for a race, contact weights otherwise */
        weights = race ? wr : wc;
        total = 0.0;
        for(k=0;k<122;++k) total += weights[k]*x[k];
        return(total);
 }
 /* Python entry point: dumbeval.eval(race, board).
    race  is interpreted as a boolean (format "p").
    board must be a tuple of exactly 28 integers, laid out as described
          in dumbeval().
    Returns the evaluation score as a Python float. Returns NULL with an
    exception set when the arguments do not parse, the tuple does not have
    28 entries (dumbeval.error), or an entry is not an integer that fits
    in a C int. */
 static PyObject*
 dumbeval_eval(PyObject *self, PyObject *args) {
  int race;
  long numValues;
  int board[28];
  float eval_score;
  PyObject* tuple_obj;
  PyObject* val_obj;
  if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj))
    return NULL;
  numValues = PyTuple_Size(tuple_obj);
  if (numValues < 0) return NULL;
  if (numValues != 28) {
    PyErr_SetString(DumbevalError, "Tuple must have 28 entries");
    return NULL;
  }
  // Iterate over tuple to retrieve positions
  for (int i=0; i<numValues; i++) {
    long value;
    val_obj = PyTuple_GetItem(tuple_obj, i);  /* borrowed reference */
    if (val_obj == NULL) return NULL;
    value = PyLong_AsLong(val_obj);
    /* -1 is also a legal value, so an error is only signalled by the
       pending exception; propagate it instead of evaluating garbage. */
    if (value == -1 && PyErr_Occurred()) return NULL;
    /* INT_MIN/INT_MAX come from <limits.h>, which Python.h includes. */
    if (value < INT_MIN || value > INT_MAX) {
      PyErr_SetString(PyExc_OverflowError, "board entry does not fit in a C int");
      return NULL;
    }
    board[i] = (int)value;
  }
  eval_score = dumbeval(race, board);
  return Py_BuildValue("f", eval_score);
 }
 /* Method table of the module: exposes dumbeval_eval as "eval", called
    with positional arguments only (METH_VARARGS). The all-NULL entry is
    the sentinel that terminates the table. */
 static PyMethodDef dumbeval_methods[] = {
  {
    "eval", dumbeval_eval, METH_VARARGS,
    "Returns evaluation results for the given board position."
  },
  {NULL, NULL, 0, NULL}
 };
 /* Module definition passed to PyModule_Create in PyInit_dumbeval.
    Fields in order: header initializer, module name, module docstring,
    m_size, method table. Per the CPython documentation an m_size of -1
    means the module keeps its state in global variables and does not
    support sub-interpreters. */
 static struct PyModuleDef dumbeval_definition = {
  PyModuleDef_HEAD_INIT,
  "dumbeval",
  "A Python module that implements Gerald Tesauro's pubeval function for evaluation backgammon positions with badly initialized weights.",
  -1,
  dumbeval_methods
 };
 PyMODINIT_FUNC PyInit_dumbeval(void) {
  PyObject* module;
  module = PyModule_Create(&dumbeval_definition);
  if (module == NULL)
    return NULL;
  DumbevalError = PyErr_NewException("dumbeval.error", NULL, NULL);
  Py_INCREF(DumbevalError);
  PyModule_AddObject(module, "error", DumbevalError);
  return module;
 }
--- a/dumbeval/setup.py
+++ b/dumbeval/setup.py
@ -0,0 +1,9 @@
 from distutils.core import setup, Extension
 # Describes the C extension module `dumbeval`, compiled from dumbeval.c.
 dumbeval = Extension('dumbeval',
                    sources = ['dumbeval.c'])
 # Registers the package metadata and the extension with the build system.
 setup (name = 'dumbeval',
       version = '0.1',
       description = 'Dumbeval for Python',
       ext_modules = [dumbeval])
--- a/dumbeval/weights.py
+++ b/dumbeval/weights.py
@ -0,0 +1,14 @@
 #!/usr/bin/env python3
 import re
 import sys
 import numpy as np
 # Print every element of an array. sys.maxsize is NumPy's documented way to
 # disable summarization; a NaN threshold is rejected by current NumPy.
 np.set_printoptions(precision=5, suppress=True, threshold=sys.maxsize)
 def random_array_string():
    """Return 122 standard-normal samples formatted as a C array initializer.

    The NumPy string form ``[a, b, ...]`` is rewritten to ``{\\n a, b, ... };``
    so it can be pasted after ``static const float name[122] =``.
    """
    values = np.random.normal(0, 1, 122)
    # (?s) lets '.' span the line breaks NumPy inserts. An inline flag must
    # lead the pattern: a trailing one is an error on Python 3.11+.
    return re.sub(r'(?s)^\[(.*)\]$', r'{\n\1 };', np.array2string(values, separator=', '))
 print("/* Weights generated by weights.py */")
 print("static const float wc[122] =", random_array_string())
 print()
 print("static const float wr[122] =", random_array_string())
--- a/eval.py
+++ b/eval.py
@ -2,6 +2,7 @@ from board import Board
 import numpy as np
 import pubeval
 import dumbeval
 class Eval:
@ -24,4 +25,16 @@ class Eval:
        return best_move_pair
    @staticmethod
    def make_dumbeval_move(board, sym, roll):
        """Pick the legal successor state that dumbeval scores highest.

        Every state returned by ``Board.calculate_legal_states`` is converted
        with ``Board.board_features_to_pubeval`` and scored by ``dumbeval.eval``
        with the race flag fixed to ``False``.

        Returns a ``(state, score)`` pair for the highest-scoring state.
        """
        candidates = Board.calculate_legal_states(board, sym, roll)
        scored = []
        for state in candidates:
            features = Board.board_features_to_pubeval(state, sym)
            scored.append((state, dumbeval.eval(False, features)))
        best_index = np.array([score for _, score in scored]).argmax()
        return scored[best_index]
--- a/game.py
+++ b/game.py
@ -23,18 +23,21 @@ class Game:
    def roll(self):
        """Roll this game's dice cup and return whatever ``self.cup.roll()`` yields."""
        return self.cup.roll()
-
+    '''
    def best_move_and_score(self):
        roll = self.roll()
        move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
        self.board = move_and_val[0]
        return move_and_val
    '''
    '''
    def next_round(self):
        roll = self.roll()
        #print(roll)
        self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
        return self.board
    '''
    def board_state(self):
        """Return the board currently stored on this game (``self.board``)."""
        return self.board
--- a/main.py
+++ b/main.py
@ -3,38 +3,6 @@ import sys
 import os
 import time
 model_storage_path = 'models'
 # Create models folder
 if not os.path.exists(model_storage_path):
    os.makedirs(model_storage_path)
 # Define helper functions
 def log_train_outcome(outcome, trained_eps = 0):
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(train_outcome),
                    'sum': sum(train_outcome),
                    'mean': sum(train_outcome) / len(train_outcome),
                    'time': int(time.time())
    }
    with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
 def log_eval_outcomes(outcomes, trained_eps = 0):
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
                        'mean': sum(scores) / len(scores),
                        'time': int(time.time())
        }
        with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
 parser.add_argument('--episodes', action='store', dest='episode_count',
@ -47,13 +15,15 @@ parser.add_argument('--eval-methods', action='store',
                    default=['random'], nargs='*',
                    help='specifies evaluation methods')
 parser.add_argument('--eval', action='store_true',
-                    help='whether to evaluate the neural network with a random choice bot')
+                    help='evaluate the neural network with a random choice bot')
 parser.add_argument('--bench-eval-scores', action='store_true',
                    help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.')
 parser.add_argument('--train', action='store_true',
-                    help='whether to train the neural network')
+                    help='train the neural network')
 parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
-                    help='whether to evaluate after each training session')
+                    help='evaluate after each training session')
 parser.add_argument('--play', action='store_true',
-                    help='whether to play with the neural network')
+                    help='play with the neural network')
 parser.add_argument('--start-episode', action='store', dest='start_episode',
                    type=int, default=0,
                    help='episode count to start at; purely for display purposes')
@ -66,27 +36,74 @@ args = parser.parse_args()
 config = {
    'model': args.model,
    'model_path': os.path.join(model_storage_path, args.model),
    'episode_count': args.episode_count,
    'eval_methods': args.eval_methods,
    'train': args.train,
    'play': args.play,
    'eval': args.eval,
    'bench_eval_scores': args.bench_eval_scores,
    'eval_after_train': args.eval_after_train,
    'start_episode': args.start_episode,
    'train_perpetually': args.train_perpetually,
-    'model_storage_path': model_storage_path
+    'model_storage_path': 'models',
    'bench_storage_path': 'bench',
    'board_representation': 'quack'
 }
 # Create models folder
 if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])
 model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
 # Make sure directories exist
-model_path = os.path.join(config['model_path'])
+log_path = os.path.join(model_path(), 'logs')
-log_path   = os.path.join(model_path, 'logs')
+if not os.path.isdir(model_path()):
-if not os.path.isdir(model_path):
+    os.mkdir(model_path())
    os.mkdir(model_path)
 if not os.path.isdir(log_path):
    os.mkdir(log_path)
 # Define helper functions
 def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
    """Append one summary line for a training session to ``log_path``.

    The line has the form ``time;trained_eps;count;sum;mean``, where count,
    sum and mean are computed over ``outcome``.

    outcome     -- sequence of numeric results from the training session
    trained_eps -- episode count to record alongside the summary
    log_path    -- file to append to; the default is computed once, when
                   this function is defined
    """
    # Summarize the argument itself; the previous code read the global
    # `train_outcome` and only worked when the caller used that exact name.
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time())
    }
    with open(log_path, 'a+') as f:
        f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
 def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
    """Append one line per evaluation outcome to ``log_path``.

    Each outcome is indexed as ``(method, scores)``; the written line has the
    form ``time;method;trained_eps;count;sum;mean``.
    """
    line_format = "{time};{method};{trained_eps};{count};{sum};{mean}"
    for outcome in outcomes:
        results = outcome[1]
        method_name = outcome[0]
        count = len(results)
        total = sum(results)
        line = line_format.format(trained_eps=trained_eps,
                                  method=method_name,
                                  count=count,
                                  sum=total,
                                  mean=total / count,
                                  time=int(time.time()))
        # The file is reopened per outcome, so nothing is created for an empty list.
        with open(log_path, 'a+') as log_file:
            log_file.write(line + "\n")
 def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
    """Append one benchmark line per evaluation outcome to ``log_path``.

    Each outcome is indexed as ``(method, scores)``; the written line has the
    form ``method;count;index;time;sum;mean``. ``time`` is the value supplied
    by the caller and ``trained_eps`` does not appear in the output.
    """
    line_format = "{method};{count};{index};{time};{sum};{mean}"
    for outcome in outcomes:
        results = outcome[1]
        method_name = outcome[0]
        count = len(results)
        total = sum(results)
        line = line_format.format(trained_eps=trained_eps,
                                  method=method_name,
                                  count=count,
                                  sum=total,
                                  mean=total / count,
                                  time=time,
                                  index=index)
        # The file is reopened per outcome, so nothing is created for an empty list.
        with open(log_path, 'a+') as log_file:
            log_file.write(line + "\n")
 # Do actions specified by command-line
 if args.list_models:
    def get_eps_trained(folder):
@ -94,7 +111,7 @@ if args.list_models:
            return int(f.read())
    model_folders = [ f.path
                      for f
-                      in os.scandir(model_storage_path)
+                      in os.scandir(config['model_storage_path'])
                      if f.is_dir() ]
    models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
    sys.stderr.write("Found {} model(s)\n".format(len(models)))
@ -103,28 +120,77 @@ if args.list_models:
    exit()
 if __name__ == "__main__":
    # Set up network
    from network import Network
 network = Network(config, config['model'])
 eps = config['start_episode']
    # Set up variables
    episode_count = config['episode_count']
    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        while True:
-        train_outcome = network.train_model(episodes = episode_count, trained_eps = eps)
+            train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
-        eps += episode_count
+            start_episode += episode_count
-        log_train_outcome(train_outcome, trained_eps = eps)
+            log_train_outcome(train_outcome, trained_eps = start_episode)
            if config['eval_after_train']:
-            eval_outcomes = network.eval(trained_eps = eps)
+                eval_outcomes = network.eval(trained_eps = start_episode)
-            log_eval_outcomes(eval_outcomes, trained_eps = eps)
+                log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
            if not config['train_perpetually']:
                break
    elif args.eval:
-    eps = config['start_episode']
+        network = Network(config, config['model'])
-    outcomes = network.eval()
+        start_episode = network.episodes_trained
-    log_eval_outcomes(outcomes, trained_eps = eps)
+        # Evaluation measures are described in `config`
        outcomes = network.eval(config['episode_count'])
        log_eval_outcomes(outcomes, trained_eps = start_episode)
        # elif args.play:
        # g.play(episodes = episode_count)
    elif args.bench_eval_scores:
        # Make sure benchmark directory exists
        if not os.path.isdir(config['bench_storage_path']):
            os.mkdir(config['bench_storage_path'])
        config = config.copy()
        config['model'] = 'bench'
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
        if start_episode == 0:
            print("Model not trained! Beware of using non-existing models!")
            exit()
        sample_count = 20
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]
        def do_eval(sess):
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
                for n in episode_counts:
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measure to be benchmarked are described in `config`
                        outcomes = network.eval(episode_count = n,
                                                tf_session = sess)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
                                                index = i,
                                                trained_eps = start_episode,
                                                log_path = result_path)
        # CMM: oh no
        import tensorflow as tf
        with tf.Session() as session:
            network.restore_model(session)
            do_eval(session)
--- a/network.py
+++ b/network.py
@ -8,51 +8,72 @@ import sys
 import random
 from eval import Eval
 class Network:
    hidden_size = 40
    input_size = 26
    output_size = 1
    # Can't remember the best learning_rate, look this up
    learning_rate = 0.1
-    # TODO: Actually compile tensorflow properly
+class Network:
-    #os.environ["TF_CPP_MIN_LOG_LEVEL"]="2"
+    # board_features_quack has size 28
    # board_features_quack_fat has size 30
    # board_features_tesauro has size 198
    board_reps = {
        'quack-fat' : (30, Board.board_features_quack_fat),
        'quack'     : (28, Board.board_features_quack),
        'tesauro'   : (198, Board.board_features_tesauro)
    }
    def custom_tanh(self, x, name=None):
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
    def __init__(self, config, name):
        self.config = config
-        self.session = tf.Session()
+        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
-        self.checkpoint_path = config['model_path']
+
        self.name = name
        # Set board representation from config
        self.input_size, self.board_trans_func = Network.board_reps[
            self.config['board_representation']
        ]
        self.output_size = 1
        self.hidden_size = 40
        # Can't remember the best learning_rate, look this up
        self.learning_rate = 0.01
        # Restore trained episode count for model
        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
        if os.path.isfile(episode_count_path):
            with open(episode_count_path, 'r') as f:
                self.episodes_trained = int(f.read())
        else:
            self.episodes_trained = 0
        # input = x
-        self.x = tf.placeholder('float', [1, Network.input_size], name='x')
+        self.x = tf.placeholder('float', [1, self.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next")
+        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
        xavier_init = tf.contrib.layers.xavier_initializer()
-        W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size),
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
                              initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size),
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
                              initializer=xavier_init)
-        b_1 = tf.get_variable("b_1", (Network.hidden_size,),
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
                              initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (Network.output_size,),
+        b_2 = tf.get_variable("b_2", (self.output_size,),
                              initializer=tf.zeros_initializer)
        value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
-        self.value = self.custom_tanh(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
+        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
        # tf.reduce_sum basically finds the sum of its input, so this gives the
        # difference between the two values, in case they should be lists, which
        # they might be if our input changes
        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
-        difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference')
+        difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
        trainable_vars = tf.trainable_variables()
        gradients = tf.gradients(self.value, trainable_vars)
@ -62,18 +83,15 @@ class Network:
        with tf.variable_scope('apply_gradients'):
            for gradient, trainable_var in zip(gradients, trainable_vars):
                # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
-                backprop_calc = Network.learning_rate * difference_in_values * gradient
+                backprop_calc = self.learning_rate * difference_in_values * gradient
                grad_apply = trainable_var.assign_add(backprop_calc)
                apply_gradients.append(grad_apply)
            self.training_op = tf.group(*apply_gradients, name='training_op')
        self.saver = tf.train.Saver(max_to_keep=1)
        self.session.run(tf.global_variables_initializer())
-        self.restore_model()
+    def eval_state(self, sess, state):
    def eval_state(self, state):
        # Run state through a network
        # Remember to create placeholders for everything because wtf tensorflow
@ -105,27 +123,26 @@ class Network:
        # implement learning_rate * (difference_in_values) * gradients (the
        # before-mentioned calculation.
        # print("Network is evaluating")
        val = self.session.run(self.value, feed_dict={self.x: state})
        # print("eval ({})".format(self.name), state, val, sep="\n")
        return val
-    def save_model(self, episode_count):
+        return sess.run(self.value, feed_dict={self.x: state})
-        self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
+
    def save_model(self, sess, episode_count):
        self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'))
        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(episode_count) + "\n")
-    def restore_model(self):
+    def restore_model(self, sess):
        if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
            latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                  str(latest_checkpoint))
-            self.saver.restore(self.session, latest_checkpoint)
+            self.saver.restore(sess, latest_checkpoint)
            variables_names = [v.name for v in tf.trainable_variables()]
-            values = self.session.run(variables_names)
+            values = sess.run(variables_names)
            for k, v in zip(variables_names, values):
                print("Variable: ", k)
                print("Shape: ", v.shape)
@ -137,34 +154,18 @@ class Network:
                with open(episode_count_path, 'r') as f:
                    self.config['start_episode'] = int(f.read())
-    # Have a circular dependency, #fuck, need to rewrite something
+    def make_move(self, sess, board, roll, player):
    def adjust_weights(self, board, v_next):
 #        print("lol")
        board = np.array(board).reshape((1,26))
        self.session.run(self.training_op, feed_dict = { self.x: board,
                                                         self.value_next: v_next })
            # while game isn't done:
                #x_next = g.next_move()
                #value_next = network.eval_state(x_next)
                #self.session.run(self.training_op, feed_dict={self.x: x, self.value_next: value_next})
                #x = x_next
    def make_move(self, board, roll):
        # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, 1, roll)
+        legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [ (move, self.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
+        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
-        scores = [ x[1] for x in moves_and_scores ]
+        scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
        best_score_index = np.array(scores).argmax()
        best_move_pair = moves_and_scores[best_score_index]
        # print("Found the best state, being:", np.array(move_scores).argmax())
        return best_move_pair
-                
+    def eval(self, episode_count, trained_eps = 0, tf_session = None):
-    def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
+        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
            start_time = time.time()
            def print_time_estimate(eps_completed):
@ -173,99 +174,23 @@ class Network:
                eps_per_sec = eps_completed / time_diff
                secs_per_ep = time_diff / eps_completed
                eps_remaining = (episodes - eps_completed)
-            sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
+                sys.stderr.write(
-            sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
+                    "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
                sys.stderr.write(
                    "[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
                        eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
-        
+            sys.stderr.write(
-        sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
+                "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
        outcomes = []
        for episode in range(1, episodes + 1):
            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
            # TODO decide which player should be here
            player = 1
            roll = (random.randrange(1,7), random.randrange(1,7))
            prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
            if player == -1:
                prev_board = Board.flip(prev_board)
            # find the best move here, make this move, then change turn as the
            # first thing inside of the while loop and then call
            # best_move_and_score to get V_t+1
            # i = 0
            while Board.outcome(prev_board) is None:
                # print("-"*30)
                # print(i)
                # print(roll)
                # print(Board.pretty(prev_board))
                # print("/"*30)
                # i += 1
                player *= -1
                roll = (random.randrange(1,7), random.randrange(1,7))
                cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll)
                if player == -1:
                    cur_board  = Board.flip(cur_board)
                self.adjust_weights(prev_board, cur_board_value)
                prev_board = cur_board
            final_board = prev_board
            sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1]))
            outcomes.append(Board.outcome(final_board)[1])
            final_score = np.array([ Board.outcome(final_board)[1] ])
            self.adjust_weights(prev_board, final_score.reshape((1, 1)))
            sys.stderr.write("\n")
            if episode % min(save_step_size, episodes) == 0:
                sys.stderr.write("[TRAIN] Saving model...\n")
                self.save_model(episode+trained_eps)
            if episode % 50 == 0:
                print_time_estimate(episode)
        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
        self.save_model(episode+trained_eps)
        return outcomes
                # take turn, which finds the best state and picks it, based on the current network
                # save current state
                # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning and from now we'll save it at the end of the turn
                # save the current state again, so we can continue running backprop based on the "previous" turn.
        # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
    def eval(self, trained_eps = 0):
        def do_eval(method, episodes = 1000, trained_eps = 0):
            start_time = time.time()
            def print_time_estimate(eps_completed):
                cur_time      = time.time()
                time_diff     = cur_time - start_time
                eps_per_sec   = eps_completed / time_diff
                secs_per_ep   = time_diff / eps_completed
                eps_remaining = (episodes - eps_completed)
                sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
                sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
            sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
            if method == 'random':
                outcomes = []
-                for i in range(1, episodes + 1):
+                """for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-                        board = (self.p1.make_move(board, self.p1.get_sym(), roll))[0]
+                        board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0]
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -273,11 +198,12 @@ class Network:
                    sys.stderr.write("\n")
                    if i % 50 == 0:
-                        print_time_estimate(i)
+                        print_time_estimate(i)"""
                return outcomes
            elif method == 'pubeval':
                outcomes = []
-                # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
+                # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll),
                #  which can be used to get the best move according to pubeval
                for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
@ -287,19 +213,18 @@ class Network:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        # print(roll)
-                        prev_board = tuple(board)
+                        # prev_board = tuple(board)
-                        board = (self.make_move(board, roll))[0]
+                        board = (self.make_move(sess, board, roll, 1))[0]
                        # print("post p1:", board, sep="\n")
                        # print("."*30)
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        # print(roll)
-                        prev_board = tuple(board)
+                        # prev_board = tuple(board)
                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
                        # print("post pubeval:", board, sep="\n")
                    # print("*"*30)
                    # print(board)
                    # print("+"*30)
@ -311,34 +236,191 @@ class Network:
                        print_time_estimate(i)
                return outcomes
            # elif method == 'dumbmodel':
            #     config_prime = self.config.copy()
            #     config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
            #     eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
            #     #print(self.config, "\n", config_prime)
            #     outcomes = []
            #     for i in range(1, episodes + 1):
            #         sys.stderr.write("[EVAL ] Episode {}".format(i))
            #         board = Board.initial_state
            #         while Board.outcome(board) is None:
            #             roll = (random.randrange(1,7), random.randrange(1,7))
            #             board = (self.make_move(board, self.p1.get_sym(), roll))[0]
-            #             roll = (random.randrange(1,7), random.randrange(1,7))
+            elif method == 'dumbeval':
-            #             board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
+                outcomes = []
-            #         sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
+                # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll),
-            #         outcomes.append(Board.outcome(board)[1])
+                #  which can be used to get the best move according to pubeval
-            #         sys.stderr.write("\n")
+                for i in range(1, episodes + 1):
                    sys.stderr.write("[EVAL ] Episode {}".format(i))
                    board = Board.initial_state
                    # print("init:", board, sep="\n")
                    while Board.outcome(board) is None:
                        # print("-"*30)
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        # print(roll)
-            #         if i % 50 == 0:
+                        # prev_board = tuple(board)
-            #             print_time_estimate(i)
+                        board = (self.make_move(sess, board, roll, 1))[0]
-            #     return outcomes
+                        # print("post p1:", board, sep="\n")
                        # print("."*30)
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        # print(roll)
                        # prev_board = tuple(board)
                        board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
                        # print("post pubeval:", board, sep="\n")
                    # print("*"*30)
                    # print(board)
                    # print("+"*30)
                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
                    outcomes.append(Board.outcome(board)[1])
                    sys.stderr.write("\n")
                    if i % 10 == 0:
                        print_time_estimate(i)
                return outcomes
            elif method == 'dumbmodel':
                outcomes = []
                """
                config_prime = self.config.copy()
                config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
                eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
                #print(self.config, "\n", config_prime)
                outcomes = []
                for i in range(1, episodes + 1):
                sys.stderr.write("[EVAL ] Episode {}".format(i))
                board = Board.initial_state
                while Board.outcome(board) is None:
                roll = (random.randrange(1,7), random.randrange(1,7))
                board = (self.make_move(board, self.p1.get_sym(), roll))[0]
                roll = (random.randrange(1,7), random.randrange(1,7))
                board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
                sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
                outcomes.append(Board.outcome(board)[1])
                sys.stderr.write("\n")
                if i % 50 == 0:
                print_time_estimate(i)
                """
                return outcomes
            else:
                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                return [0]
-        return [ (method, do_eval(method,
+        if tf_session == None:
-                                  self.config['episode_count'],
+            with tf.Session() as session:
                session.run(tf.global_variables_initializer())
                self.restore_model(session)
                outcomes = [ (method, do_eval(session,
                                              method,
                                              episode_count,
                                              trained_eps = trained_eps))
                             for method
                             in self.config['eval_methods'] ]
                return outcomes
        else:
            outcomes = [ (method, do_eval(tf_session,
                                          method,
                                          episode_count,
                                          trained_eps = trained_eps))
                         for method
                         in self.config['eval_methods'] ]
            return outcomes
    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
        """Train the network by letting it play against itself.

        Opens a fresh TensorFlow session, initialises the variables, restores
        a previously saved model via ``self.restore_model`` and then plays
        ``episodes`` games starting from ``Board.initial_state``. After every
        move ``self.training_op`` is run with the features of the position
        before the move as ``self.x`` and the value returned by
        ``self.make_move`` for the chosen position as ``self.value_next``.
        When a game is over, one more training step is run against the final
        outcome rescaled as ``(outcome + 2) / 4``, and the merged summaries
        are written to ``/tmp/log/tf``.

        :param episodes: number of games to play in this call.
        :param save_step_size: the model is saved every
            ``min(save_step_size, episodes)`` episodes.
        :param trained_eps: number of episodes trained before this call; used
            as an offset for log lines, summary steps and saved episode counts.
        :return: list holding ``Board.outcome(final_board)[1]`` for each game.
        """
        with tf.Session() as sess:
            writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
            # Close the writer even if training raises, so buffered summaries
            # are flushed and the file handle is not leaked.
            try:
                sess.run(tf.global_variables_initializer())
                self.restore_model(sess)

                # Debug dump of every trainable variable after restoring.
                variables_names = [v.name for v in tf.trainable_variables()]
                values = sess.run(variables_names)
                for k, v in zip(variables_names, values):
                    print("Variable: ", k)
                    print("Shape: ", v.shape)
                    print(v)

                # Build the merged-summary op exactly once. Calling
                # tf.summary.merge_all() inside the episode loop would add a
                # new op to the graph for every episode, growing the graph
                # without bound.
                with tf.name_scope("final"):
                    merged = tf.summary.merge_all()

                start_time = time.time()

                def print_time_estimate(eps_completed):
                    cur_time = time.time()
                    time_diff = cur_time - start_time
                    eps_per_sec = eps_completed / time_diff
                    secs_per_ep = time_diff / eps_completed
                    eps_remaining = (episodes - eps_completed)
                    sys.stderr.write(
                        "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
                    sys.stderr.write(
                        "[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
                            eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))

                sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
                outcomes = []
                for episode in range(1, episodes + 1):
                    sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
                    # TODO decide which player should be here
                    player = 1

                    prev_board = Board.initial_state

                    # Play one game: pick a move for the current player, train
                    # on the (previous position, value of chosen position)
                    # pair, then hand the turn to the other player.
                    i = 0
                    while Board.outcome(prev_board) is None:
                        i += 1
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
                        cur_board, cur_board_value = self.make_move(sess,
                                                                    prev_board,
                                                                    roll, player)

                        # adjust weights
                        sess.run(self.training_op,
                                 feed_dict={self.x: self.board_trans_func(prev_board, player),
                                            self.value_next: cur_board_value})

                        player *= -1
                        prev_board = cur_board

                    final_board = prev_board
                    sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
                    outcomes.append(Board.outcome(final_board)[1])
                    final_score = np.array([Board.outcome(final_board)[1]])
                    # Rescale the outcome before using it as the training
                    # target (presumably maps outcomes in [-2, 2] onto [0, 1]
                    # -- TODO confirm the outcome range).
                    scaled_final_score = ((final_score + 2) / 4)

                    # Final training step of the game, also recording summaries.
                    summary, _ = sess.run([merged, self.training_op],
                                          feed_dict={self.x: self.board_trans_func(prev_board, player),
                                                     self.value_next: scaled_final_score.reshape((1, 1))})
                    writer.add_summary(summary, episode + trained_eps)

                    sys.stderr.write("\n")

                    if episode % min(save_step_size, episodes) == 0:
                        sys.stderr.write("[TRAIN] Saving model...\n")
                        self.save_model(sess, episode + trained_eps)

                    if episode % 50 == 0:
                        print_time_estimate(episode)

                sys.stderr.write("[TRAIN] Saving model for final episode...\n")
                # After the loop `episode == episodes`; using `episodes` also
                # works when no episode was played (the loop variable would
                # be unbound in that case).
                self.save_model(sess, episodes + trained_eps)

                return outcomes
            finally:
                writer.close()
                # take turn, which finds the best state and picks it, based on the current network
                # save current state
                # run training operation (session.run(self.training_op, {x:x, value_next, value_next})), (something which does the backprop, based on the state after having taken a turn, found before, and the state we saved in the beginning); from now on we'll save it at the end of the turn
                # save the current state again, so we can continue running backprop based on the "previous" turn.
        # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop with nothing to stop it!
--- a/plot.py
+++ b/plot.py
@ -9,9 +9,26 @@ import matplotlib.dates as mdates
 train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean']
 eval_headers  = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean']
 bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean']
 model_path = 'models'
 def plot_bench(data_path):
    """Show one box plot per evaluation method from a benchmark CSV.

    The file at ``data_path`` is read as a ';'-separated CSV without a header
    row, using ``bench_headers`` as column names and the first three columns
    as a multi-index. For each label in the first index level, the 'mean'
    column is reshaped so that every second-level value (the sample count)
    becomes one box, the figure is shown, and the per-column variances are
    printed.

    :param data_path: path of the benchmark CSV file.
    """
    df = pd.read_csv(data_path, sep=";",
                     names=bench_headers, index_col=[0,1,2])
    for method_label in df.index.levels[0]:
        df_prime = df[['mean']].loc[method_label].unstack().T
        plot = df_prime.plot.box()
        plot.set_title("Evaluation variance, {}".format(method_label))
        plot.set_xlabel("Sample count")
        plot.set_ylabel("Mean score")
        # pyplot.show() does not take a figure; a positional argument used to
        # be interpreted as `block` and is rejected by recent matplotlib.
        plt.show()

        # for later use:
        variances = df_prime.var()
        print(variances)
 def dataframes(model_name):
    def df_timestamp_to_datetime(df):
@ -44,7 +61,7 @@ if __name__ == '__main__':
    plt.show()
    while True:
-        df = dataframes('default')['eval']
+        df = dataframes('a')['eval']
        print(df)
--- a/test.py
+++ b/test.py
@ -614,5 +614,311 @@ class TestBoardFlip(unittest.TestCase):
        self.assertEqual(Board.flip(Board.flip(board)), board)
    def test_tesauro_initial(self):
        # Board.board_features_tesauro(Board.initial_state, 1) must equal the
        # hand-written vector below, compared as a (1, 198) array.
        # Layout of the literal: two halves, each made of 24 groups of four
        # values followed by two scalars, and finally a trailing pair.
        board = Board.initial_state
        # First half: 24 groups of four values.
        expected = (1,1,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    # Two scalars closing the first half (presumably the bar
                    # and borne-off features; cf. test_tesauro_bars and
                    # test_tesauro_home).
                    0.0,
                    0,
                    # Second half: 24 groups of four values.
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,0,0,
                    # Two scalars closing the second half.
                    0.0,
                    0,
                    # Trailing pair: (1, 0) when the player argument is 1.
                    1,
                    0
        )

        import numpy as np
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())
    def test_tesauro_bars(self):
        # Starts from Board.initial_state, empties indices 1 and 24 and puts
        # 2 on index 0 and -2 on index 25 (presumably the two bars -- confirm
        # against Board). Compared with the initial-state vector, the first
        # group of the first half and the last group of the second half are
        # zero, and the first scalar after each half is 1.0.
        board = list(Board.initial_state)
        board[1] = 0
        board[0] = 2
        board[24] = 0
        board[25] = -2

        board = tuple(board)

        # First half: 24 groups of four values.
        expected = (0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    # Two scalars closing the first half; the first is 1.0
                    # for the 2 placed on index 0.
                    1.0,
                    0,
                    # Second half: 24 groups of four values.
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    # Two scalars closing the second half; the first is 1.0
                    # for the -2 placed on index 25.
                    1.0,
                    0,
                    # Trailing pair: (1, 0) when the player argument is 1.
                    1,
                    0
        )

        import numpy as np
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())
    def test_tesauro_home(self):
        # Starts from Board.initial_state and empties indices 1 and 24
        # without placing those checkers anywhere else. The expected vector
        # then carries 2 as the second scalar after each half (presumably the
        # count of checkers no longer on the board -- confirm against Board).
        board = list(Board.initial_state)

        board[1] = 0
        board[24] = 0

        board = tuple(board)

        # First half: 24 groups of four values.
        expected = (0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    # Two scalars closing the first half; the second is 2.
                    0.0,
                    2,
                    # Second half: 24 groups of four values.
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    # Two scalars closing the second half; the second is 2.
                    0.0,
                    2,
                    # Trailing pair: (1, 0) when the player argument is 1.
                    1,
                    0
        )

        import numpy as np
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())
    def test_tesauro_black_player(self):
        # Same board and same first 196 expected values as
        # test_tesauro_initial, but the features are requested for player -1,
        # so the trailing pair is (0, 1) instead of (1, 0).
        board = Board.initial_state
        # First half: 24 groups of four values.
        expected = (1,1,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    # Two scalars closing the first half.
                    0.0,
                    0,
                    # Second half: 24 groups of four values.
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    1,1,1,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,1,1,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    0,0,0,0,
                    1,1,0,0,
                    # Two scalars closing the second half.
                    0.0,
                    0,
                    # Trailing pair: (0, 1) when the player argument is -1.
                    0,
                    1
        )

        import numpy as np
        self.assertTrue((Board.board_features_tesauro(board, -1) ==
                         np.array(expected).reshape(1, 198)).all())
 if __name__ == '__main__':
    unittest.main()