Merge branch 'rework-1' into 'master'

Rework 1

See merge request Pownie/backgammon!4
This commit is contained in:
Christoffer Müller Madsen 2018-03-28 13:32:58 +00:00
commit 3bcb7c5df9
13 changed files with 1104 additions and 301 deletions

3
.gitignore vendored
View File

@ -169,3 +169,6 @@ venv.bak/
README.* README.*
!README.org !README.org
models/ models/
.DS_Store
bench/

47
bin/train-evaluate-save Executable file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env ruby
def save(model_name)
require 'date'
models_dir = 'models'
model_path = File.join(models_dir, model_name)
if not File.exists? model_path then
return false
end
episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
puts "Found model #{model_name} with episodes #{episode_count} trained!"
file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
save_path = File.join(models_dir, 'saves', file_name)
puts "Saving to #{save_path}"
system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
return true
end
def train(model, episodes)
system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
end
def evaluate(model, episodes, method)
system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
end
model = ARGV[0]
if model.nil? then raise "no model specified" end
while true do
save model
train model, 1000
save model
train model, 1000
3.times do
evaluate model, 250, "pubeval"
end
3.times do
evaluate model, 250, "dumbeval"
end
end

View File

@ -31,11 +31,59 @@ class Board:
board = list(board) board = list(board)
positives = [x if x > 0 else 0 for x in board] positives = [x if x > 0 else 0 for x in board]
negatives = [x if x < 0 else 0 for x in board] negatives = [x if x < 0 else 0 for x in board]
board.append(15 - sum(positives)) board.append( 15 - sum(positives))
board.append(-15 - sum(negatives)) board.append(-15 - sum(negatives))
return tuple(board) return tuple(board)
# quack
@staticmethod
def board_features_quack(board, player):
board = list(board)
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
return np.array(board).reshape(1, -1)
# quack-fat
@staticmethod
def board_features_quack_fat(board, player):
board = list(board)
positives = [x if x > 0 else 0 for x in board]
negatives = [x if x < 0 else 0 for x in board]
board.append( 15 - sum(positives))
board.append(-15 - sum(negatives))
board += ([1, 0] if np.sign(player) > 0 else [0, 1])
return np.array(board).reshape(1,-1)
# tesauro
@staticmethod
def board_features_tesauro(board, cur_player):
def ordinary_trans(val, player):
abs_val = val * player
if abs_val <= 0: return (0,0,0,0)
elif abs_val == 1: return (1,0,0,0)
elif abs_val == 2: return (1,1,0,0)
elif abs_val == 3: return (1,1,1,0)
else: return (1,1,1, (abs_val - 3) / 2)
def bar_trans(board, player):
if player == 1: return (abs(board[0]/2),)
elif player == -1: return (abs(board[25]/2),)
# def ordinary_trans_board(board, player):
# return np.array(
# [ordinary_trans(x, player) for x in board[1:25]]
# ).flatten()
board_rep = []
for player in [1,-1]:
for x in board[1:25]:
board_rep += ordinary_trans(x, player)
board_rep += bar_trans(board, player)
board_rep += (15 - Board.num_of_checkers_for_player(board, player),)
board_rep += ([1,0] if cur_player == 1 else [0,1])
return np.array(board_rep).reshape(1,198)
@staticmethod @staticmethod
@ -250,9 +298,9 @@ class Board:
return """ return """
13 14 15 16 17 18 19 20 21 22 23 24 13 14 15 16 17 18 19 20 21 22 23 24
+--------------------------------------------------------------------------+ +--------------------------------------------------------------------------+
| {12}| {11}| {10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO| | {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
|---|---|---|---|---|---|------------|---|---|---|---|---|---| | |---|---|---|---|---|---|------------|---|---|---|---|---|---| |
| {13}| {14}| {15}| {16}| {17}| {18}| bar 1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO| | {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO|
+--------------------------------------------------------------------------+ +--------------------------------------------------------------------------+
12 11 10 9 8 7 6 5 4 3 2 1 12 11 10 9 8 7 6 5 4 3 2 1
""".format(*temp) """.format(*temp)

1
dumbeval/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
build/

194
dumbeval/dumbeval.c Normal file
View File

@ -0,0 +1,194 @@
#include <Python.h>
static PyObject* DumbevalError;
static float x[122];
/* With apologies to Gerry Tesauro */
/* Weights generated by weights.py */
static const float wc[122] = {
-1.91222, 1.45979, 0.40657, -1.39159, 3.64558, -0.45381, -0.03157,
0.14539, 0.80232, 0.87558, 2.36202, -2.01887, -0.88918, 2.65871,
-1.31587, 1.07476, 0.30491, -1.32892, 0.38018, -0.30714, -1.16178,
0.71481, -1.01334, -0.44373, 0.51255, -0.17171, -0.88886, 0.02071,
-0.53279, -0.22139, -1.02436, 0.17948, 0.95697, 0.49272, 0.31848,
-0.58293, 0.14484, 0.22063, 1.0336 , -1.90554, 1.10291, -2.05589,
-0.16964, -0.82442, 1.27217, -1.24968, -0.90372, 0.05546, 0.2535 ,
-0.03533, -0.31773, 0.43704, 0.21699, 0.10519, 2.12775, -0.48196,
-0.08445, -0.13156, -0.68362, 0.64765, 0.32537, 0.79493, 1.94577,
-0.63827, 0.97057, -0.46039, 1.51801, -0.62955, -0.43632, 0.25876,
-0.46623, -0.46963, 1.3532 , -0.07362, -1.53211, 0.69676, -0.92407,
0.07153, 0.67173, 0.27661, -0.51579, -0.49019, 1.06603, -0.97673,
-1.21231, -1.54966, -0.07795, 0.32697, 0.02873, 1.38703, 0.41725,
0.78326, -0.7257 , 0.54165, 1.38882, 0.27304, 1.0739 , 0.74654,
1.35561, 1.18697, 1.09146, 0.17552, -0.30773, 0.27812, -1.674 ,
-0.31073, -0.40745, 0.51546, -1.10875, 2.0081 , -1.27931, -1.16321,
0.95652, 0.7487 , -0.2347 , 0.20324, -0.41417, 0.05929, 0.72632,
-1.15223, 1.2745 , -0.15947 };
static const float wr[122] = {
0.13119, -0.13164, -1.2736 , 1.06352, -1.34749, -1.03086, -0.27417,
-0.27762, 0.79454, -1.12623, 2.1134 , -0.7003 , 0.26056, -1.13518,
-1.64548, -1.30828, -0.96589, -0.36258, -1.14323, -0.2006 , -1.00307,
0.57739, -0.62693, 0.29721, -0.36996, -0.17462, 0.96704, 0.08902,
1.4337 , -0.47107, 0.82156, 0.14988, 1.74034, 1.13313, -0.32083,
-0.00048, -0.86622, 1.12808, 0.99875, 0.8049 , -0.16841, -0.42677,
-1.9409 , -0.53565, -0.83708, 0.69603, 0.32079, 0.56942, 0.67965,
1.49328, -1.65885, 0.96284, 0.63196, -0.27504, 0.39174, 0.71225,
-0.3614 , 0.88761, 1.12882, 0.77764, 1.02618, -0.20245, -0.39245,
-1.56799, 1.04888, -1.20858, -0.24361, -1.85157, -0.16912, 0.50512,
-2.93122, 0.70477, -0.93066, 1.74867, 0.23963, -0.00699, -1.27183,
-0.30604, 1.71039, 0.82202, -1.36734, -1.08352, -1.25054, 0.49436,
-1.5037 , -0.73143, 0.74189, 0.32365, 0.30539, -0.72169, 0.41088,
-1.56632, -0.63526, 0.58779, -0.05653, 0.76713, -1.40898, -0.33683,
1.86802, 0.59773, 1.28668, -0.65817, 2.46829, -0.09331, 2.9034 ,
1.04809, 0.73222, -0.44372, 0.53044, -1.9274 , -1.57183, -1.14068,
1.26036, -0.9296 , 0.06662, -0.26572, -0.30862, 0.72915, 0.98977,
0.63513, -1.43917, -0.12523 };
void setx(int pos[])
{
/* sets input vector x[] given board position pos[] */
extern float x[];
int j, jm1, n;
/* initialize */
for(j=0;j<122;++j) x[j] = 0.0;
/* first encode board locations 24-1 */
for(j=1;j<=24;++j) {
jm1 = j - 1;
n = pos[25-j];
if(n!=0) {
if(n==-1) x[5*jm1+0] = 1.0;
if(n==1) x[5*jm1+1] = 1.0;
if(n>=2) x[5*jm1+2] = 1.0;
if(n==3) x[5*jm1+3] = 1.0;
if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0;
}
}
/* encode opponent barmen */
x[120] = -(float)(pos[0])/2.0;
/* encode computer's menoff */
x[121] = (float)(pos[26])/15.0;
}
float dumbeval(int race, int pos[])
{
/* Backgammon move-selection evaluation function
for benchmark comparisons. Computes a linear
evaluation function: Score = W * X, where X is
an input vector encoding the board state (using
a raw encoding of the number of men at each location),
and W is a weight vector. Separate weight vectors
are used for racing positions and contact positions.
Makes lots of obvious mistakes, but provides a
decent level of play for benchmarking purposes. */
/* Provided as a public service to the backgammon
programming community by Gerry Tesauro, IBM Research.
(e-mail: tesauro@watson.ibm.com) */
/* The following inputs are needed for this routine:
race is an integer variable which should be set
based on the INITIAL position BEFORE the move.
Set race=1 if the position is a race (i.e. no contact)
and 0 if the position is a contact position.
pos[] is an integer array of dimension 28 which
should represent a legal final board state after
the move. Elements 1-24 correspond to board locations
1-24 from computer's point of view, i.e. computer's
men move in the negative direction from 24 to 1, and
opponent's men move in the positive direction from
1 to 24. Computer's men are represented by positive
integers, and opponent's men are represented by negative
integers. Element 25 represents computer's men on the
bar (positive integer), and element 0 represents opponent's
men on the bar (negative integer). Element 26 represents
computer's men off the board (positive integer), and
element 27 represents opponent's men off the board
(negative integer). */
/* Also, be sure to call rdwts() at the start of your
program to read in the weight values. Happy hacking] */
int i;
float score;
if(pos[26]==15) return(99999999.);
/* all men off, best possible move */
setx(pos); /* sets input array x[] */
score = 0.0;
if(race) { /* use race weights */
for(i=0;i<122;++i) score += wr[i]*x[i];
}
else { /* use contact weights */
for(i=0;i<122;++i) score += wc[i]*x[i];
}
return(score);
}
static PyObject*
dumbeval_eval(PyObject *self, PyObject *args) {
int race;
long numValues;
int board[28];
float eval_score;
PyObject* tuple_obj;
PyObject* val_obj;
if (! PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj))
return NULL;
numValues = PyTuple_Size(tuple_obj);
if (numValues < 0) return NULL;
if (numValues != 28) {
PyErr_SetString(DumbevalError, "Tuple must have 28 entries");
return NULL;
}
// Iterate over tuple to retreive positions
for (int i=0; i<numValues; i++) {
val_obj = PyTuple_GetItem(tuple_obj, i);
board[i] = PyLong_AsLong(val_obj);
}
eval_score = dumbeval(race, board);
return Py_BuildValue("f", eval_score);
}
static PyMethodDef dumbeval_methods[] = {
{
"eval", dumbeval_eval, METH_VARARGS,
"Returns evaluation results for the given board position."
},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef dumbeval_definition = {
PyModuleDef_HEAD_INIT,
"dumbeval",
"A Python module that implements Gerald Tesauro's pubeval function for evaluation backgammon positions with badly initialized weights.",
-1,
dumbeval_methods
};
PyMODINIT_FUNC PyInit_dumbeval(void) {
PyObject* module;
module = PyModule_Create(&dumbeval_definition);
if (module == NULL)
return NULL;
DumbevalError = PyErr_NewException("dumbeval.error", NULL, NULL);
Py_INCREF(DumbevalError);
PyModule_AddObject(module, "error", DumbevalError);
return module;
}

9
dumbeval/setup.py Normal file
View File

@ -0,0 +1,9 @@
from distutils.core import setup, Extension
dumbeval = Extension('dumbeval',
sources = ['dumbeval.c'])
setup (name = 'dumbeval',
version = '0.1',
description = 'Dumbeval for Python',
ext_modules = [dumbeval])

14
dumbeval/weights.py Normal file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env python3
import numpy as np
import re
re.DOTALL = True
np.set_printoptions(precision=5, suppress=True, threshold=np.nan)
def random_array_string():
return re.sub(r'^\[(.*)\]$(?s)', r'{\n\1 };', np.array2string(np.random.normal(0,1,122), separator=', '))
print("/* Weights generated by weights.py */")
print("static const float wc[122] =", random_array_string())
print()
print("static const float wr[122] =", random_array_string())

13
eval.py
View File

@ -2,6 +2,7 @@ from board import Board
import numpy as np import numpy as np
import pubeval import pubeval
import dumbeval
class Eval: class Eval:
@ -24,4 +25,16 @@ class Eval:
return best_move_pair return best_move_pair
@staticmethod
def make_dumbeval_move(board, sym, roll):
legal_moves = Board.calculate_legal_states(board, sym, roll)
moves_and_scores = [ ( board,
dumbeval.eval(False, Board.board_features_to_pubeval(board, sym)))
for board
in legal_moves ]
scores = [ x[1] for x in moves_and_scores ]
best_move_pair = moves_and_scores[np.array(scores).argmax()]
return best_move_pair

View File

@ -23,18 +23,21 @@ class Game:
def roll(self): def roll(self):
return self.cup.roll() return self.cup.roll()
'''
def best_move_and_score(self): def best_move_and_score(self):
roll = self.roll() roll = self.roll()
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll) move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
self.board = move_and_val[0] self.board = move_and_val[0]
return move_and_val return move_and_val
'''
'''
def next_round(self): def next_round(self):
roll = self.roll() roll = self.roll()
#print(roll) #print(roll)
self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0]) self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)[0])
return self.board return self.board
'''
def board_state(self): def board_state(self):
return self.board return self.board

188
main.py
View File

@ -3,38 +3,6 @@ import sys
import os import os
import time import time
model_storage_path = 'models'
# Create models folder
if not os.path.exists(model_storage_path):
os.makedirs(model_storage_path)
# Define helper functions
def log_train_outcome(outcome, trained_eps = 0):
format_vars = { 'trained_eps': trained_eps,
'count': len(train_outcome),
'sum': sum(train_outcome),
'mean': sum(train_outcome) / len(train_outcome),
'time': int(time.time())
}
with open(os.path.join(config['model_path'], 'logs', "train.log"), 'a+') as f:
f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
def log_eval_outcomes(outcomes, trained_eps = 0):
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
'method': outcome[0],
'count': len(scores),
'sum': sum(scores),
'mean': sum(scores) / len(scores),
'time': int(time.time())
}
with open(os.path.join(config['model_path'], 'logs', "eval.log"), 'a+') as f:
f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
# Parse command line arguments # Parse command line arguments
parser = argparse.ArgumentParser(description="Backgammon games") parser = argparse.ArgumentParser(description="Backgammon games")
parser.add_argument('--episodes', action='store', dest='episode_count', parser.add_argument('--episodes', action='store', dest='episode_count',
@ -47,13 +15,15 @@ parser.add_argument('--eval-methods', action='store',
default=['random'], nargs='*', default=['random'], nargs='*',
help='specifies evaluation methods') help='specifies evaluation methods')
parser.add_argument('--eval', action='store_true', parser.add_argument('--eval', action='store_true',
help='whether to evaluate the neural network with a random choice bot') help='evaluate the neural network with a random choice bot')
parser.add_argument('--bench-eval-scores', action='store_true',
help='benchmark scores of evaluation measures. episode counts and model specified as options are ignored.')
parser.add_argument('--train', action='store_true', parser.add_argument('--train', action='store_true',
help='whether to train the neural network') help='train the neural network')
parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train', parser.add_argument('--eval-after-train', action='store_true', dest='eval_after_train',
help='whether to evaluate after each training session') help='evaluate after each training session')
parser.add_argument('--play', action='store_true', parser.add_argument('--play', action='store_true',
help='whether to play with the neural network') help='play with the neural network')
parser.add_argument('--start-episode', action='store', dest='start_episode', parser.add_argument('--start-episode', action='store', dest='start_episode',
type=int, default=0, type=int, default=0,
help='episode count to start at; purely for display purposes') help='episode count to start at; purely for display purposes')
@ -66,27 +36,74 @@ args = parser.parse_args()
config = { config = {
'model': args.model, 'model': args.model,
'model_path': os.path.join(model_storage_path, args.model),
'episode_count': args.episode_count, 'episode_count': args.episode_count,
'eval_methods': args.eval_methods, 'eval_methods': args.eval_methods,
'train': args.train, 'train': args.train,
'play': args.play, 'play': args.play,
'eval': args.eval, 'eval': args.eval,
'bench_eval_scores': args.bench_eval_scores,
'eval_after_train': args.eval_after_train, 'eval_after_train': args.eval_after_train,
'start_episode': args.start_episode, 'start_episode': args.start_episode,
'train_perpetually': args.train_perpetually, 'train_perpetually': args.train_perpetually,
'model_storage_path': model_storage_path 'model_storage_path': 'models',
'bench_storage_path': 'bench',
'board_representation': 'quack'
} }
# Create models folder
if not os.path.exists(config['model_storage_path']):
os.makedirs(config['model_storage_path'])
model_path = lambda: os.path.join(config['model_storage_path'], config['model'])
# Make sure directories exist # Make sure directories exist
model_path = os.path.join(config['model_path']) log_path = os.path.join(model_path(), 'logs')
log_path = os.path.join(model_path, 'logs') if not os.path.isdir(model_path()):
if not os.path.isdir(model_path): os.mkdir(model_path())
os.mkdir(model_path)
if not os.path.isdir(log_path): if not os.path.isdir(log_path):
os.mkdir(log_path) os.mkdir(log_path)
# Define helper functions
def log_train_outcome(outcome, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
format_vars = { 'trained_eps': trained_eps,
'count': len(train_outcome),
'sum': sum(train_outcome),
'mean': sum(train_outcome) / len(train_outcome),
'time': int(time.time())
}
with open(log_path, 'a+') as f:
f.write("{time};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
'method': outcome[0],
'count': len(scores),
'sum': sum(scores),
'mean': sum(scores) / len(scores),
'time': int(time.time())
}
with open(log_path, 'a+') as f:
f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
for outcome in outcomes:
scores = outcome[1]
format_vars = { 'trained_eps': trained_eps,
'method': outcome[0],
'count': len(scores),
'sum': sum(scores),
'mean': sum(scores) / len(scores),
'time': time,
'index': index,
}
with open(log_path, 'a+') as f:
f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
# Do actions specified by command-line # Do actions specified by command-line
if args.list_models: if args.list_models:
def get_eps_trained(folder): def get_eps_trained(folder):
@ -94,7 +111,7 @@ if args.list_models:
return int(f.read()) return int(f.read())
model_folders = [ f.path model_folders = [ f.path
for f for f
in os.scandir(model_storage_path) in os.scandir(config['model_storage_path'])
if f.is_dir() ] if f.is_dir() ]
models = [ (folder, get_eps_trained(folder)) for folder in model_folders ] models = [ (folder, get_eps_trained(folder)) for folder in model_folders ]
sys.stderr.write("Found {} model(s)\n".format(len(models))) sys.stderr.write("Found {} model(s)\n".format(len(models)))
@ -103,28 +120,77 @@ if args.list_models:
exit() exit()
# Set up network if __name__ == "__main__":
from network import Network # Set up network
network = Network(config, config['model']) from network import Network
eps = config['start_episode']
# Set up variables # Set up variables
episode_count = config['episode_count'] episode_count = config['episode_count']
if args.train: if args.train:
network = Network(config, config['model'])
start_episode = network.episodes_trained
while True: while True:
train_outcome = network.train_model(episodes = episode_count, trained_eps = eps) train_outcome = network.train_model(episodes = episode_count, trained_eps = start_episode)
eps += episode_count start_episode += episode_count
log_train_outcome(train_outcome, trained_eps = eps) log_train_outcome(train_outcome, trained_eps = start_episode)
if config['eval_after_train']: if config['eval_after_train']:
eval_outcomes = network.eval(trained_eps = eps) eval_outcomes = network.eval(trained_eps = start_episode)
log_eval_outcomes(eval_outcomes, trained_eps = eps) log_eval_outcomes(eval_outcomes, trained_eps = start_episode)
if not config['train_perpetually']: if not config['train_perpetually']:
break break
elif args.eval:
eps = config['start_episode']
outcomes = network.eval() elif args.eval:
log_eval_outcomes(outcomes, trained_eps = eps) network = Network(config, config['model'])
#elif args.play: start_episode = network.episodes_trained
# g.play(episodes = episode_count) # Evaluation measures are described in `config`
outcomes = network.eval(config['episode_count'])
log_eval_outcomes(outcomes, trained_eps = start_episode)
# elif args.play:
# g.play(episodes = episode_count)
elif args.bench_eval_scores:
# Make sure benchmark directory exists
if not os.path.isdir(config['bench_storage_path']):
os.mkdir(config['bench_storage_path'])
config = config.copy()
config['model'] = 'bench'
network = Network(config, config['model'])
start_episode = network.episodes_trained
if start_episode == 0:
print("Model not trained! Beware of using non-existing models!")
exit()
sample_count = 20
episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
10000, 20000]
def do_eval(sess):
for eval_method in config['eval_methods']:
result_path = os.path.join(config['bench_storage_path'],
eval_method) + "-{}.log".format(int(time.time()))
for n in episode_counts:
for i in range(sample_count):
start_time = time.time()
# Evaluation measure to be benchmarked are described in `config`
outcomes = network.eval(episode_count = n,
tf_session = sess)
time_diff = time.time() - start_time
log_bench_eval_outcomes(outcomes,
time = time_diff,
index = i,
trained_eps = start_episode,
log_path = result_path)
# CMM: oh no
import tensorflow as tf
with tf.Session() as session:
network.restore_model(session)
do_eval(session)

View File

@ -8,51 +8,72 @@ import sys
import random import random
from eval import Eval from eval import Eval
class Network:
hidden_size = 40
input_size = 26
output_size = 1
# Can't remember the best learning_rate, look this up
learning_rate = 0.1
# TODO: Actually compile tensorflow properly class Network:
#os.environ["TF_CPP_MIN_LOG_LEVEL"]="2" # board_features_quack has size 28
# board_features_quack_fat has size 30
# board_features_tesauro has size 198
board_reps = {
'quack-fat' : (30, Board.board_features_quack_fat),
'quack' : (28, Board.board_features_quack),
'tesauro' : (198, Board.board_features_tesauro)
}
def custom_tanh(self, x, name=None): def custom_tanh(self, x, name=None):
return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name)) return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))
def __init__(self, config, name): def __init__(self, config, name):
self.config = config self.config = config
self.session = tf.Session() self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
self.checkpoint_path = config['model_path']
self.name = name self.name = name
# Set board representation from config
self.input_size, self.board_trans_func = Network.board_reps[
self.config['board_representation']
]
self.output_size = 1
self.hidden_size = 40
# Can't remember the best learning_rate, look this up
self.learning_rate = 0.01
# Restore trained episode count for model
episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
if os.path.isfile(episode_count_path):
with open(episode_count_path, 'r') as f:
self.episodes_trained = int(f.read())
else:
self.episodes_trained = 0
# input = x # input = x
self.x = tf.placeholder('float', [1, Network.input_size], name='x') self.x = tf.placeholder('float', [1, self.input_size], name='input')
self.value_next = tf.placeholder('float', [1, Network.output_size], name="value_next") self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
xavier_init = tf.contrib.layers.xavier_initializer() xavier_init = tf.contrib.layers.xavier_initializer()
W_1 = tf.get_variable("w_1", (Network.input_size, Network.hidden_size), W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
initializer=xavier_init) initializer=xavier_init)
W_2 = tf.get_variable("w_2", (Network.hidden_size, Network.output_size), W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
initializer=xavier_init) initializer=xavier_init)
b_1 = tf.get_variable("b_1", (Network.hidden_size,), b_1 = tf.get_variable("b_1", (self.hidden_size,),
initializer=tf.zeros_initializer) initializer=tf.zeros_initializer)
b_2 = tf.get_variable("b_2", (Network.output_size,), b_2 = tf.get_variable("b_2", (self.output_size,),
initializer=tf.zeros_initializer) initializer=tf.zeros_initializer)
value_after_input = self.custom_tanh(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
self.value = self.custom_tanh(tf.matmul(value_after_input, W_2) + b_2, name='output_layer') value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
# tf.reduce_sum basically finds the sum of its input, so this gives the # tf.reduce_sum basically finds the sum of its input, so this gives the
# difference between the two values, in case they should be lists, which # difference between the two values, in case they should be lists, which
# they might be if our input changes # they might be if our input changes
# TODO: Alexander thinks that self.value will be computed twice (instead of once) # TODO: Alexander thinks that self.value will be computed twice (instead of once)
difference_in_values = tf.reduce_sum(self.value_next - self.value, name='difference') difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
trainable_vars = tf.trainable_variables() trainable_vars = tf.trainable_variables()
gradients = tf.gradients(self.value, trainable_vars) gradients = tf.gradients(self.value, trainable_vars)
@ -62,18 +83,15 @@ class Network:
with tf.variable_scope('apply_gradients'): with tf.variable_scope('apply_gradients'):
for gradient, trainable_var in zip(gradients, trainable_vars): for gradient, trainable_var in zip(gradients, trainable_vars):
# Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t. # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
backprop_calc = Network.learning_rate * difference_in_values * gradient backprop_calc = self.learning_rate * difference_in_values * gradient
grad_apply = trainable_var.assign_add(backprop_calc) grad_apply = trainable_var.assign_add(backprop_calc)
apply_gradients.append(grad_apply) apply_gradients.append(grad_apply)
self.training_op = tf.group(*apply_gradients, name='training_op') self.training_op = tf.group(*apply_gradients, name='training_op')
self.saver = tf.train.Saver(max_to_keep=1) self.saver = tf.train.Saver(max_to_keep=1)
self.session.run(tf.global_variables_initializer())
self.restore_model() def eval_state(self, sess, state):
def eval_state(self, state):
# Run state through a network # Run state through a network
# Remember to create placeholders for everything because wtf tensorflow # Remember to create placeholders for everything because wtf tensorflow
@ -105,27 +123,26 @@ class Network:
# implement learning_rate * (difference_in_values) * gradients (the # implement learning_rate * (difference_in_values) * gradients (the
# before-mentioned calculation. # before-mentioned calculation.
# print("Network is evaluating") # print("Network is evaluating")
val = self.session.run(self.value, feed_dict={self.x: state}) # print("eval ({})".format(self.name), state, val, sep="\n")
#print("eval ({})".format(self.name), state, val, sep="\n")
return val
def save_model(self, episode_count): return sess.run(self.value, feed_dict={self.x: state})
self.saver.save(self.session, os.path.join(self.checkpoint_path, 'model.ckpt'))
def save_model(self, sess, episode_count):
self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'))
with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f: with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
print("[NETWK] ({name}) Saving model to:".format(name = self.name), print("[NETWK] ({name}) Saving model to:".format(name=self.name),
os.path.join(self.checkpoint_path, 'model.ckpt')) os.path.join(self.checkpoint_path, 'model.ckpt'))
f.write(str(episode_count) + "\n") f.write(str(episode_count) + "\n")
def restore_model(self): def restore_model(self, sess):
if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')): if os.path.isfile(os.path.join(self.checkpoint_path, 'model.ckpt.index')):
latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path) latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
print("[NETWK] ({name}) Restoring model from:".format(name = self.name), print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
str(latest_checkpoint)) str(latest_checkpoint))
self.saver.restore(self.session, latest_checkpoint) self.saver.restore(sess, latest_checkpoint)
variables_names = [v.name for v in tf.trainable_variables()] variables_names = [v.name for v in tf.trainable_variables()]
values = self.session.run(variables_names) values = sess.run(variables_names)
for k, v in zip(variables_names, values): for k, v in zip(variables_names, values):
print("Variable: ", k) print("Variable: ", k)
print("Shape: ", v.shape) print("Shape: ", v.shape)
@ -137,34 +154,18 @@ class Network:
with open(episode_count_path, 'r') as f: with open(episode_count_path, 'r') as f:
self.config['start_episode'] = int(f.read()) self.config['start_episode'] = int(f.read())
# Have a circular dependency, #fuck, need to rewrite something def make_move(self, sess, board, roll, player):
def adjust_weights(self, board, v_next):
# print("lol")
board = np.array(board).reshape((1,26))
self.session.run(self.training_op, feed_dict = { self.x: board,
self.value_next: v_next })
# while game isn't done:
#x_next = g.next_move()
#value_next = network.eval_state(x_next)
#self.session.run(self.training_op, feed_dict={self.x: x, self.value_next: value_next})
#x = x_next
def make_move(self, board, roll):
# print(Board.pretty(board)) # print(Board.pretty(board))
legal_moves = Board.calculate_legal_states(board, 1, roll) legal_moves = Board.calculate_legal_states(board, player, roll)
moves_and_scores = [ (move, self.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
scores = [ x[1] for x in moves_and_scores ] scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
best_score_index = np.array(scores).argmax() best_score_index = np.array(scores).argmax()
best_move_pair = moves_and_scores[best_score_index] best_move_pair = moves_and_scores[best_score_index]
#print("Found the best state, being:", np.array(move_scores).argmax()) # print("Found the best state, being:", np.array(move_scores).argmax())
return best_move_pair return best_move_pair
def eval(self, episode_count, trained_eps = 0, tf_session = None):
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): def do_eval(sess, method, episodes = 1000, trained_eps = 0):
start_time = time.time() start_time = time.time()
def print_time_estimate(eps_completed): def print_time_estimate(eps_completed):
@ -173,63 +174,246 @@ class Network:
eps_per_sec = eps_completed / time_diff eps_per_sec = eps_completed / time_diff
secs_per_ep = time_diff / eps_completed secs_per_ep = time_diff / eps_completed
eps_remaining = (episodes - eps_completed) eps_remaining = (episodes - eps_completed)
sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) sys.stderr.write(
sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
sys.stderr.write(
"[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
sys.stderr.write(
"[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'random':
outcomes = []
"""for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0]
roll = (random.randrange(1, 7), random.randrange(1, 7))
board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n")
if i % 50 == 0:
print_time_estimate(i)"""
return outcomes
elif method == 'pubeval':
outcomes = []
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll),
# which can be used to get the best move according to pubeval
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
board = Board.initial_state
# print("init:", board, sep="\n")
while Board.outcome(board) is None:
# print("-"*30)
roll = (random.randrange(1, 7), random.randrange(1, 7))
# print(roll)
# prev_board = tuple(board)
board = (self.make_move(sess, board, roll, 1))[0]
# print("post p1:", board, sep="\n")
# print("."*30)
roll = (random.randrange(1, 7), random.randrange(1, 7))
# print(roll)
# prev_board = tuple(board)
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
# print("post pubeval:", board, sep="\n")
# print("*"*30)
# print(board)
# print("+"*30)
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n")
if i % 10 == 0:
print_time_estimate(i)
return outcomes
elif method == 'dumbeval':
outcomes = []
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll),
# which can be used to get the best move according to pubeval
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
board = Board.initial_state
# print("init:", board, sep="\n")
while Board.outcome(board) is None:
# print("-"*30)
roll = (random.randrange(1, 7), random.randrange(1, 7))
# print(roll)
# prev_board = tuple(board)
board = (self.make_move(sess, board, roll, 1))[0]
# print("post p1:", board, sep="\n")
# print("."*30)
roll = (random.randrange(1, 7), random.randrange(1, 7))
# print(roll)
# prev_board = tuple(board)
board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]
# print("post pubeval:", board, sep="\n")
# print("*"*30)
# print(board)
# print("+"*30)
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n")
if i % 10 == 0:
print_time_estimate(i)
return outcomes
elif method == 'dumbmodel':
outcomes = []
"""
config_prime = self.config.copy()
config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
#print(self.config, "\n", config_prime)
outcomes = []
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1,7), random.randrange(1,7))
board = (self.make_move(board, self.p1.get_sym(), roll))[0]
roll = (random.randrange(1,7), random.randrange(1,7))
board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n")
if i % 50 == 0:
print_time_estimate(i)
"""
return outcomes
else:
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
return [0]
if tf_session == None:
with tf.Session() as session:
session.run(tf.global_variables_initializer())
self.restore_model(session)
outcomes = [ (method, do_eval(session,
method,
episode_count,
trained_eps = trained_eps))
for method
in self.config['eval_methods'] ]
return outcomes
else:
outcomes = [ (method, do_eval(tf_session,
method,
episode_count,
trained_eps = trained_eps))
for method
in self.config['eval_methods'] ]
return outcomes
def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
with tf.Session() as sess:
writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
sess.run(tf.global_variables_initializer())
self.restore_model(sess)
variables_names = [v.name for v in tf.trainable_variables()]
values = sess.run(variables_names)
for k, v in zip(variables_names, values):
print("Variable: ", k)
print("Shape: ", v.shape)
print(v)
start_time = time.time()
def print_time_estimate(eps_completed):
cur_time = time.time()
time_diff = cur_time - start_time
eps_per_sec = eps_completed / time_diff
secs_per_ep = time_diff / eps_completed
eps_remaining = (episodes - eps_completed)
sys.stderr.write(
"[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
sys.stderr.write(
"[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
outcomes = [] outcomes = []
for episode in range(1, episodes + 1): for episode in range(1, episodes + 1):
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
# TODO decide which player should be here # TODO decide which player should be here
player = 1 player = 1
roll = (random.randrange(1,7), random.randrange(1,7)) prev_board = Board.initial_state
prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll)
if player == -1:
prev_board = Board.flip(prev_board)
# find the best move here, make this move, then change turn as the # find the best move here, make this move, then change turn as the
# first thing inside of the while loop and then call # first thing inside of the while loop and then call
# best_move_and_score to get V_t+1 # best_move_and_score to get V_t+1
# i = 0 i = 0
while Board.outcome(prev_board) is None: while Board.outcome(prev_board) is None:
# print("-"*30) i += 1
# print(i)
# print(roll) #print("PREEEV_BOOOOAAARD:",prev_board)
# print(Board.pretty(prev_board)) cur_board, cur_board_value = self.make_move(sess,
# print("/"*30) prev_board,
# i += 1 (random.randrange(1, 7), random.randrange(1, 7)), player)
#print("The current value:",cur_board_value)
# adjust weights
sess.run(self.training_op,
feed_dict={self.x: self.board_trans_func(prev_board, player),
self.value_next: cur_board_value})
player *= -1 player *= -1
roll = (random.randrange(1,7), random.randrange(1,7))
cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll)
if player == -1:
cur_board = Board.flip(cur_board)
self.adjust_weights(prev_board, cur_board_value)
prev_board = cur_board prev_board = cur_board
final_board = prev_board final_board = prev_board
sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
outcomes.append(Board.outcome(final_board)[1]) outcomes.append(Board.outcome(final_board)[1])
final_score = np.array([ Board.outcome(final_board)[1] ]) final_score = np.array([Board.outcome(final_board)[1]])
self.adjust_weights(prev_board, final_score.reshape((1, 1))) scaled_final_score = ((final_score + 2) / 4)
#print("The difference in values:", scaled_final_score - cur_board_value)
# print("scaled_final_score",scaled_final_score)
with tf.name_scope("final"):
merged = tf.summary.merge_all()
summary, _ = sess.run([merged, self.training_op],
feed_dict={self.x: self.board_trans_func(prev_board, player),
self.value_next: scaled_final_score.reshape((1, 1))})
writer.add_summary(summary, episode + trained_eps)
sys.stderr.write("\n") sys.stderr.write("\n")
if episode % min(save_step_size, episodes) == 0: if episode % min(save_step_size, episodes) == 0:
sys.stderr.write("[TRAIN] Saving model...\n") sys.stderr.write("[TRAIN] Saving model...\n")
self.save_model(episode+trained_eps) self.save_model(sess, episode + trained_eps)
if episode % 50 == 0: if episode % 50 == 0:
print_time_estimate(episode) print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n") sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(episode+trained_eps) self.save_model(sess, episode+trained_eps)
writer.close()
return outcomes return outcomes
@ -240,105 +424,3 @@ class Network:
# save the current state again, so we can continue running backprop based on the "previous" turn. # save the current state again, so we can continue running backprop based on the "previous" turn.
# NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it! # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it!
def eval(self, trained_eps = 0):
def do_eval(method, episodes = 1000, trained_eps = 0):
start_time = time.time()
def print_time_estimate(eps_completed):
cur_time = time.time()
time_diff = cur_time - start_time
eps_per_sec = eps_completed / time_diff
secs_per_ep = time_diff / eps_completed
eps_remaining = (episodes - eps_completed)
sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'random':
outcomes = []
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
board = Board.initial_state
while Board.outcome(board) is None:
roll = (random.randrange(1,7), random.randrange(1,7))
board = (self.p1.make_move(board, self.p1.get_sym(), roll))[0]
roll = (random.randrange(1,7), random.randrange(1,7))
board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll))
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n")
if i % 50 == 0:
print_time_estimate(i)
return outcomes
elif method == 'pubeval':
outcomes = []
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
board = Board.initial_state
#print("init:", board, sep="\n")
while Board.outcome(board) is None:
#print("-"*30)
roll = (random.randrange(1,7), random.randrange(1,7))
#print(roll)
prev_board = tuple(board)
board = (self.make_move(board, roll))[0]
#print("post p1:", board, sep="\n")
#print("."*30)
roll = (random.randrange(1,7), random.randrange(1,7))
#print(roll)
prev_board = tuple(board)
board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]
#print("post pubeval:", board, sep="\n")
#print("*"*30)
#print(board)
#print("+"*30)
sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
outcomes.append(Board.outcome(board)[1])
sys.stderr.write("\n")
if i % 10 == 0:
print_time_estimate(i)
return outcomes
# elif method == 'dumbmodel':
# config_prime = self.config.copy()
# config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel')
# eval_bot = Bot(1, config = config_prime, name = "dumbmodel")
# #print(self.config, "\n", config_prime)
# outcomes = []
# for i in range(1, episodes + 1):
# sys.stderr.write("[EVAL ] Episode {}".format(i))
# board = Board.initial_state
# while Board.outcome(board) is None:
# roll = (random.randrange(1,7), random.randrange(1,7))
# board = (self.make_move(board, self.p1.get_sym(), roll))[0]
# roll = (random.randrange(1,7), random.randrange(1,7))
# board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0])
# sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
# outcomes.append(Board.outcome(board)[1])
# sys.stderr.write("\n")
# if i % 50 == 0:
# print_time_estimate(i)
# return outcomes
else:
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
return [0]
return [ (method, do_eval(method,
self.config['episode_count'],
trained_eps = trained_eps))
for method
in self.config['eval_methods'] ]

19
plot.py
View File

@ -9,9 +9,26 @@ import matplotlib.dates as mdates
train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean'] train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean']
eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean'] eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean']
bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean']
model_path = 'models' model_path = 'models'
def plot_bench(data_path):
df = pd.read_csv(data_path, sep=";",
names=bench_headers, index_col=[0,1,2])
for method_label in df.index.levels[0]:
df_prime = df[['mean']].loc[method_label].unstack().T
plot = df_prime.plot.box()
plot.set_title("Evaluation variance, {}".format(method_label))
plot.set_xlabel("Sample count")
plot.set_ylabel("Mean score")
plt.show(plot.figure)
# for later use:
variances = df_prime.var()
print(variances)
del df_prime, plot, variances
def dataframes(model_name): def dataframes(model_name):
def df_timestamp_to_datetime(df): def df_timestamp_to_datetime(df):
@ -44,7 +61,7 @@ if __name__ == '__main__':
plt.show() plt.show()
while True: while True:
df = dataframes('default')['eval'] df = dataframes('a')['eval']
print(df) print(df)

306
test.py
View File

@ -614,5 +614,311 @@ class TestBoardFlip(unittest.TestCase):
self.assertEqual(Board.flip(Board.flip(board)), board) self.assertEqual(Board.flip(Board.flip(board)), board)
def test_tesauro_initial(self):
board = Board.initial_state
expected = (1,1,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0.0,
0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
1,1,1,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,0,0,
0.0,
0,
1,
0
)
import numpy as np
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_tesauro_bars(self):
board = list(Board.initial_state)
board[1] = 0
board[0] = 2
board[24] = 0
board[25] = -2
board = tuple(board)
expected = (0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1.0,
0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
1,1,1,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1.0,
0,
1,
0
)
import numpy as np
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_tesauro_home(self):
board = list(Board.initial_state)
board[1] = 0
board[24] = 0
board = tuple(board)
expected = (0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0.0,
2,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
1,1,1,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0.0,
2,
1,
0
)
import numpy as np
self.assertTrue((Board.board_features_tesauro(board, 1) ==
np.array(expected).reshape(1, 198)).all())
def test_tesauro_black_player(self):
board = Board.initial_state
expected = (1,1,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0.0,
0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
1,1,1,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,1,1,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
0,0,0,0,
1,1,0,0,
0.0,
0,
0,
1
)
import numpy as np
self.assertTrue((Board.board_features_tesauro(board, -1) ==
np.array(expected).reshape(1, 198)).all())
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()