diff --git a/.gitignore b/.gitignore index 08bc86a..03ee050 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,6 @@ venv.bak/ README.* !README.org models/ +.DS_Store +bench/ + diff --git a/bin/train-evaluate-save b/bin/train-evaluate-save new file mode 100755 index 0000000..00b6411 --- /dev/null +++ b/bin/train-evaluate-save @@ -0,0 +1,47 @@ +#!/usr/bin/env ruby +def save(model_name) + require 'date' + + models_dir = 'models' + model_path = File.join(models_dir, model_name) + if not File.exists? model_path then + return false + end + + episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i + + puts "Found model #{model_name} with episodes #{episode_count} trained!" + + file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz" + save_path = File.join(models_dir, 'saves', file_name) + puts "Saving to #{save_path}" + + system("tar", "-cvzf", save_path, "-C", models_dir, model_name) + + return true +end + +def train(model, episodes) + system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s) +end + +def evaluate(model, episodes, method) + system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method) +end + +model = ARGV[0] + +if model.nil? then raise "no model specified" end + +while true do + save model + train model, 1000 + save model + train model, 1000 + 3.times do + evaluate model, 250, "pubeval" + end + 3.times do + evaluate model, 250, "dumbeval" + end +end diff --git a/board.py b/board.py index bfa7998..2136e47 100644 --- a/board.py +++ b/board.py @@ -31,11 +31,59 @@ class Board: board = list(board) positives = [x if x > 0 else 0 for x in board] negatives = [x if x < 0 else 0 for x in board] - board.append(15 - sum(positives)) + board.append( 15 - sum(positives)) board.append(-15 - sum(negatives)) return tuple(board) - - + + # quack + @staticmethod + def board_features_quack(board, player): + board = list(board) + board += ([1, 0] if np.sign(player) > 0 else [0, 1]) + return np.array(board).reshape(1, -1) + + # quack-fat + @staticmethod + def board_features_quack_fat(board, player): + board = list(board) + positives = [x if x > 0 else 0 for x in board] + negatives = [x if x < 0 else 0 for x in board] + board.append( 15 - sum(positives)) + board.append(-15 - sum(negatives)) + board += ([1, 0] if np.sign(player) > 0 else [0, 1]) + return np.array(board).reshape(1,-1) + + + # tesauro + @staticmethod + def board_features_tesauro(board, cur_player): + def ordinary_trans(val, player): + abs_val = val * player + if abs_val <= 0: return (0,0,0,0) + elif abs_val == 1: return (1,0,0,0) + elif abs_val == 2: return (1,1,0,0) + elif abs_val == 3: return (1,1,1,0) + else: return (1,1,1, (abs_val - 3) / 2) + + def bar_trans(board, player): + if player == 1: return (abs(board[0]/2),) + elif player == -1: return (abs(board[25]/2),) + + # def ordinary_trans_board(board, player): + # return np.array( + # [ordinary_trans(x, player) for x in board[1:25]] + # ).flatten() + + board_rep = [] + for player in [1,-1]: + for x in board[1:25]: + board_rep += ordinary_trans(x, player) + board_rep += bar_trans(board, player) + board_rep += (15 - Board.num_of_checkers_for_player(board, player),) + + board_rep += ([1,0] if cur_player == 1 else [0,1]) + + return np.array(board_rep).reshape(1,198) @staticmethod @@ -250,9 +298,9 @@ class Board: return """ 13 14 15 16 17 18 19 20 21 22 23 24 +--------------------------------------------------------------------------+ -| {12}| {11}| 
{10}| {9}| {8}| {7}| bar -1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO| +| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO| |---|---|---|---|---|---|------------|---|---|---|---|---|---| | -| {13}| {14}| {15}| {16}| {17}| {18}| bar 1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO| +| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO| +--------------------------------------------------------------------------+ 12 11 10 9 8 7 6 5 4 3 2 1 """.format(*temp)
diff --git a/dumbeval/.gitignore b/dumbeval/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/dumbeval/.gitignore @@ -0,0 +1 @@ +build/
diff --git a/dumbeval/dumbeval.c b/dumbeval/dumbeval.c new file mode 100644 index 0000000..4d2579a --- /dev/null +++ b/dumbeval/dumbeval.c @@ -0,0 +1,194 @@ +#include <Python.h> + +static PyObject* DumbevalError; + +static float x[122]; + + +/* With apologies to Gerry Tesauro */ + +/* Weights generated by weights.py */ +static const float wc[122] = {
-1.91222, 1.45979, 0.40657, -1.39159, 3.64558, -0.45381, -0.03157,
+ 0.14539, 0.80232, 0.87558, 2.36202, -2.01887, -0.88918, 2.65871,
+ -1.31587, 1.07476, 0.30491, -1.32892, 0.38018, -0.30714, -1.16178,
+ 0.71481, -1.01334, -0.44373, 0.51255, -0.17171, -0.88886, 0.02071,
+ -0.53279, -0.22139, -1.02436, 0.17948, 0.95697, 0.49272, 0.31848,
+ -0.58293, 0.14484, 0.22063, 1.0336 , -1.90554, 1.10291, -2.05589,
+ -0.16964, -0.82442, 1.27217, -1.24968, -0.90372, 0.05546, 0.2535 ,
+ -0.03533, -0.31773, 0.43704, 0.21699, 0.10519, 2.12775, -0.48196,
+ -0.08445, -0.13156, -0.68362, 0.64765, 0.32537, 0.79493, 1.94577,
+ -0.63827, 0.97057, -0.46039, 1.51801, -0.62955, -0.43632, 0.25876,
+ -0.46623, -0.46963, 1.3532 , -0.07362, -1.53211, 0.69676, -0.92407,
+ 0.07153, 0.67173, 0.27661, -0.51579, -0.49019, 1.06603, -0.97673,
+ -1.21231, -1.54966, -0.07795, 0.32697, 0.02873, 1.38703, 0.41725,
+ 0.78326, -0.7257 , 0.54165, 1.38882, 0.27304, 1.0739 , 0.74654,
+ 1.35561, 1.18697, 1.09146, 0.17552, -0.30773, 0.27812, -1.674 ,
+ -0.31073, -0.40745, 0.51546, -1.10875, 2.0081 , -1.27931, -1.16321,
+ 0.95652, 0.7487 , -0.2347 , 0.20324, -0.41417, 0.05929, 0.72632,
+ -1.15223, 1.2745 , -0.15947 };
+
+static const float wr[122] = {
+ 0.13119, -0.13164, -1.2736 , 1.06352, -1.34749, -1.03086, -0.27417,
+ -0.27762, 0.79454, -1.12623, 2.1134 , -0.7003 , 0.26056, -1.13518,
+ -1.64548, -1.30828, -0.96589, -0.36258, -1.14323, -0.2006 , -1.00307,
+ 0.57739, -0.62693, 0.29721, -0.36996, -0.17462, 0.96704, 0.08902,
+ 1.4337 , -0.47107, 0.82156, 0.14988, 1.74034, 1.13313, -0.32083,
+ -0.00048, -0.86622, 1.12808, 0.99875, 0.8049 , -0.16841, -0.42677,
+ -1.9409 , -0.53565, -0.83708, 0.69603, 0.32079, 0.56942, 0.67965,
+ 1.49328, -1.65885, 0.96284, 0.63196, -0.27504, 0.39174, 0.71225,
+ -0.3614 , 0.88761, 1.12882, 0.77764, 1.02618, -0.20245, -0.39245,
+ -1.56799, 1.04888, -1.20858, -0.24361, -1.85157, -0.16912, 0.50512,
+ -2.93122, 0.70477, -0.93066, 1.74867, 0.23963, -0.00699, -1.27183,
+ -0.30604, 1.71039, 0.82202, -1.36734, -1.08352, -1.25054, 0.49436,
+ -1.5037 , -0.73143, 0.74189, 0.32365, 0.30539, -0.72169, 0.41088,
+ -1.56632, -0.63526, 0.58779, -0.05653, 0.76713, -1.40898, -0.33683,
+ 1.86802, 0.59773, 1.28668, -0.65817, 2.46829, -0.09331, 2.9034 ,
+ 1.04809, 0.73222, -0.44372, 0.53044, -1.9274 , -1.57183, -1.14068,
+ 1.26036, -0.9296 , 0.06662, -0.26572, -0.30862, 0.72915, 0.98977,
+ 0.63513, -1.43917, -0.12523 };
+
+void setx(int pos[])
+{
+ /* sets input vector x[] given
board position pos[] */ + extern float x[]; + int j, jm1, n; + /* initialize */ + for(j=0;j<122;++j) x[j] = 0.0; + + /* first encode board locations 24-1 */ + for(j=1;j<=24;++j) { + jm1 = j - 1; + n = pos[25-j]; + if(n!=0) { + if(n==-1) x[5*jm1+0] = 1.0; + if(n==1) x[5*jm1+1] = 1.0; + if(n>=2) x[5*jm1+2] = 1.0; + if(n==3) x[5*jm1+3] = 1.0; + if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0; + } + } + /* encode opponent barmen */ + x[120] = -(float)(pos[0])/2.0; + /* encode computer's menoff */ + x[121] = (float)(pos[26])/15.0; +} + +float dumbeval(int race, int pos[]) +{ + /* Backgammon move-selection evaluation function + for benchmark comparisons. Computes a linear + evaluation function: Score = W * X, where X is + an input vector encoding the board state (using + a raw encoding of the number of men at each location), + and W is a weight vector. Separate weight vectors + are used for racing positions and contact positions. + Makes lots of obvious mistakes, but provides a + decent level of play for benchmarking purposes. */ + + /* Provided as a public service to the backgammon + programming community by Gerry Tesauro, IBM Research. + (e-mail: tesauro@watson.ibm.com) */ + + /* The following inputs are needed for this routine: + + race is an integer variable which should be set + based on the INITIAL position BEFORE the move. + Set race=1 if the position is a race (i.e. no contact) + and 0 if the position is a contact position. + + pos[] is an integer array of dimension 28 which + should represent a legal final board state after + the move. Elements 1-24 correspond to board locations + 1-24 from computer's point of view, i.e. computer's + men move in the negative direction from 24 to 1, and + opponent's men move in the positive direction from + 1 to 24. Computer's men are represented by positive + integers, and opponent's men are represented by negative + integers. Element 25 represents computer's men on the + bar (positive integer), and element 0 represents opponent's + men on the bar (negative integer). Element 26 represents + computer's men off the board (positive integer), and + element 27 represents opponent's men off the board + (negative integer). */ + + /* Also, be sure to call rdwts() at the start of your + program to read in the weight values. Happy hacking] */ + + int i; + float score; + + if(pos[26]==15) return(99999999.); + /* all men off, best possible move */ + + setx(pos); /* sets input array x[] */ + score = 0.0; + if(race) { /* use race weights */ + for(i=0;i<122;++i) score += wr[i]*x[i]; + } + else { /* use contact weights */ + for(i=0;i<122;++i) score += wc[i]*x[i]; + } + return(score); +} + +static PyObject* +dumbeval_eval(PyObject *self, PyObject *args) { + int race; + long numValues; + int board[28]; + float eval_score; + + PyObject* tuple_obj; + PyObject* val_obj; + + if (! 
PyArg_ParseTuple(args, "pO!", &race, &PyTuple_Type, &tuple_obj)) + return NULL; + + numValues = PyTuple_Size(tuple_obj); + + if (numValues < 0) return NULL; + if (numValues != 28) { + PyErr_SetString(DumbevalError, "Tuple must have 28 entries"); + return NULL; + } + + // Iterate over tuple to retreive positions + for (int i=0; i 0 else 1-x[1] for x in moves_and_scores] best_score_index = np.array(scores).argmax() best_move_pair = moves_and_scores[best_score_index] - #print("Found the best state, being:", np.array(move_scores).argmax()) + # print("Found the best state, being:", np.array(move_scores).argmax()) return best_move_pair - - - def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): - start_time = time.time() - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write("[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) + def eval(self, episode_count, trained_eps = 0, tf_session = None): + def do_eval(sess, method, episodes = 1000, trained_eps = 0): + start_time = time.time() - - sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) - outcomes = [] - for episode in range(1, episodes + 1): - sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) - # TODO decide which player should be here - player = 1 - - roll = (random.randrange(1,7), random.randrange(1,7)) - prev_board, _ = self.make_move(Board.flip(Board.initial_state) if player == -1 else Board.initial_state, roll) - if player == -1: - prev_board = Board.flip(prev_board) - - # find the best move here, make this move, then change turn as the - # first thing inside of the while loop and then call - # best_move_and_score to get V_t+1 + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write( + "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) - # i = 0 - while Board.outcome(prev_board) is None: - # print("-"*30) - # print(i) - # print(roll) - # print(Board.pretty(prev_board)) - # print("/"*30) - # i += 1 - - player *= -1 + sys.stderr.write( + "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) + + if method == 'random': + outcomes = [] + """for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + while Board.outcome(board) is None: + roll = (random.randrange(1, 7), random.randrange(1, 7)) + board = (self.p1.make_move(sess, board, self.p1.get_sym(), roll))[0] + roll = (random.randrange(1, 7), random.randrange(1, 7)) + board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 50 == 0: + print_time_estimate(i)""" + return outcomes + elif method == 'pubeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), + # which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + # print("init:", board, sep="\n") + while Board.outcome(board) is None: + # print("-"*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = (self.make_move(sess, board, roll, 1))[0] + # print("post p1:", board, sep="\n") + + # print("."*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] + # print("post pubeval:", board, sep="\n") + + # print("*"*30) + # print(board) + # print("+"*30) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes + + elif method == 'dumbeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), + # which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + # print("init:", board, sep="\n") + while Board.outcome(board) is None: + # print("-"*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = (self.make_move(sess, board, roll, 1))[0] + # print("post p1:", board, sep="\n") + + # print("."*30) + roll = (random.randrange(1, 7), random.randrange(1, 7)) + # print(roll) + + # prev_board = tuple(board) + board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26] + # print("post pubeval:", board, sep="\n") + + # print("*"*30) + # print(board) + # print("+"*30) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes + + elif method == 'dumbmodel': + outcomes = [] + """ + config_prime = self.config.copy() + config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel') + eval_bot = Bot(1, config = config_prime, name = "dumbmodel") + 
#print(self.config, "\n", config_prime) + outcomes = [] + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + board = Board.initial_state + while Board.outcome(board) is None: roll = (random.randrange(1,7), random.randrange(1,7)) + board = (self.make_move(board, self.p1.get_sym(), roll))[0] - cur_board, cur_board_value = self.make_move(Board.flip(prev_board) if player == -1 else prev_board, roll) - if player == -1: - cur_board = Board.flip(cur_board) + roll = (random.randrange(1,7), random.randrange(1,7)) + board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0]) + sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) + outcomes.append(Board.outcome(board)[1]) + sys.stderr.write("\n") - self.adjust_weights(prev_board, cur_board_value) - - prev_board = cur_board - - final_board = prev_board - sys.stderr.write("\t outcome {}".format(Board.outcome(final_board)[1])) - outcomes.append(Board.outcome(final_board)[1]) - final_score = np.array([ Board.outcome(final_board)[1] ]) - self.adjust_weights(prev_board, final_score.reshape((1, 1))) - - sys.stderr.write("\n") + if i % 50 == 0: + print_time_estimate(i) + """ + return outcomes + else: + sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) + return [0] - if episode % min(save_step_size, episodes) == 0: - sys.stderr.write("[TRAIN] Saving model...\n") - self.save_model(episode+trained_eps) + if tf_session == None: + with tf.Session() as session: + session.run(tf.global_variables_initializer()) + self.restore_model(session) + outcomes = [ (method, do_eval(session, + method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes + else: + outcomes = [ (method, do_eval(tf_session, + method, + episode_count, + trained_eps = trained_eps)) + for method + in self.config['eval_methods'] ] + return outcomes - if episode % 50 == 0: - print_time_estimate(episode) + def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): + with tf.Session() as sess: + writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph) - sys.stderr.write("[TRAIN] Saving model for final episode...\n") - self.save_model(episode+trained_eps) + sess.run(tf.global_variables_initializer()) + self.restore_model(sess) + + variables_names = [v.name for v in tf.trainable_variables()] + values = sess.run(variables_names) + for k, v in zip(variables_names, values): + print("Variable: ", k) + print("Shape: ", v.shape) + print(v) + + start_time = time.time() + + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write( + "[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2))) + sys.stderr.write( + "[TRAIN] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format( + eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep))) + + sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) + outcomes = [] + for episode in range(1, episodes + 1): + sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) + # TODO decide which player should be here + + player = 1 + + prev_board = Board.initial_state + + # find the best move here, make this move, then change turn as the + # first thing inside of the while loop and then call + # best_move_and_score to get V_t+1 + + i = 0 + while Board.outcome(prev_board) is None: + i += 1 + + #print("PREEEV_BOOOOAAARD:",prev_board) + cur_board, cur_board_value = self.make_move(sess, + prev_board, + (random.randrange(1, 7), random.randrange(1, 7)), player) + + #print("The current value:",cur_board_value) + + # adjust weights + sess.run(self.training_op, + feed_dict={self.x: self.board_trans_func(prev_board, player), + self.value_next: cur_board_value}) + + player *= -1 + + + prev_board = cur_board + + final_board = prev_board + sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) + outcomes.append(Board.outcome(final_board)[1]) + final_score = np.array([Board.outcome(final_board)[1]]) + scaled_final_score = ((final_score + 2) / 4) + #print("The difference in values:", scaled_final_score - cur_board_value) + # print("scaled_final_score",scaled_final_score) + + with tf.name_scope("final"): + merged = tf.summary.merge_all() + summary, _ = sess.run([merged, self.training_op], + feed_dict={self.x: self.board_trans_func(prev_board, player), + self.value_next: scaled_final_score.reshape((1, 1))}) + writer.add_summary(summary, episode + trained_eps) + + sys.stderr.write("\n") + + if episode % min(save_step_size, episodes) == 0: + sys.stderr.write("[TRAIN] Saving model...\n") + self.save_model(sess, episode + trained_eps) + + if episode % 50 == 0: + print_time_estimate(episode) + + sys.stderr.write("[TRAIN] Saving model for final episode...\n") + self.save_model(sess, episode+trained_eps) + + writer.close() - return outcomes + return outcomes # take turn, which finds the best state and picks it, based on the current network @@ -240,105 +424,3 @@ class Network: # save the current state again, so we can continue running backprop based on the "previous" turn. # NOTE: We need to make a method so that we can take a single turn or at least just pick the next best move, so we know how to evaluate according to TD-learning. Right now, our game just continues in a while loop without nothing to stop it! - - - - def eval(self, trained_eps = 0): - def do_eval(method, episodes = 1000, trained_eps = 0): - start_time = time.time() - - def print_time_estimate(eps_completed): - cur_time = time.time() - time_diff = cur_time - start_time - eps_per_sec = eps_completed / time_diff - secs_per_ep = time_diff / eps_completed - eps_remaining = (episodes - eps_completed) - sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) - sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) - - sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) - - if method == 'random': - outcomes = [] - for i in range(1, episodes + 1): - sys.stderr.write("[EVAL ] Episode {}".format(i)) - board = Board.initial_state - while Board.outcome(board) is None: - roll = (random.randrange(1,7), random.randrange(1,7)) - board = (self.p1.make_move(board, self.p1.get_sym(), roll))[0] - roll = (random.randrange(1,7), random.randrange(1,7)) - board = Board.flip(Eval.make_random_move(Board.flip(board), 1, roll)) - sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - outcomes.append(Board.outcome(board)[1]) - sys.stderr.write("\n") - - if i % 50 == 0: - print_time_estimate(i) - return outcomes - elif method == 'pubeval': - outcomes = [] - # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval - for i in range(1, episodes + 1): - sys.stderr.write("[EVAL ] Episode {}".format(i)) - board = Board.initial_state - #print("init:", board, sep="\n") - while Board.outcome(board) is None: - #print("-"*30) - roll = (random.randrange(1,7), random.randrange(1,7)) - #print(roll) - - prev_board = tuple(board) - board = (self.make_move(board, roll))[0] - #print("post p1:", board, sep="\n") - - #print("."*30) - roll = (random.randrange(1,7), random.randrange(1,7)) - #print(roll) - - prev_board = tuple(board) - board = Eval.make_pubeval_move(board, -1, roll)[0][0:26] - #print("post pubeval:", board, sep="\n") - - - #print("*"*30) - #print(board) - #print("+"*30) - sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - outcomes.append(Board.outcome(board)[1]) - sys.stderr.write("\n") - - if i % 10 == 0: - print_time_estimate(i) - - return outcomes - # elif method == 'dumbmodel': - # config_prime = self.config.copy() - # config_prime['model_path'] = os.path.join(config_prime['model_storage_path'], 'dumbmodel') - # eval_bot = Bot(1, config = config_prime, name = "dumbmodel") - # #print(self.config, "\n", config_prime) - # outcomes = [] - # for i in range(1, episodes + 1): - # sys.stderr.write("[EVAL ] Episode {}".format(i)) - # board = Board.initial_state - # while Board.outcome(board) is None: - # roll = (random.randrange(1,7), random.randrange(1,7)) - # board = (self.make_move(board, self.p1.get_sym(), roll))[0] - - # roll = (random.randrange(1,7), random.randrange(1,7)) - # board = Board.flip(eval_bot.make_move(Board.flip(board), self.p1.get_sym(), roll)[0]) - # sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1])) - # outcomes.append(Board.outcome(board)[1]) - # sys.stderr.write("\n") - - # if i % 50 == 0: - # print_time_estimate(i) - # return outcomes - else: - sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) - return [0] - - return [ (method, do_eval(method, - self.config['episode_count'], - trained_eps = trained_eps)) - for method - in self.config['eval_methods'] ] diff --git a/plot.py b/plot.py index 8261cde..5957854 100644 --- a/plot.py +++ b/plot.py @@ -9,9 +9,26 @@ import matplotlib.dates as mdates train_headers = ['timestamp', 'eps_train', 'eps_trained_session', 'sum', 'mean'] eval_headers = ['timestamp', 'method', 'eps_train', 'eval_eps_used', 'sum', 'mean'] +bench_headers = ['method', 'sample_count', 'i', 'time', 'sum', 'mean'] model_path = 'models' 
+def plot_bench(data_path): + df = pd.read_csv(data_path, sep=";", + names=bench_headers, index_col=[0,1,2]) + for method_label in df.index.levels[0]: + df_prime = df[['mean']].loc[method_label].unstack().T + plot = df_prime.plot.box() + plot.set_title("Evaluation variance, {}".format(method_label)) + plot.set_xlabel("Sample count") + plot.set_ylabel("Mean score") + plt.show(plot.figure) + + # for later use: + variances = df_prime.var() + print(variances) + + del df_prime, plot, variances def dataframes(model_name): def df_timestamp_to_datetime(df): @@ -44,7 +61,7 @@ if __name__ == '__main__': plt.show() while True: - df = dataframes('default')['eval'] + df = dataframes('a')['eval'] print(df) diff --git a/test.py b/test.py index efc243e..6c9c130 100644 --- a/test.py +++ b/test.py @@ -613,6 +613,312 @@ class TestBoardFlip(unittest.TestCase): -2) self.assertEqual(Board.flip(Board.flip(board)), board) + + def test_tesauro_initial(self): + board = Board.initial_state + + expected = (1,1,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, + + 0.0, + 0, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + def test_tesauro_bars(self): + board = list(Board.initial_state) + board[1] = 0 + board[0] = 2 + board[24] = 0 + board[25] = -2 + + board = tuple(board) + + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1.0, + 0, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + + def test_tesauro_home(self): + board = list(Board.initial_state) + + board[1] = 0 + board[24] = 0 + + board = tuple(board) + + expected = (0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 2, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 2, + + 1, + 0 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, 1) == + np.array(expected).reshape(1, 198)).all()) + + + def test_tesauro_black_player(self): + board = Board.initial_state + + expected = (1,1,0,0, + 
0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0.0, + 0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,1,1, + + 0,0,0,0, + 1,1,1,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 1,1,1,1, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 0,0,0,0, + 1,1,0,0, + + 0.0, + 0, + + 0, + 1 + ) + + import numpy as np + self.assertTrue((Board.board_features_tesauro(board, -1) == + np.array(expected).reshape(1, 198)).all()) + if __name__ == '__main__': unittest.main()
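The board_features_tesauro representation added in board.py follows Tesauro's truncated-unary encoding: each of the 24 points contributes four inputs per player, and together with one bar input and one borne-off input per player plus the two turn indicators this gives 24 * 4 * 2 + 2 + 2 + 2 = 198 inputs, matching the reshape(1, 198) and the expected vectors in test.py. The snippet below restates the nested ordinary_trans helper outside the class purely to illustrate the per-point values it produces; it is an illustration of the committed code, not part of the patch.

# Illustration only: the per-point encoding used by Board.board_features_tesauro,
# restated as a standalone function.
def ordinary_trans(val, player):
    abs_val = val * player          # checkers belonging to `player` on this point
    if abs_val <= 0:   return (0, 0, 0, 0)
    elif abs_val == 1: return (1, 0, 0, 0)
    elif abs_val == 2: return (1, 1, 0, 0)
    elif abs_val == 3: return (1, 1, 1, 0)
    else:              return (1, 1, 1, (abs_val - 3) / 2)

for checkers in range(6):
    print(checkers, ordinary_trans(checkers, 1))
# 0 (0, 0, 0, 0)
# 1 (1, 0, 0, 0)
# 2 (1, 1, 0, 0)
# 3 (1, 1, 1, 0)
# 4 (1, 1, 1, 0.5)
# 5 (1, 1, 1, 1.0)

Only the fourth input grows beyond three checkers, half a unit per extra checker, so the resulting 198-vector stays mostly binary.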
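dumbeval.c is a variant of Tesauro's pubeval benchmark with weights taken from weights.py: dumbeval_eval expects a race flag and a 28-entry board tuple (the "pO!" format string plus the explicit length check) and returns the linear score W * X over the setx() encoding. Below is a minimal usage sketch under stated assumptions: the compiled module is importable as dumbeval and registers dumbeval_eval under the name eval (the module's method table is not shown above), and the example position is simply the standard starting layout expressed in the orientation described by the C comments.

# Minimal sketch, assuming the extension builds as a module named `dumbeval`
# that exposes dumbeval_eval() as dumbeval.eval(race, pos).
import dumbeval

# 28-slot position from the computer's point of view, per the comments in dumbeval.c:
# index 0 = opponent's men on the bar (negative), 1-24 = board points,
# 25 = computer's men on the bar, 26 = computer's men off, 27 = opponent's men off (negative).
pos = [0] * 28
for point, count in {24: 2, 13: 5, 8: 3, 6: 5}.items():   # computer's checkers (positive)
    pos[point] = count
for point, count in {1: 2, 12: 5, 17: 3, 19: 5}.items():  # opponent's checkers (negative)
    pos[point] = -count

score = dumbeval.eval(False, tuple(pos))  # race=False: a contact position before the move
print(score)

In the evaluation loop in network.py this is reached through Eval.make_dumbeval_move(board, -1, roll), which presumably scores every position reachable with the given roll and keeps the best one, mirroring how make_pubeval_move is used for the pubeval benchmark.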