From 90b97da4ff4fff34b49fc3c0d2fc2d6906f96421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?= Date: Mon, 12 Mar 2018 00:11:40 +0100 Subject: [PATCH] pubeval evaluation --- README.org | 10 +++++++--- bot.py | 4 ++-- game.py | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/README.org b/README.org index dd45173..61549cc 100644 --- a/README.org +++ b/README.org @@ -24,8 +24,12 @@ command-line options and switches are listed by running =main.py= with the argum ** Evaluation methods -Currently, only a single evaluation method is implemented: +Currently, the following evaluation methods are implemented: +- =pubeval=: Evaluates against the =pubeval= backgammon benchmark developed by + Gerald Tesauro. The source code is included in the =pubeval= directory and + needs to be compiled before use. The binary should be placed at + =pubeval/pubeval=. - =random=: Evaluates by playing against a player that makes random moves drawn from the set of legal moves. Should be used with high episode counts to lower variance. *TODO*: Doesn't even work currently @@ -54,9 +58,9 @@ The following examples describe commmon operations. 
=python3 --eval --model-name=quack= -*** Evaluate default model using evaluation methods =random= and =foovaluation= +*** Evaluate default model using evaluation methods =random= and =pubeval= -=python3 --eval --eval-methods random foovaluation= +=python3 main.py --eval --eval-methods random pubeval= * Model storage format diff --git a/bot.py b/bot.py index a2ac368..5ab9614 100644 --- a/bot.py +++ b/bot.py @@ -48,9 +48,9 @@ class Bot: return random.choice(list(legal_moves)) - # TODO: Test this, the score results are deterministic + # TODO: Test this, the score results should be deterministic def make_pubeval_move(self, board, sym, roll): - legal_moves = Board.calculate_legal_states(board, sym, roll) + legal_moves = Board.calculate_legal_states(tuple(board), sym, roll) moves_and_scores = [] for board in legal_moves: call_argument = ["./pubeval/pubeval"] diff --git a/game.py b/game.py index acc3640..fbc0fe5 100644 --- a/game.py +++ b/game.py @@ -6,7 +6,7 @@ from cup import Cup import numpy as np import sys - +import time class Game: @@ -131,10 +131,22 @@ class Game: def eval(self, trained_eps = 0): def do_eval(method, episodes = 1000, trained_eps = 0): + start_time = time.time() + + def print_time_estimate(eps_completed): + cur_time = time.time() + time_diff = cur_time - start_time + eps_per_sec = eps_completed / time_diff + secs_per_ep = time_diff / eps_completed + eps_remaining = (episodes - eps_completed) + sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2))) + sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. 
{time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep))) + sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) + if method == 'random': outcomes = [] - for i in range(episodes): + for i in range(1, episodes + 1): sys.stderr.write("[EVAL ] Episode {}".format(i)) self.board = Board.initial_state while Board.outcome(self.board) is None: @@ -145,10 +157,29 @@ class Game: sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) outcomes.append(Board.outcome(self.board)[1]) sys.stderr.write("\n") + + if i % 50 == 0: + print_time_estimate(i) return outcomes elif method == 'pubeval': outcomes = [] # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval + for i in range(1, episodes + 1): + sys.stderr.write("[EVAL ] Episode {}".format(i)) + self.board = Board.initial_state + while Board.outcome(self.board) is None: + roll = self.roll() + self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0] + roll = self.roll() + self.board = Board.flip(self.p2.make_pubeval_move(Board.board_features_to_pubeval(self.board, self.p2.get_sym()), self.p2.get_sym(), roll)[0][0:26]) + sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) + outcomes.append(Board.outcome(self.board)[1]) + sys.stderr.write("\n") + + if i % 10 == 0: + print_time_estimate(i) + + return outcomes else: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0]