pubeval evaluation

This commit is contained in:
Christoffer Müller Madsen 2018-03-12 00:11:40 +01:00
parent ac924f655b
commit 90b97da4ff
3 changed files with 42 additions and 7 deletions

View File

@ -24,8 +24,12 @@ command-line options and switches are listed by running =main.py= with the argum
** Evaluation methods ** Evaluation methods
Currently, only a single evaluation method is implemented: Currently, the following evaluation methods are implemented:
- =pubeval=: Evaluates against the =pubeval= backgammon benchmark developed by
Gerald Tesauro. The source code is included in the =pubeval= directory and
needs to be compiled before use. The binary should be placed at
=pubeval/pubeval=.
- =random=: Evaluates by playing against a player that makes random moves drawn - =random=: Evaluates by playing against a player that makes random moves drawn
from the set of legal moves. Should be used with high episode counts to lower from the set of legal moves. Should be used with high episode counts to lower
variance. *TODO*: Doesn't even work currently variance. *TODO*: Doesn't even work currently
@ -54,9 +58,9 @@ The following examples describe common operations.
=python3 --eval --model-name=quack= =python3 --eval --model-name=quack=
*** Evaluate default model using evaluation methods =random= and =foovaluation= *** Evaluate default model using evaluation methods =random= and =pubeval=
=python3 --eval --eval-methods random foovaluation= =python3 --eval --eval-methods random pubeval=
* Model storage format * Model storage format

4
bot.py
View File

@ -48,9 +48,9 @@ class Bot:
return random.choice(list(legal_moves)) return random.choice(list(legal_moves))
# TODO: Test this, the score results are deterministic # TODO: Test this, the score results should be deterministic
def make_pubeval_move(self, board, sym, roll): def make_pubeval_move(self, board, sym, roll):
legal_moves = Board.calculate_legal_states(board, sym, roll) legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
moves_and_scores = [] moves_and_scores = []
for board in legal_moves: for board in legal_moves:
call_argument = ["./pubeval/pubeval"] call_argument = ["./pubeval/pubeval"]

35
game.py
View File

@ -6,7 +6,7 @@ from cup import Cup
import numpy as np import numpy as np
import sys import sys
import time
class Game: class Game:
@ -131,10 +131,22 @@ class Game:
def eval(self, trained_eps = 0): def eval(self, trained_eps = 0):
def do_eval(method, episodes = 1000, trained_eps = 0): def do_eval(method, episodes = 1000, trained_eps = 0):
start_time = time.time()
def print_time_estimate(eps_completed):
    """Write evaluation throughput and a remaining-time estimate to stderr.

    Reads ``start_time`` and ``episodes`` from the enclosing scope.

    eps_completed: number of episodes finished so far; it is used as a
    divisor, so it must be non-zero.
    """
    elapsed = time.time() - start_time
    rate = eps_completed / elapsed
    # Average wall-clock cost of one finished episode, used to extrapolate.
    cost_per_episode = elapsed / eps_completed
    left = episodes - eps_completed

    throughput_msg = "[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(rate, 2))
    sys.stderr.write(throughput_msg)

    remaining_msg = "[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = left, time_remaining = int(left * cost_per_episode))
    sys.stderr.write(remaining_msg)
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method)) sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
if method == 'random': if method == 'random':
outcomes = [] outcomes = []
for i in range(episodes): for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i)) sys.stderr.write("[EVAL ] Episode {}".format(i))
self.board = Board.initial_state self.board = Board.initial_state
while Board.outcome(self.board) is None: while Board.outcome(self.board) is None:
@ -145,10 +157,29 @@ class Game:
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1])) sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
if i % 50 == 0:
print_time_estimate(i)
return outcomes return outcomes
elif method == 'pubeval': elif method == 'pubeval':
outcomes = [] outcomes = []
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
for i in range(1, episodes + 1):
sys.stderr.write("[EVAL ] Episode {}".format(i))
self.board = Board.initial_state
while Board.outcome(self.board) is None:
roll = self.roll()
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
roll = self.roll()
self.board = Board.flip(self.p2.make_pubeval_move(Board.board_features_to_pubeval(self.board, self.p2.get_sym()), self.p2.get_sym(), roll)[0][0:26])
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n")
if i % 10 == 0:
print_time_estimate(i)
return outcomes
else: else:
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
return [0] return [0]