pubeval evaluation
This commit is contained in:
parent
ac924f655b
commit
90b97da4ff
10
README.org
10
README.org
|
@ -24,8 +24,12 @@ command-line options and switches are listed by running =main.py= with the argum
|
||||||
|
|
||||||
** Evaluation methods
|
** Evaluation methods
|
||||||
|
|
||||||
Currently, only a single evaluation method is implemented:
|
Currently, the following evaluation methods are implemented:
|
||||||
|
|
||||||
|
- =pubeval=: Evaluates against the =pubeval= backgammon benchmark developed by
|
||||||
|
Gerald Tesauro. The source code is included in the =pubeval= directory and
|
||||||
|
needs to be compiled before use. The binary should be placed at
|
||||||
|
=pubeval/pubeval=.
|
||||||
- =random=: Evaluates by playing against a player that makes random moves drawn
|
- =random=: Evaluates by playing against a player that makes random moves drawn
|
||||||
from the set of legal moves. Should be used with high episode counts to lower
|
from the set of legal moves. Should be used with high episode counts to lower
|
||||||
variance. *TODO*: Doesn't even work currently
|
variance. *TODO*: Doesn't even work currently
|
||||||
|
@ -54,9 +58,9 @@ The following examples describe common operations.
|
||||||
|
|
||||||
=python3 --eval --model-name=quack=
|
=python3 --eval --model-name=quack=
|
||||||
|
|
||||||
*** Evaluate default model using evaluation methods =random= and =foovaluation=
|
*** Evaluate default model using evaluation methods =random= and =pubeval=
|
||||||
|
|
||||||
=python3 --eval --eval-methods random foovaluation=
|
=python3 main.py --eval --eval-methods random pubeval=
|
||||||
|
|
||||||
* Model storage format
|
* Model storage format
|
||||||
|
|
||||||
|
|
4
bot.py
4
bot.py
|
@ -48,9 +48,9 @@ class Bot:
|
||||||
return random.choice(list(legal_moves))
|
return random.choice(list(legal_moves))
|
||||||
|
|
||||||
|
|
||||||
# TODO: Test this, the score results are deterministic
|
# TODO: Test this, the score results should be deterministic
|
||||||
def make_pubeval_move(self, board, sym, roll):
|
def make_pubeval_move(self, board, sym, roll):
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
|
||||||
moves_and_scores = []
|
moves_and_scores = []
|
||||||
for board in legal_moves:
|
for board in legal_moves:
|
||||||
call_argument = ["./pubeval/pubeval"]
|
call_argument = ["./pubeval/pubeval"]
|
||||||
|
|
35
game.py
35
game.py
|
@ -6,7 +6,7 @@ from cup import Cup
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
class Game:
|
class Game:
|
||||||
|
|
||||||
|
@ -131,10 +131,22 @@ class Game:
|
||||||
|
|
||||||
def eval(self, trained_eps = 0):
|
def eval(self, trained_eps = 0):
|
||||||
def do_eval(method, episodes = 1000, trained_eps = 0):
|
def do_eval(method, episodes = 1000, trained_eps = 0):
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
def print_time_estimate(eps_completed):
|
||||||
|
cur_time = time.time()
|
||||||
|
time_diff = cur_time - start_time
|
||||||
|
eps_per_sec = eps_completed / time_diff
|
||||||
|
secs_per_ep = time_diff / eps_completed
|
||||||
|
eps_remaining = (episodes - eps_completed)
|
||||||
|
sys.stderr.write("[EVAL ] Averaging {per_sec} episodes per second\n".format(per_sec = round(eps_per_sec, 2)))
|
||||||
|
sys.stderr.write("[EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(eps_remaining = eps_remaining, time_remaining = int(eps_remaining * secs_per_ep)))
|
||||||
|
|
||||||
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
sys.stderr.write("[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))
|
||||||
|
|
||||||
if method == 'random':
|
if method == 'random':
|
||||||
outcomes = []
|
outcomes = []
|
||||||
for i in range(episodes):
|
for i in range(1, episodes + 1):
|
||||||
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
while Board.outcome(self.board) is None:
|
while Board.outcome(self.board) is None:
|
||||||
|
@ -145,10 +157,29 @@ class Game:
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
|
|
||||||
|
if i % 50 == 0:
|
||||||
|
print_time_estimate(i)
|
||||||
return outcomes
|
return outcomes
|
||||||
elif method == 'pubeval':
|
elif method == 'pubeval':
|
||||||
outcomes = []
|
outcomes = []
|
||||||
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
|
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
|
||||||
|
for i in range(1, episodes + 1):
|
||||||
|
sys.stderr.write("[EVAL ] Episode {}".format(i))
|
||||||
|
self.board = Board.initial_state
|
||||||
|
while Board.outcome(self.board) is None:
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = Board.flip(self.p2.make_pubeval_move(Board.board_features_to_pubeval(self.board, self.p2.get_sym()), self.p2.get_sym(), roll)[0][0:26])
|
||||||
|
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
||||||
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
|
sys.stderr.write("\n")
|
||||||
|
|
||||||
|
if i % 10 == 0:
|
||||||
|
print_time_estimate(i)
|
||||||
|
|
||||||
|
return outcomes
|
||||||
else:
|
else:
|
||||||
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
||||||
return [0]
|
return [0]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user