Added a bunch of pubeval stuff
This commit is contained in:
parent
81461d917e
commit
5e937d68b4
20
board.py
20
board.py
|
@ -18,6 +18,26 @@ class Board:
|
||||||
idxs.append(idx)
|
idxs.append(idx)
|
||||||
return idxs
|
return idxs
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Write a test for this
|
||||||
|
# TODO: Make sure that the bars fit: index 0 is the bar of player -1 and index 25 is the bar of player 1
|
||||||
|
# index 26 is player 1 home, index 27 is player -1 home
|
||||||
|
@staticmethod
|
||||||
|
def board_features_to_pubeval(board, player):
    """Build the list of values handed to the pubeval binary.

    The board is copied into a list and two extra entries are appended:
    first ``15 - (sum of positive entries)``, then
    ``-15 - (sum of negative entries)``. Per the comment above this
    method, these are the home counts of player 1 and player -1.

    When ``player`` is -1 the board is passed through ``Board.flip``
    before anything else, so the result is always expressed from the
    positive player's side.

    The input ``board`` is not modified; a new list is returned.
    """
    oriented = Board.flip(board) if player == -1 else board
    features = list(oriented)

    # Both totals are taken before the home counts are appended.
    positive_total = sum(value for value in features if value > 0)
    negative_total = sum(value for value in features if value < 0)

    features.extend([15 - positive_total, -15 - negative_total])
    return features
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_move_valid(board, player, face_value, move):
|
def is_move_valid(board, player, face_value, move):
|
||||||
def sign(a):
|
def sign(a):
|
||||||
|
|
20
bot.py
20
bot.py
|
@ -3,7 +3,9 @@ import tensorflow as tf
|
||||||
from network import Network
|
from network import Network
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from board import Board
|
from board import Board
|
||||||
|
import subprocess
|
||||||
import random
|
import random
|
||||||
|
import sys
|
||||||
|
|
||||||
class Bot:
|
class Bot:
|
||||||
|
|
||||||
|
@ -44,6 +46,24 @@ class Bot:
|
||||||
def make_random_move(self, board, sym, roll):
|
def make_random_move(self, board, sym, roll):
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
||||||
return random.choice(list(legal_moves))
|
return random.choice(list(legal_moves))
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Test this, the score results are deterministic
|
||||||
|
def make_pubeval_move(self, board, sym, roll):
    """Pick the legal successor state that the pubeval binary scores highest.

    Every state returned by ``Board.calculate_legal_states(board, sym, roll)``
    is converted with ``Board.board_features_to_pubeval`` and passed as
    command-line arguments to ``./pubeval_bin``, whose stdout is taken as
    the score of that state.

    Returns:
        A two-element list ``[state, score]`` where ``score`` is the text
        printed by the binary (a decimal string).

    Raises:
        ValueError: if there are no legal states to choose from, or the
            binary prints something that is not a number.
        subprocess.CalledProcessError: if the binary exits with a
            non-zero status.
    """
    legal_moves = Board.calculate_legal_states(board, sym, roll)

    moves_and_scores = []
    # A distinct loop name keeps the `board` argument intact.
    for candidate in legal_moves:
        call_argument = ["./pubeval_bin"]
        call_argument.extend(
            str(x) for x in Board.board_features_to_pubeval(candidate, sym)
        )
        data = subprocess.check_output(call_argument)
        # check_output returns bytes; decode instead of parsing repr(bytes).
        moves_and_scores.append([candidate, data.decode("ascii").strip()])

    if not moves_and_scores:
        raise ValueError("no legal moves available for pubeval to score")

    # Compare the scores numerically. Comparing the raw strings would rank
    # "9.5" above "10.2" and "-0.9" above "-0.1".
    return max(moves_and_scores, key=lambda pair: float(pair[1]))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def make_move(self, board, sym, roll):
|
def make_move(self, board, sym, roll):
|
||||||
# print(Board.pretty(board))
|
# print(Board.pretty(board))
|
||||||
|
|
44
game.py
44
game.py
|
@ -3,6 +3,7 @@ from player import Player
|
||||||
from bot import Bot
|
from bot import Bot
|
||||||
from restore_bot import RestoreBot
|
from restore_bot import RestoreBot
|
||||||
from cup import Cup
|
from cup import Cup
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
@ -45,20 +46,38 @@ class Game:
|
||||||
if coin_flip > 0.5:
|
if coin_flip > 0.5:
|
||||||
user_color = input("Pick a number, 1 (white) or -1 (black)")
|
user_color = input("Pick a number, 1 (white) or -1 (black)")
|
||||||
if int(user_color) == 1:
|
if int(user_color) == 1:
|
||||||
p1 = player(1)
|
p1 = Player(1)
|
||||||
p2 = bot(-1)
|
p2 = Bot(-1)
|
||||||
else:
|
else:
|
||||||
p1 = bot(1)
|
p1 = Bot(1)
|
||||||
p2 = player(-1)
|
p2 = Player(-1)
|
||||||
else:
|
else:
|
||||||
p1 = bot(1)
|
p1 = Bot(1)
|
||||||
p2 = player(-1)
|
p2 = Player(-1)
|
||||||
|
|
||||||
|
# Since we have to make sure that the Bot always plays as if it's white, we have to flip
|
||||||
|
# the board when it's not actually.
|
||||||
|
if p1.__name__ == "Bot" and p1.get_sym() == 1:
|
||||||
|
while Board.outcome(self.board) == None:
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = p1.make_move(self.board, p1.get_sym(), roll)
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = p2.make_move(self.board, p2.get_sym(), roll)
|
||||||
|
if p1.__name__ == "Bot" and p1.get_sym() == -1:
|
||||||
|
while Board.outcome(self.board) == None:
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = Board.flip(p1.make_move(Board.flip(self.board), p1.get_sym(), roll))
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = p2.make_move(self.board, p2.get_sym(), roll)
|
||||||
|
if p2.__name__ == "Bot" and p1.get_sym() == -1:
|
||||||
|
while Board.outcome(self.board) == None:
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = p1.make_move(self.board, p1.get_sym(), roll)
|
||||||
|
roll = self.roll()
|
||||||
|
self.board = Board.flip(p2.make_move(Board.flip(self.board), p2.get_sym(), roll))
|
||||||
|
|
||||||
while Board.outcome(self.board) == None:
|
|
||||||
roll = self.roll()
|
print(Board.outcome(self.board))
|
||||||
self.board = p1.make_move(self.board, p1.get_sym(), roll)
|
|
||||||
roll = self.roll()
|
|
||||||
self.board = p2.make_move(self.board, p2.get_sym(), roll)
|
|
||||||
|
|
||||||
|
|
||||||
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
|
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
|
||||||
|
@ -127,6 +146,9 @@ class Game:
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
return outcomes
|
return outcomes
|
||||||
|
elif method == 'pubeval':
|
||||||
|
outcomes = []
|
||||||
|
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
|
||||||
else:
|
else:
|
||||||
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
|
||||||
return [0]
|
return [0]
|
||||||
|
|
145
pubeval/pubeval.c
Normal file
145
pubeval/pubeval.c
Normal file
|
@ -0,0 +1,145 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
/* Input feature vector: filled by setx() from a board position and
   read by pubeval() when computing the weighted sum. */
static float x[122];
|
||||||
|
|
||||||
|
static const float wc[122] = {
|
||||||
|
0.25696, -0.66937, -1.66135, -2.02487, -2.53398, -0.16092, -1.11725, -1.06654,
|
||||||
|
-0.92830, -1.99558, -1.10388, -0.80802, 0.09856, -0.62086, -1.27999, -0.59220,
|
||||||
|
-0.73667, 0.89032, -0.38933, -1.59847, -1.50197, -0.60966, 1.56166, -0.47389,
|
||||||
|
-1.80390, -0.83425, -0.97741, -1.41371, 0.24500, 0.10970, -1.36476, -1.05572,
|
||||||
|
1.15420, 0.11069, -0.38319, -0.74816, -0.59244, 0.81116, -0.39511, 0.11424,
|
||||||
|
-0.73169, -0.56074, 1.09792, 0.15977, 0.13786, -1.18435, -0.43363, 1.06169,
|
||||||
|
-0.21329, 0.04798, -0.94373, -0.22982, 1.22737, -0.13099, -0.06295, -0.75882,
|
||||||
|
-0.13658, 1.78389, 0.30416, 0.36797, -0.69851, 0.13003, 1.23070, 0.40868,
|
||||||
|
-0.21081, -0.64073, 0.31061, 1.59554, 0.65718, 0.25429, -0.80789, 0.08240,
|
||||||
|
1.78964, 0.54304, 0.41174, -1.06161, 0.07851, 2.01451, 0.49786, 0.91936,
|
||||||
|
-0.90750, 0.05941, 1.83120, 0.58722, 1.28777, -0.83711, -0.33248, 2.64983,
|
||||||
|
0.52698, 0.82132, -0.58897, -1.18223, 3.35809, 0.62017, 0.57353, -0.07276,
|
||||||
|
-0.36214, 4.37655, 0.45481, 0.21746, 0.10504, -0.61977, 3.54001, 0.04612,
|
||||||
|
-0.18108, 0.63211, -0.87046, 2.47673, -0.48016, -1.27157, 0.86505, -1.11342,
|
||||||
|
1.24612, -0.82385, -2.77082, 1.23606, -1.59529, 0.10438, -1.30206, -4.11520,
|
||||||
|
5.62596, -2.75800
|
||||||
|
};
|
||||||
|
|
||||||
|
static const float wr[122] = {
|
||||||
|
0.00000, -0.17160, 0.27010, 0.29906, -0.08471, 0.00000, -1.40375, -1.05121,
|
||||||
|
0.07217, -0.01351, 0.00000, -1.29506, -2.16183, 0.13246, -1.03508, 0.00000,
|
||||||
|
-2.29847, -2.34631, 0.17253, 0.08302, 0.00000, -1.27266, -2.87401, -0.07456,
|
||||||
|
-0.34240, 0.00000, -1.34640, -2.46556, -0.13022, -0.01591, 0.00000, 0.27448,
|
||||||
|
0.60015, 0.48302, 0.25236, 0.00000, 0.39521, 0.68178, 0.05281, 0.09266,
|
||||||
|
0.00000, 0.24855, -0.06844, -0.37646, 0.05685, 0.00000, 0.17405, 0.00430,
|
||||||
|
0.74427, 0.00576, 0.00000, 0.12392, 0.31202, -0.91035, -0.16270, 0.00000,
|
||||||
|
0.01418, -0.10839, -0.02781, -0.88035, 0.00000, 1.07274, 2.00366, 1.16242,
|
||||||
|
0.22520, 0.00000, 0.85631, 1.06349, 1.49549, 0.18966, 0.00000, 0.37183,
|
||||||
|
-0.50352, -0.14818, 0.12039, 0.00000, 0.13681, 0.13978, 1.11245, -0.12707,
|
||||||
|
0.00000, -0.22082, 0.20178, -0.06285, -0.52728, 0.00000, -0.13597, -0.19412,
|
||||||
|
-0.09308, -1.26062, 0.00000, 3.05454, 5.16874, 1.50680, 5.35000, 0.00000,
|
||||||
|
2.19605, 3.85390, 0.88296, 2.30052, 0.00000, 0.92321, 1.08744, -0.11696,
|
||||||
|
-0.78560, 0.00000, -0.09795, -0.83050, -1.09167, -4.94251, 0.00000, -1.00316,
|
||||||
|
-3.66465, -2.56906, -9.67677, 0.00000, -2.77982, -7.26713, -3.40177,-12.32252,
|
||||||
|
0.00000, 3.42040
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
float pubeval(race,pos)
|
||||||
|
int race,pos[];
|
||||||
|
{
|
||||||
|
/* Backgammon move-selection evaluation function
|
||||||
|
for benchmark comparisons. Computes a linear
|
||||||
|
evaluation function: Score = W * X, where X is
|
||||||
|
an input vector encoding the board state (using
|
||||||
|
a raw encoding of the number of men at each location),
|
||||||
|
and W is a weight vector. Separate weight vectors
|
||||||
|
are used for racing positions and contact positions.
|
||||||
|
Makes lots of obvious mistakes, but provides a
|
||||||
|
decent level of play for benchmarking purposes. */
|
||||||
|
|
||||||
|
/* Provided as a public service to the backgammon
|
||||||
|
programming community by Gerry Tesauro, IBM Research.
|
||||||
|
(e-mail: tesauro@watson.ibm.com) */
|
||||||
|
|
||||||
|
/* The following inputs are needed for this routine:
|
||||||
|
|
||||||
|
race is an integer variable which should be set
|
||||||
|
based on the INITIAL position BEFORE the move.
|
||||||
|
Set race=1 if the position is a race (i.e. no contact)
|
||||||
|
and 0 if the position is a contact position.
|
||||||
|
|
||||||
|
pos[] is an integer array of dimension 28 which
|
||||||
|
should represent a legal final board state after
|
||||||
|
the move. Elements 1-24 correspond to board locations
|
||||||
|
1-24 from computer's point of view, i.e. computer's
|
||||||
|
men move in the negative direction from 24 to 1, and
|
||||||
|
opponent's men move in the positive direction from
|
||||||
|
1 to 24. Computer's men are represented by positive
|
||||||
|
integers, and opponent's men are represented by negative
|
||||||
|
integers. Element 25 represents computer's men on the
|
||||||
|
bar (positive integer), and element 0 represents opponent's
|
||||||
|
men on the bar (negative integer). Element 26 represents
|
||||||
|
computer's men off the board (positive integer), and
|
||||||
|
element 27 represents opponent's men off the board
|
||||||
|
(negative integer). */
|
||||||
|
|
||||||
|
/* Also, be sure to call rdwts() at the start of your
|
||||||
|
program to read in the weight values. Happy hacking] */
|
||||||
|
|
||||||
|
int i;
|
||||||
|
float score;
|
||||||
|
|
||||||
|
if(pos[26]==15) return(99999999.);
|
||||||
|
/* all men off, best possible move */
|
||||||
|
|
||||||
|
setx(pos); /* sets input array x[] */
|
||||||
|
score = 0.0;
|
||||||
|
if(race) { /* use race weights */
|
||||||
|
for(i=0;i<122;++i) score += wr[i]*x[i];
|
||||||
|
}
|
||||||
|
else { /* use contact weights */
|
||||||
|
for(i=0;i<122;++i) score += wc[i]*x[i];
|
||||||
|
}
|
||||||
|
return(score);
|
||||||
|
}
|
||||||
|
|
||||||
|
setx(pos)
|
||||||
|
int pos[];
|
||||||
|
{
|
||||||
|
/* sets input vector x[] given board position pos[] */
|
||||||
|
extern float x[];
|
||||||
|
int j, jm1, n;
|
||||||
|
/* initialize */
|
||||||
|
for(j=0;j<122;++j) x[j] = 0.0;
|
||||||
|
|
||||||
|
/* first encode board locations 24-1 */
|
||||||
|
for(j=1;j<=24;++j) {
|
||||||
|
jm1 = j - 1;
|
||||||
|
n = pos[25-j];
|
||||||
|
if(n!=0) {
|
||||||
|
if(n==-1) x[5*jm1+0] = 1.0;
|
||||||
|
if(n==1) x[5*jm1+1] = 1.0;
|
||||||
|
if(n>=2) x[5*jm1+2] = 1.0;
|
||||||
|
if(n==3) x[5*jm1+3] = 1.0;
|
||||||
|
if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* encode opponent barmen */
|
||||||
|
x[120] = -(float)(pos[0])/2.0;
|
||||||
|
/* encode computer's menoff */
|
||||||
|
x[121] = (float)(pos[26])/15.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Command-line driver: reads a 28-element board position from argv,
   evaluates it with pubeval() and prints the score to stdout.

   Exactly 28 integer arguments are required (pos[0] .. pos[27]); each
   must be a whole number between -15 and 15, since a side has 15 men.
   On bad input a message is written to stderr and EXIT_FAILURE is
   returned, instead of reading uninitialised entries or writing past
   the end of the array. */
int main(int argc, char **argv) {
    enum { BOARD_SIZE = 28, MAX_MEN = 15 };
    int board[BOARD_SIZE] = {0};
    int i;
    long value;
    char *end;

    if (argc - 1 != BOARD_SIZE) {
        fprintf(stderr, "usage: %s <%d integers>\n",
                argc > 0 ? argv[0] : "pubeval", BOARD_SIZE);
        return EXIT_FAILURE;
    }

    for (i = 1; i < argc; i++) {
        value = strtol(argv[i], &end, 10);
        /* reject empty strings, trailing junk and impossible counts;
           an out-of-range strtol result also fails the bounds check */
        if (end == argv[i] || *end != '\0' ||
            value < -MAX_MEN || value > MAX_MEN) {
            fprintf(stderr, "invalid board value at argument %d: '%s'\n",
                    i, argv[i]);
            return EXIT_FAILURE;
        }
        board[i - 1] = (int) value;
    }

    /* race flag is fixed at 0, so the contact weights are always used */
    printf("%f", pubeval(0, board));

    return 0;
}
|
BIN
pubeval_bin
Executable file
BIN
pubeval_bin
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user