Added a bunch of pubeval stuff

This commit is contained in:
Alexander Munch-Hansen 2018-03-11 20:00:24 +01:00
parent 81461d917e
commit 5e937d68b4
5 changed files with 218 additions and 11 deletions

View File

@ -18,6 +18,26 @@ class Board:
idxs.append(idx) idxs.append(idx)
return idxs return idxs
# TODO: Write a test for this
# TODO: Verify the bar slots line up with pubeval: here index 0 is taken to be
# the -1 player's bar and index 25 the 1 player's bar.
@staticmethod
def board_features_to_pubeval(board, player):
    """Return `board` as a list extended with the two borne-off slots pubeval reads.

    The board is passed through ``Board.flip`` first when ``player == -1``
    (presumably so the position is seen from the mover's side -- confirm
    against ``Board.flip``).  Two entries are then appended to the copy:

    * ``15 - (sum of positive entries)`` -- intended as index 26, the
      positive side's checkers that are off the board;
    * ``-15 - (sum of negative entries)`` -- intended as index 27, the
      negative side's checkers off the board, stored as a non-positive number.

    Both sums are taken before anything is appended.  The index numbers above
    assume the incoming board has 26 entries -- TODO confirm.
    """
    oriented = Board.flip(board) if player == -1 else board
    features = list(oriented)

    positive_total = sum(count for count in features if count > 0)
    negative_total = sum(count for count in features if count < 0)

    features.extend([15 - positive_total, -15 - negative_total])
    return features
@staticmethod @staticmethod
def is_move_valid(board, player, face_value, move): def is_move_valid(board, player, face_value, move):
def sign(a): def sign(a):

20
bot.py
View File

@ -3,7 +3,9 @@ import tensorflow as tf
from network import Network from network import Network
import numpy as np import numpy as np
from board import Board from board import Board
import subprocess
import random import random
import sys
class Bot: class Bot:
@ -44,6 +46,24 @@ class Bot:
def make_random_move(self, board, sym, roll): def make_random_move(self, board, sym, roll):
legal_moves = Board.calculate_legal_states(board, sym, roll) legal_moves = Board.calculate_legal_states(board, sym, roll)
return random.choice(list(legal_moves)) return random.choice(list(legal_moves))
# TODO: Test this, the score results are deterministic
def make_pubeval_move(self, board, sym, roll):
    """Pick the legal successor state that the external pubeval binary rates highest.

    Every state returned by ``Board.calculate_legal_states(board, sym, roll)``
    is converted with ``Board.board_features_to_pubeval`` and handed to
    ``./pubeval_bin`` (resolved relative to the current working directory) as
    one command-line argument per board entry.

    Returns:
        A two-element list ``[state, score]`` where ``score`` is the text the
        binary printed for ``state``.  Note that this is a pair, not a bare
        board like ``make_random_move`` returns.

    Raises:
        subprocess.CalledProcessError: if the binary exits with a non-zero status.
        ValueError: if there are no legal states, or the binary's output is
            not a number.
    """
    legal_moves = Board.calculate_legal_states(board, sym, roll)

    moves_and_scores = []
    # A separate loop name keeps the `board` argument intact (the original
    # loop reused `board` and clobbered it).
    for candidate in legal_moves:
        call_argument = ["./pubeval_bin"]
        call_argument.extend(
            str(x) for x in Board.board_features_to_pubeval(candidate, sym)
        )
        data = subprocess.check_output(call_argument)
        # check_output returns bytes; decode them instead of slicing repr(bytes).
        moves_and_scores.append([candidate, data.decode()])

    # Rank numerically.  Comparing the raw strings would order the scores as
    # text, which does not match their numeric order.  max() returns the
    # first maximal pair, the same tie-breaking argmax() had.
    return max(moves_and_scores, key=lambda pair: float(pair[1]))
def make_move(self, board, sym, roll): def make_move(self, board, sym, roll):
# print(Board.pretty(board)) # print(Board.pretty(board))

44
game.py
View File

@ -3,6 +3,7 @@ from player import Player
from bot import Bot from bot import Bot
from restore_bot import RestoreBot from restore_bot import RestoreBot
from cup import Cup from cup import Cup
import numpy as np import numpy as np
import sys import sys
@ -45,20 +46,38 @@ class Game:
if coin_flip > 0.5: if coin_flip > 0.5:
user_color = input("Pick a number, 1 (white) or -1 (black)") user_color = input("Pick a number, 1 (white) or -1 (black)")
if int(user_color) == 1: if int(user_color) == 1:
p1 = player(1) p1 = Player(1)
p2 = bot(-1) p2 = Bot(-1)
else: else:
p1 = bot(1) p1 = Bot(1)
p2 = player(-1) p2 = Player(-1)
else: else:
p1 = bot(1) p1 = Bot(1)
p2 = player(-1) p2 = Player(-1)
# Since we have to make sure that the Bot always plays as if it's white, we have to flip
# the board when it's not actually.
if p1.__name__ == "Bot" and p1.get_sym() == 1:
while Board.outcome(self.board) == None:
roll = self.roll()
self.board = p1.make_move(self.board, p1.get_sym(), roll)
roll = self.roll()
self.board = p2.make_move(self.board, p2.get_sym(), roll)
if p1.__name__ == "Bot" and p1.get_sym() == -1:
while Board.outcome(self.board) == None:
roll = self.roll()
self.board = Board.flip(p1.make_move(Board.flip(self.board), p1.get_sym(), roll))
roll = self.roll()
self.board = p2.make_move(self.board, p2.get_sym(), roll)
if p2.__name__ == "Bot" and p1.get_sym() == -1:
while Board.outcome(self.board) == None:
roll = self.roll()
self.board = p1.make_move(self.board, p1.get_sym(), roll)
roll = self.roll()
self.board = Board.flip(p2.make_move(Board.flip(self.board), p2.get_sym(), roll))
while Board.outcome(self.board) == None:
roll = self.roll() print(Board.outcome(self.board))
self.board = p1.make_move(self.board, p1.get_sym(), roll)
roll = self.roll()
self.board = p2.make_move(self.board, p2.get_sym(), roll)
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
@ -127,6 +146,9 @@ class Game:
outcomes.append(Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1])
sys.stderr.write("\n") sys.stderr.write("\n")
return outcomes return outcomes
elif method == 'pubeval':
outcomes = []
# Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval
else: else:
sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
return [0] return [0]

145
pubeval/pubeval.c Normal file
View File

@ -0,0 +1,145 @@
#include <stdio.h>
#include <stdlib.h>
/* Input feature vector: filled by setx() and read by pubeval().
   Entries 0-119 hold 5 features for each of the 24 board locations,
   entry 120 encodes pos[0] and entry 121 encodes pos[26]. */
static float x[122];
/* Weight vector used by pubeval() when race == 0 (contact positions). */
static const float wc[122] = {
0.25696, -0.66937, -1.66135, -2.02487, -2.53398, -0.16092, -1.11725, -1.06654,
-0.92830, -1.99558, -1.10388, -0.80802, 0.09856, -0.62086, -1.27999, -0.59220,
-0.73667, 0.89032, -0.38933, -1.59847, -1.50197, -0.60966, 1.56166, -0.47389,
-1.80390, -0.83425, -0.97741, -1.41371, 0.24500, 0.10970, -1.36476, -1.05572,
1.15420, 0.11069, -0.38319, -0.74816, -0.59244, 0.81116, -0.39511, 0.11424,
-0.73169, -0.56074, 1.09792, 0.15977, 0.13786, -1.18435, -0.43363, 1.06169,
-0.21329, 0.04798, -0.94373, -0.22982, 1.22737, -0.13099, -0.06295, -0.75882,
-0.13658, 1.78389, 0.30416, 0.36797, -0.69851, 0.13003, 1.23070, 0.40868,
-0.21081, -0.64073, 0.31061, 1.59554, 0.65718, 0.25429, -0.80789, 0.08240,
1.78964, 0.54304, 0.41174, -1.06161, 0.07851, 2.01451, 0.49786, 0.91936,
-0.90750, 0.05941, 1.83120, 0.58722, 1.28777, -0.83711, -0.33248, 2.64983,
0.52698, 0.82132, -0.58897, -1.18223, 3.35809, 0.62017, 0.57353, -0.07276,
-0.36214, 4.37655, 0.45481, 0.21746, 0.10504, -0.61977, 3.54001, 0.04612,
-0.18108, 0.63211, -0.87046, 2.47673, -0.48016, -1.27157, 0.86505, -1.11342,
1.24612, -0.82385, -2.77082, 1.23606, -1.59529, 0.10438, -1.30206, -4.11520,
5.62596, -2.75800
};
/* Weight vector used by pubeval() when race != 0 (racing positions). */
static const float wr[122] = {
0.00000, -0.17160, 0.27010, 0.29906, -0.08471, 0.00000, -1.40375, -1.05121,
0.07217, -0.01351, 0.00000, -1.29506, -2.16183, 0.13246, -1.03508, 0.00000,
-2.29847, -2.34631, 0.17253, 0.08302, 0.00000, -1.27266, -2.87401, -0.07456,
-0.34240, 0.00000, -1.34640, -2.46556, -0.13022, -0.01591, 0.00000, 0.27448,
0.60015, 0.48302, 0.25236, 0.00000, 0.39521, 0.68178, 0.05281, 0.09266,
0.00000, 0.24855, -0.06844, -0.37646, 0.05685, 0.00000, 0.17405, 0.00430,
0.74427, 0.00576, 0.00000, 0.12392, 0.31202, -0.91035, -0.16270, 0.00000,
0.01418, -0.10839, -0.02781, -0.88035, 0.00000, 1.07274, 2.00366, 1.16242,
0.22520, 0.00000, 0.85631, 1.06349, 1.49549, 0.18966, 0.00000, 0.37183,
-0.50352, -0.14818, 0.12039, 0.00000, 0.13681, 0.13978, 1.11245, -0.12707,
0.00000, -0.22082, 0.20178, -0.06285, -0.52728, 0.00000, -0.13597, -0.19412,
-0.09308, -1.26062, 0.00000, 3.05454, 5.16874, 1.50680, 5.35000, 0.00000,
2.19605, 3.85390, 0.88296, 2.30052, 0.00000, 0.92321, 1.08744, -0.11696,
-0.78560, 0.00000, -0.09795, -0.83050, -1.09167, -4.94251, 0.00000, -1.00316,
-3.66465, -2.56906, -9.67677, 0.00000, -2.77982, -7.26713, -3.40177,-12.32252,
0.00000, 3.42040
};
float pubeval(race,pos)
int race,pos[];
{
/* Backgammon move-selection evaluation function
for benchmark comparisons. Computes a linear
evaluation function: Score = W * X, where X is
an input vector encoding the board state (using
a raw encoding of the number of men at each location),
and W is a weight vector. Separate weight vectors
are used for racing positions and contact positions.
Makes lots of obvious mistakes, but provides a
decent level of play for benchmarking purposes. */
/* Provided as a public service to the backgammon
programming community by Gerry Tesauro, IBM Research.
(e-mail: tesauro@watson.ibm.com) */
/* The following inputs are needed for this routine:
race is an integer variable which should be set
based on the INITIAL position BEFORE the move.
Set race=1 if the position is a race (i.e. no contact)
and 0 if the position is a contact position.
pos[] is an integer array of dimension 28 which
should represent a legal final board state after
the move. Elements 1-24 correspond to board locations
1-24 from computer's point of view, i.e. computer's
men move in the negative direction from 24 to 1, and
opponent's men move in the positive direction from
1 to 24. Computer's men are represented by positive
integers, and opponent's men are represented by negative
integers. Element 25 represents computer's men on the
bar (positive integer), and element 0 represents opponent's
men on the bar (negative integer). Element 26 represents
computer's men off the board (positive integer), and
element 27 represents opponent's men off the board
(negative integer). */
/* Also, be sure to call rdwts() at the start of your
program to read in the weight values. Happy hacking] */
int i;
float score;
if(pos[26]==15) return(99999999.);
/* all men off, best possible move */
setx(pos); /* sets input array x[] */
score = 0.0;
if(race) { /* use race weights */
for(i=0;i<122;++i) score += wr[i]*x[i];
}
else { /* use contact weights */
for(i=0;i<122;++i) score += wc[i]*x[i];
}
return(score);
}
setx(pos)
int pos[];
{
/* sets input vector x[] given board position pos[] */
extern float x[];
int j, jm1, n;
/* initialize */
for(j=0;j<122;++j) x[j] = 0.0;
/* first encode board locations 24-1 */
for(j=1;j<=24;++j) {
jm1 = j - 1;
n = pos[25-j];
if(n!=0) {
if(n==-1) x[5*jm1+0] = 1.0;
if(n==1) x[5*jm1+1] = 1.0;
if(n>=2) x[5*jm1+2] = 1.0;
if(n==3) x[5*jm1+3] = 1.0;
if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0;
}
}
/* encode opponent barmen */
x[120] = -(float)(pos[0])/2.0;
/* encode computer's menoff */
x[121] = (float)(pos[26])/15.0;
}
/* Command-line driver: each argument is parsed as one entry of the
   28-element position array (argv[1] -> pos[0], argv[2] -> pos[1], ...)
   and the pubeval() score is printed to stdout.

   Entries for which no argument was supplied are 0. More than 28
   arguments cannot fit the array and are rejected with a non-zero exit
   status instead of being written past its end.

   NOTE: race is always passed as 0, so only the contact weights are
   ever used by this driver. */
int main(int argc, char **argv) {
    int pos[28] = {0};
    int i;

    if (argc - 1 > 28) {
        fprintf(stderr, "%s: expected at most 28 board values, got %d\n",
                argv[0], argc - 1);
        return EXIT_FAILURE;
    }

    for (i = 1; i < argc; i++) {
        pos[i - 1] = (int) strtol(argv[i], NULL, 10);
    }

    /* No trailing newline: the Python caller uses the raw output as the score. */
    printf("%f", pubeval(0, pos));
    return 0;
}

BIN
pubeval_bin Executable file

Binary file not shown.