From 5e937d68b472847aa961833caa61608da85e34e8 Mon Sep 17 00:00:00 2001 From: Alexander Munch-Hansen Date: Sun, 11 Mar 2018 20:00:24 +0100 Subject: [PATCH] Added a bunch of pubeval stuff --- board.py | 20 +++++++ bot.py | 20 +++++++ game.py | 44 ++++++++++---- pubeval/pubeval.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++ pubeval_bin | Bin 0 -> 8748 bytes 5 files changed, 218 insertions(+), 11 deletions(-) create mode 100644 pubeval/pubeval.c create mode 100755 pubeval_bin diff --git a/board.py b/board.py index c42b777..5150d35 100644 --- a/board.py +++ b/board.py @@ -18,6 +18,26 @@ class Board: idxs.append(idx) return idxs + + # TODO: Write a test for this + # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player + # index 26 is player 1 home, index 27 is player -1 home + @staticmethod + def board_features_to_pubeval(board, player): + + if player == -1: + board = Board.flip(board) + + board = list(board) + positives = [x if x > 0 else 0 for x in board] + negatives = [x if x < 0 else 0 for x in board] + board.append(15 - sum(positives)) + board.append(-15 - sum(negatives)) + return board + + + + @staticmethod def is_move_valid(board, player, face_value, move): def sign(a): diff --git a/bot.py b/bot.py index b79e2b8..0f1b32a 100644 --- a/bot.py +++ b/bot.py @@ -3,7 +3,9 @@ import tensorflow as tf from network import Network import numpy as np from board import Board +import subprocess import random +import sys class Bot: @@ -44,6 +46,24 @@ class Bot: def make_random_move(self, board, sym, roll): legal_moves = Board.calculate_legal_states(board, sym, roll) return random.choice(list(legal_moves)) + + + # TODO: Test this, the score results are deterministic + def make_pubeval_move(self, board, sym, roll): + legal_moves = Board.calculate_legal_states(board, sym, roll) + moves_and_scores = [] + for board in legal_moves: + call_argument = ["./pubeval_bin"] + for x in Board.board_features_to_pubeval(board, sym): + call_argument.append(str(x)) + data = subprocess.check_output(call_argument) + moves_and_scores.append([board,str(data).split("'")[1]]) + scores = [ x[1] for x in moves_and_scores ] + best_move_pair = moves_and_scores[np.array(scores).argmax()] + return best_move_pair + + + def make_move(self, board, sym, roll): # print(Board.pretty(board)) diff --git a/game.py b/game.py index 1b6d94e..acc3640 100644 --- a/game.py +++ b/game.py @@ -3,6 +3,7 @@ from player import Player from bot import Bot from restore_bot import RestoreBot from cup import Cup + import numpy as np import sys @@ -45,20 +46,38 @@ class Game: if coin_flip > 0.5: user_color = input("Pick a number, 1 (white) or -1 (black)") if int(user_color) == 1: - p1 = player(1) - p2 = bot(-1) + p1 = Player(1) + p2 = Bot(-1) else: - p1 = bot(1) - p2 = player(-1) + p1 = Bot(1) + p2 = Player(-1) else: - p1 = bot(1) - p2 = player(-1) + p1 = Bot(1) + p2 = Player(-1) + + # Since we have to make sure that the Bot always plays as if it's white, we have to flip + # the board when it's not actually. + if p1.__name__ == "Bot" and p1.get_sym() == 1: + while Board.outcome(self.board) == None: + roll = self.roll() + self.board = p1.make_move(self.board, p1.get_sym(), roll) + roll = self.roll() + self.board = p2.make_move(self.board, p2.get_sym(), roll) + if p1.__name__ == "Bot" and p1.get_sym() == -1: + while Board.outcome(self.board) == None: + roll = self.roll() + self.board = Board.flip(p1.make_move(Board.flip(self.board), p1.get_sym(), roll)) + roll = self.roll() + self.board = p2.make_move(self.board, p2.get_sym(), roll) + if p2.__name__ == "Bot" and p1.get_sym() == -1: + while Board.outcome(self.board) == None: + roll = self.roll() + self.board = p1.make_move(self.board, p1.get_sym(), roll) + roll = self.roll() + self.board = Board.flip(p2.make_move(Board.flip(self.board), p2.get_sym(), roll)) - while Board.outcome(self.board) == None: - roll = self.roll() - self.board = p1.make_move(self.board, p1.get_sym(), roll) - roll = self.roll() - self.board = p2.make_move(self.board, p2.get_sym(), roll) + + print(Board.outcome(self.board)) def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0): @@ -127,6 +146,9 @@ class Game: outcomes.append(Board.outcome(self.board)[1]) sys.stderr.write("\n") return outcomes + elif method == 'pubeval': + outcomes = [] + # Add the evaluation code for pubeval, the bot has a method make_pubeval_move(board, sym, roll), which can be used to get the best move according to pubeval else: sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method)) return [0] diff --git a/pubeval/pubeval.c b/pubeval/pubeval.c new file mode 100644 index 0000000..31d254a --- /dev/null +++ b/pubeval/pubeval.c @@ -0,0 +1,145 @@ +#include +#include + +static float x[122]; + +static const float wc[122] = { + 0.25696, -0.66937, -1.66135, -2.02487, -2.53398, -0.16092, -1.11725, -1.06654, +-0.92830, -1.99558, -1.10388, -0.80802, 0.09856, -0.62086, -1.27999, -0.59220, +-0.73667, 0.89032, -0.38933, -1.59847, -1.50197, -0.60966, 1.56166, -0.47389, +-1.80390, -0.83425, -0.97741, -1.41371, 0.24500, 0.10970, -1.36476, -1.05572, + 1.15420, 0.11069, -0.38319, -0.74816, -0.59244, 0.81116, -0.39511, 0.11424, +-0.73169, -0.56074, 1.09792, 0.15977, 0.13786, -1.18435, -0.43363, 1.06169, +-0.21329, 0.04798, -0.94373, -0.22982, 1.22737, -0.13099, -0.06295, -0.75882, +-0.13658, 1.78389, 0.30416, 0.36797, -0.69851, 0.13003, 1.23070, 0.40868, +-0.21081, -0.64073, 0.31061, 1.59554, 0.65718, 0.25429, -0.80789, 0.08240, + 1.78964, 0.54304, 0.41174, -1.06161, 0.07851, 2.01451, 0.49786, 0.91936, +-0.90750, 0.05941, 1.83120, 0.58722, 1.28777, -0.83711, -0.33248, 2.64983, + 0.52698, 0.82132, -0.58897, -1.18223, 3.35809, 0.62017, 0.57353, -0.07276, +-0.36214, 4.37655, 0.45481, 0.21746, 0.10504, -0.61977, 3.54001, 0.04612, +-0.18108, 0.63211, -0.87046, 2.47673, -0.48016, -1.27157, 0.86505, -1.11342, + 1.24612, -0.82385, -2.77082, 1.23606, -1.59529, 0.10438, -1.30206, -4.11520, + 5.62596, -2.75800 +}; + +static const float wr[122] = { + 0.00000, -0.17160, 0.27010, 0.29906, -0.08471, 0.00000, -1.40375, -1.05121, + 0.07217, -0.01351, 0.00000, -1.29506, -2.16183, 0.13246, -1.03508, 0.00000, +-2.29847, -2.34631, 0.17253, 0.08302, 0.00000, -1.27266, -2.87401, -0.07456, +-0.34240, 0.00000, -1.34640, -2.46556, -0.13022, -0.01591, 0.00000, 0.27448, + 0.60015, 0.48302, 0.25236, 0.00000, 0.39521, 0.68178, 0.05281, 0.09266, + 0.00000, 0.24855, -0.06844, -0.37646, 0.05685, 0.00000, 0.17405, 0.00430, + 0.74427, 0.00576, 0.00000, 0.12392, 0.31202, -0.91035, -0.16270, 0.00000, + 0.01418, -0.10839, -0.02781, -0.88035, 0.00000, 1.07274, 2.00366, 1.16242, + 0.22520, 0.00000, 0.85631, 1.06349, 1.49549, 0.18966, 0.00000, 0.37183, +-0.50352, -0.14818, 0.12039, 0.00000, 0.13681, 0.13978, 1.11245, -0.12707, + 0.00000, -0.22082, 0.20178, -0.06285, -0.52728, 0.00000, -0.13597, -0.19412, +-0.09308, -1.26062, 0.00000, 3.05454, 5.16874, 1.50680, 5.35000, 0.00000, + 2.19605, 3.85390, 0.88296, 2.30052, 0.00000, 0.92321, 1.08744, -0.11696, +-0.78560, 0.00000, -0.09795, -0.83050, -1.09167, -4.94251, 0.00000, -1.00316, +-3.66465, -2.56906, -9.67677, 0.00000, -2.77982, -7.26713, -3.40177,-12.32252, + 0.00000, 3.42040 +}; + + +float pubeval(race,pos) +int race,pos[]; +{ + /* Backgammon move-selection evaluation function + for benchmark comparisons. Computes a linear + evaluation function: Score = W * X, where X is + an input vector encoding the board state (using + a raw encoding of the number of men at each location), + and W is a weight vector. Separate weight vectors + are used for racing positions and contact positions. + Makes lots of obvious mistakes, but provides a + decent level of play for benchmarking purposes. */ + + /* Provided as a public service to the backgammon + programming community by Gerry Tesauro, IBM Research. + (e-mail: tesauro@watson.ibm.com) */ + + /* The following inputs are needed for this routine: + + race is an integer variable which should be set + based on the INITIAL position BEFORE the move. + Set race=1 if the position is a race (i.e. no contact) + and 0 if the position is a contact position. + + pos[] is an integer array of dimension 28 which + should represent a legal final board state after + the move. Elements 1-24 correspond to board locations + 1-24 from computer's point of view, i.e. computer's + men move in the negative direction from 24 to 1, and + opponent's men move in the positive direction from + 1 to 24. Computer's men are represented by positive + integers, and opponent's men are represented by negative + integers. Element 25 represents computer's men on the + bar (positive integer), and element 0 represents opponent's + men on the bar (negative integer). Element 26 represents + computer's men off the board (positive integer), and + element 27 represents opponent's men off the board + (negative integer). */ + + /* Also, be sure to call rdwts() at the start of your + program to read in the weight values. Happy hacking] */ + + int i; + float score; + + if(pos[26]==15) return(99999999.); + /* all men off, best possible move */ + + setx(pos); /* sets input array x[] */ + score = 0.0; + if(race) { /* use race weights */ + for(i=0;i<122;++i) score += wr[i]*x[i]; + } + else { /* use contact weights */ + for(i=0;i<122;++i) score += wc[i]*x[i]; + } + return(score); +} + +setx(pos) +int pos[]; +{ + /* sets input vector x[] given board position pos[] */ + extern float x[]; + int j, jm1, n; + /* initialize */ + for(j=0;j<122;++j) x[j] = 0.0; + + /* first encode board locations 24-1 */ + for(j=1;j<=24;++j) { + jm1 = j - 1; + n = pos[25-j]; + if(n!=0) { + if(n==-1) x[5*jm1+0] = 1.0; + if(n==1) x[5*jm1+1] = 1.0; + if(n>=2) x[5*jm1+2] = 1.0; + if(n==3) x[5*jm1+3] = 1.0; + if(n>=4) x[5*jm1+4] = (float)(n-3)/2.0; + } + } + /* encode opponent barmen */ + x[120] = -(float)(pos[0])/2.0; + /* encode computer's menoff */ + x[121] = (float)(pos[26])/15.0; +} + +int main(int argc, char**argv) { + int test[28]; //= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + int i = 0; + char *ptr; + + + for (i=1; iNTI!1g0qDDtpSQl9lfmPIC78iu= zE=n4smNY)%qrOAc7^CdmF=|C)nxLX-e0+^TU361XTlH&<^qiSlSkUzQ`d`1Vd%o|^ zz31F>e&^hC@10rh<*R@F@T4^%nqWd~L4*(n%Ek~vibw>s2ze0-4@D3rj`)pXx?%DJ zlr|?#bM~1W;`9Lrf)H;=if^tW+qZ8nBPtKE5eSF0Qo98qH!Ux>Nw+0l1r7$fkHCPC zkk=|r<*6k$e^wB3a;+&jjiQ!#mUaqWM^(ee`wZHscunyko?uS1WTj<;QcFCWiWj8< zAdh)(q^IID&dZ@j=a8*d}9(hvk=PHuL3<{VYt5^p;!0L-5XN09fY zX0bwtAXqaOrDvuJ>6xbaGT(fDcdK}~hLDhNiJ|l;Ph2M@5P%?z9uYsnuLsrx#^qZV zEo)pS7_)Eef{G9p4}o&m$*M0rlkyIX|)7 z*|hKD#*80p7(K?fXCqq71Mo-#)Jes-{IMe8y)pd_2m4`arm7XM=8kLO(ce z>{C)z3*l>;e1rcfi*s_*=0%N+f;iGs6zkqlp+6}?$zNZU$NU87|1SdxyyH4=_eMDK zAMp-rJ@2&E+n1RcrU*Y!_nGz~mtbeyXF3=j*Bu85@650FR6&E8DxuV4&woT5hFYd@ zJP1!3yznIUeD4yb6jvu-8uAkCQoc0o0Ko9Z1TR2p-{p!E2W^ z4uiK&Y-nhx`#H2PVSfTzB5cwB0u6%#$BL`R*m}&{k4MZrW-61%@5rxj7CP*441G2@ zgK;X+cF>kI#aII|f`tb3V6a`8RpHP)gzq@#IIq1<;~3}RoeA~*4PM?@@6_&t4B7K* z2~+sJY*E|56oo539>Yr))0_vJIAfeA#C{Y8Rt+i9mVt$)QSZE}H?dJq*)8>w@ny{Z8Fyrk}dU z%&NM_Oh2kvYkk&rU`OOfQ@Xx+($Ih)YXJo6Fsyl}p&p>xo0w(Q1$V*5W@0j7;M30n za5Vs*h9rc!alREz-t6xcIzzTVr_=BlrqL9|yi$mci9DYL-U?{)&X8rePMm~y7#^K0 zdK)GURUGa-Xz`9uvTzoQEUH*(L1e@!%VLXAjVT?AP^F*D2Ierngn4HX8uG?cwB((` zt32-@>gS^syAgJeW{gpC#248V0j_Hqv2HUM3~t~{tu{~LW3VmNS>dG(!;&gA9y#I8 z*S`P>?;HXf?GZopiKFN(uwjbub2V7v3y$N!W*(Ejx@8T$wt;d(ukG@4Cp-y>EA3tY zjS*8CdM$vu%#6ai8vtU)Pet8jZ1u+0S!lsGt5S@E+;gZ#NR(c#2=Uc2TleWe(e>tU z+0^jgvsrKNaij1Td-v0@l`H6#FW=LJPMcYYvf|@gsb$7<(z?y9SzCS~MOkFG7o@v= zJ4*+44PsMU?bM-c#f^_>)twmW*1mP_=O=tDqU=KAZd&<8cPTX~M65_yF5_(}aj}`3 zU*t^VAF}bU7qF=JFzGZK>4@UuCM4;KkGo~OTKh8A(9h+bNhQ&B$t=tI4U1;AH(jwy zOIR*Ntc*q56Q_RCbw0X*-5;7x&zJX>{l-o)4`2P`75ACy?gL88d@h5K+R#SSS}eWTY&!%@)~;8-%`7*+p!#>VfRn+a;9Ek7U?msVlk7 z;X$&G&EY9x#)k2%?Ngik?2oNvABSSwiD@U_5D#w@-3>D~$+GwJN9iY-w(8GamAF@5 z-KEEvJGnyjaoZN^cePF74#mD8%a%>GaKi^Xxz9@OxI1+pBija*SGl)+5Kq53;-u~R zu9wi(HOEFbI_64CGkQv4=er$6+3!4qrFfS|DnB?)YS-5w%W7^;*JB^js4_cSGowEZ z|KoX@w^kz^9uO%Fsp=_)2Cbqgg(bA()_Jtw{04e=Y$-K;JD#5V>nYvd`fz&i!e*LL z-i4ai45z_fBR4AXq`0&E3|*<0=-!Sm(Gy3+SDM3~^z%4>>lnmH7ADHh1fh zBk86~?ZoAYwcO>yAFySnO188*ny%>`!mcjvA)Is0Apc$#o@yUyG3I=4&Pj$8B9S@y!UV*2f=jk+U05dHF-PuO40$l$z_%=E_8 z9Nn?hTU=OtXKq`~7VctPB0W27r&ztFufDIR71w&?WX}EP&UE6Gwe;StME$~J;oOz8 z9l7Y>_uX^nekSe@b?Gl%_OKWB%wmVUa#6?a2&XGnrRb-4hU?nS8!A4i?!hg(FpJXV z;rfz`{}li7`A772&P>i#xRPGBZQyEdPNv-x21_w9>$!!|XK2)k8r{tu+vuT>?b4Oj z2lO57gQQ+2<#+YV%Cr;z8a@f~-?{ik!h(OJB4HbUCx^oC%=S=JYHauf&#KD+0~O2F zdW%{=OE(}3WFU}%Kn4OC2xK6Tfj|ZV83<$`kbyu30vQNoAdrFo?F@8?>^cP_?4TF$ zDm>YkAsEdW!W?UIb}E^a-M&nZmtyiIxsU?)yl|;)^T0`@Oqw#1-L-|KDLLIjKpp|| zEVyXSH6hmrIJkGtosWGoX#}{p(+cy73j)tf&^dI1UYNXCR3i0W)fw3ee>g+nWtu2SOgd6 Mi;P6b1Mh_V8z4+axc~qF literal 0 HcmV?d00001