# backgammon/bot.py

from cup import Cup
import tensorflow as tf
from network import Network
import numpy as np
from board import Board
import subprocess
import random
import sys

class Bot:
    """Backgammon player that picks moves with its network, pubeval, or at random.

    Every instance owns its own TensorFlow graph and session; the network is
    built inside that graph and its saved model is restored on construction.
    """

    def __init__(self, sym, config=None):
        """Create the bot, build its network and restore the saved model.

        Args:
            sym: Identifier of the side this bot plays (presumably 1 or -1,
                matching ``switch`` -- confirm against callers).
            config: Configuration object handed unchanged to ``Network``.
        """
        self.config = config
        self.cup = Cup()
        self.sym = sym
        # A dedicated graph keeps this bot's network separate from any other
        # graph that may exist in the process.
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.session = tf.Session()
            self.network = Network(self.session, config)
            self.network.restore_model()

    def roll(self):
        """Announce that this bot rolls, then return the result of the cup roll."""
        print("{} rolled: ".format(self.sym))
        return self.cup.roll()

    def switch(self, cur):
        """Return -1 when ``cur`` is 1, and 1 for every other value."""
        if cur == 1:
            return -1
        return 1

    def restore_model(self):
        """Restore the network's saved model with this bot's graph as default."""
        with self.graph.as_default():
            self.network.restore_model()

    def get_session(self):
        """Return the TensorFlow session owned by this bot."""
        return self.session

    def get_sym(self):
        """Return the side identifier this bot was created with."""
        return self.sym

    def get_network(self):
        """Return the ``Network`` instance owned by this bot."""
        return self.network

    @staticmethod
    def _pick_best(moves_and_scores):
        """Return the (move, score) pair with the highest score.

        Ties are resolved in favour of the earliest pair, as ``argmax`` does.

        Raises:
            ValueError: If ``moves_and_scores`` is empty.
        """
        if not moves_and_scores:
            # argmax on an empty array also raises ValueError; fail with a
            # message that names the real cause instead.
            raise ValueError("no legal moves to choose from")
        scores = [score for _, score in moves_and_scores]
        return moves_and_scores[np.array(scores).argmax()]

    def make_random_move(self, board, sym, roll):
        """Return one legal successor state of ``board`` chosen uniformly at random.

        Raises:
            IndexError: If there are no legal states to choose from.
        """
        legal_moves = Board.calculate_legal_states(board, sym, roll)
        return random.choice(list(legal_moves))

    # TODO: Test this, the score results should be deterministic
    def make_pubeval_move(self, board, sym, roll):
        """Return the legal successor state rated highest by the pubeval binary.

        Each candidate state is converted with ``Board.board_features_to_pubeval``
        and passed as command-line arguments to ``./pubeval/pubeval`` (resolved
        relative to the current working directory); the program's output is
        parsed as a float score.

        Returns:
            A two-element list ``[state, score]`` for the best candidate.

        Raises:
            ValueError: If there are no legal states, or the program's output
                cannot be parsed as a float.
            subprocess.CalledProcessError: If the program exits with a
                non-zero status.
        """
        legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
        moves_and_scores = []
        # Use a distinct loop name so the ``board`` parameter is not shadowed.
        for candidate in legal_moves:
            features = Board.board_features_to_pubeval(candidate, sym)
            call_argument = ["./pubeval/pubeval"] + [str(x) for x in features]
            data = subprocess.check_output(call_argument)
            moves_and_scores.append([candidate, float(data.decode())])
        return self._pick_best(moves_and_scores)

    def make_move(self, board, sym, roll):
        """Return the legal successor state the network scores highest.

        Each candidate state is reshaped to a (1, 26) array and scored with
        ``self.network.eval_state``.

        Returns:
            A tuple ``(state, score)`` for the best candidate.

        Raises:
            ValueError: If there are no legal states.
        """
        legal_moves = Board.calculate_legal_states(board, sym, roll)
        moves_and_scores = [
            (move, self.network.eval_state(np.array(move).reshape(1, 26)))
            for move in legal_moves
        ]
        return self._pick_best(moves_and_scores)