Some flags from main.py are gone, rolls now allow a face_value of 0 yet
again, and it is now possible to play against the AI. There is no flag for playing against the AI yet, so one has to be added.
This commit is contained in:
parent
ba4ef86bb5
commit
926a331df0
39
board.py
39
board.py
|
@ -170,12 +170,27 @@ class Board:
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def apply_moves_to_board(board, player, moves):
|
def apply_moves_to_board(board, player, move):
|
||||||
for move in moves:
|
from_idx = move[0]
|
||||||
from_idx, to_idx = move.split("/")
|
to_idx = move[1]
|
||||||
board[int(from_idx)] -= int(player)
|
board = list(board)
|
||||||
board[int(to_idx)] += int(player)
|
board[from_idx] -= player
|
||||||
return board
|
|
||||||
|
if (to_idx < 1 or to_idx > 24):
|
||||||
|
return
|
||||||
|
|
||||||
|
if (board[to_idx] * player == -1):
|
||||||
|
|
||||||
|
if (player == 1):
|
||||||
|
board[25] -= player
|
||||||
|
else:
|
||||||
|
board[0] -= player
|
||||||
|
|
||||||
|
board[to_idx] = 0
|
||||||
|
|
||||||
|
board[to_idx] += player
|
||||||
|
|
||||||
|
return tuple(board)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def calculate_legal_states(board, player, roll):
|
def calculate_legal_states(board, player, roll):
|
||||||
|
@ -186,6 +201,8 @@ class Board:
|
||||||
# turn and then do something with the second die
|
# turn and then do something with the second die
|
||||||
|
|
||||||
def calc_moves(board, face_value):
|
def calc_moves(board, face_value):
|
||||||
|
if face_value == 0:
|
||||||
|
return [board]
|
||||||
return quack.calc_moves(board, player, face_value)
|
return quack.calc_moves(board, player, face_value)
|
||||||
|
|
||||||
# Problem with cal_moves: Method can return empty list (should always contain at least same board).
|
# Problem with cal_moves: Method can return empty list (should always contain at least same board).
|
||||||
|
@ -200,15 +217,21 @@ class Board:
|
||||||
if not Board.any_move_valid(board, player, roll):
|
if not Board.any_move_valid(board, player, roll):
|
||||||
return { board }
|
return { board }
|
||||||
dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
|
dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
|
||||||
|
#print("Permuts:",dice_permutations)
|
||||||
# print("Dice permuts:",dice_permutations)
|
# print("Dice permuts:",dice_permutations)
|
||||||
for roll in dice_permutations:
|
for roll in dice_permutations:
|
||||||
# Calculate boards resulting from first move
|
# Calculate boards resulting from first move
|
||||||
#print("initial board: ", board)
|
#print("initial board: ", board)
|
||||||
#print("roll:", roll)
|
#print("roll:", roll)
|
||||||
|
#print("Rest of roll:",roll[1:])
|
||||||
boards = calc_moves(board, roll[0])
|
boards = calc_moves(board, roll[0])
|
||||||
|
#print("Boards:",boards)
|
||||||
|
#print("Roll:",roll[0])
|
||||||
#print("boards after first die: ", boards)
|
#print("boards after first die: ", boards)
|
||||||
|
|
||||||
for die in roll[1:]:
|
for die in roll[1:]:
|
||||||
|
# if die != 0:
|
||||||
|
if True:
|
||||||
# Calculate boards resulting from second move
|
# Calculate boards resulting from second move
|
||||||
nested_boards = [calc_moves(board, die) for board in boards]
|
nested_boards = [calc_moves(board, die) for board in boards]
|
||||||
#print("nested boards: ", nested_boards)
|
#print("nested boards: ", nested_boards)
|
||||||
|
@ -245,9 +268,9 @@ class Board:
|
||||||
return """
|
return """
|
||||||
13 14 15 16 17 18 19 20 21 22 23 24
|
13 14 15 16 17 18 19 20 21 22 23 24
|
||||||
+--------------------------------------------------------------------------+
|
+--------------------------------------------------------------------------+
|
||||||
| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO|
|
| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO|
|
||||||
|---|---|---|---|---|---|------------|---|---|---|---|---|---| |
|
|---|---|---|---|---|---|------------|---|---|---|---|---|---| |
|
||||||
| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO|
|
| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
|
||||||
+--------------------------------------------------------------------------+
|
+--------------------------------------------------------------------------+
|
||||||
12 11 10 9 8 7 6 5 4 3 2 1
|
12 11 10 9 8 7 6 5 4 3 2 1
|
||||||
""".format(*temp)
|
""".format(*temp)
|
||||||
|
|
84
bot.py
84
bot.py
|
@ -1,24 +1,8 @@
|
||||||
from cup import Cup
|
|
||||||
from network import Network
|
|
||||||
from board import Board
|
from board import Board
|
||||||
|
|
||||||
import tensorflow as tf
|
|
||||||
import numpy as np
|
|
||||||
import random
|
|
||||||
|
|
||||||
class Bot:
|
class Bot:
|
||||||
def __init__(self, sym, config = None, name = "unnamed"):
|
def __init__(self, sym):
|
||||||
self.config = config
|
|
||||||
self.cup = Cup()
|
|
||||||
self.sym = sym
|
self.sym = sym
|
||||||
self.graph = tf.Graph()
|
|
||||||
|
|
||||||
self.network = Network(config, name)
|
|
||||||
self.network.restore_model()
|
|
||||||
|
|
||||||
def restore_model(self):
|
|
||||||
with self.graph.as_default():
|
|
||||||
self.network.restore_model()
|
|
||||||
|
|
||||||
def get_session(self):
|
def get_session(self):
|
||||||
return self.session
|
return self.session
|
||||||
|
@ -26,16 +10,60 @@ class Bot:
|
||||||
def get_sym(self):
|
def get_sym(self):
|
||||||
return self.sym
|
return self.sym
|
||||||
|
|
||||||
def get_network(self):
|
|
||||||
return self.network
|
|
||||||
|
|
||||||
# TODO: DEPRECATE
|
def calc_move_sets(self, from_board, roll, player):
|
||||||
def make_move(self, board, sym, roll):
|
board = from_board
|
||||||
# print(Board.pretty(board))
|
sets = []
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
total = 0
|
||||||
moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
|
print("board!:",board)
|
||||||
scores = [ x[1] for x in moves_and_scores ]
|
for r in roll:
|
||||||
best_move_pair = moves_and_scores[np.array(scores).argmax()]
|
# print("Value of r:",r)
|
||||||
#print("Found the best state, being:", np.array(move_scores).argmax())
|
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
|
||||||
return best_move_pair
|
total += r
|
||||||
|
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
|
||||||
|
return sets
|
||||||
|
|
||||||
|
|
||||||
|
def handle_move(self, from_board, to_board, roll, player):
|
||||||
|
|
||||||
|
# print("Cur board:",board)
|
||||||
|
sets = self.calc_move_sets(from_board, roll, player)
|
||||||
|
for idx, board_set in enumerate(sets):
|
||||||
|
board_set[0] = list(board_set[0])
|
||||||
|
# print("My board_set:",board_set)
|
||||||
|
if to_board in [list(c) for c in board_set[0]]:
|
||||||
|
self.total_moves -= board_set[1]
|
||||||
|
if idx < 2:
|
||||||
|
# print("Roll object:",self.roll)
|
||||||
|
self.roll[idx] = 0
|
||||||
|
else:
|
||||||
|
self.roll = [0,0]
|
||||||
|
break
|
||||||
|
print("Total moves left:",self.total_moves)
|
||||||
|
|
||||||
|
|
||||||
|
def tmp_name(self, from_board, to_board, roll, player, total_moves):
|
||||||
|
sets = self.calc_move_sets(from_board, roll, player)
|
||||||
|
return_board = from_board
|
||||||
|
for idx, board_set in enumerate(sets):
|
||||||
|
board_set = list(board_set[0])
|
||||||
|
if to_board in [list(board) for board in board_set]:
|
||||||
|
total_moves -= board_set[1]
|
||||||
|
# if it's not the sum of the moves
|
||||||
|
if idx < 2:
|
||||||
|
roll[idx] = 0
|
||||||
|
else:
|
||||||
|
roll = [0,0]
|
||||||
|
return_board = to_board
|
||||||
|
break
|
||||||
|
return total_moves, roll, return_board
|
||||||
|
|
||||||
|
def make_human_move(self, board, player, roll):
|
||||||
|
total_moves = roll[0] + roll[1]
|
||||||
|
previous_board = board
|
||||||
|
while total_moves != 0:
|
||||||
|
move = input("Pick a move!\n")
|
||||||
|
to_board = Board.apply_moves_to_board(previous_board, player, move)
|
||||||
|
total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)
|
||||||
|
|
||||||
|
|
||||||
|
|
18
main.py
18
main.py
|
@ -31,12 +31,8 @@ parser.add_argument('--train-perpetually', action='store_true',
|
||||||
help='start new training session as soon as the previous is finished')
|
help='start new training session as soon as the previous is finished')
|
||||||
parser.add_argument('--list-models', action='store_true',
|
parser.add_argument('--list-models', action='store_true',
|
||||||
help='list all known models')
|
help='list all known models')
|
||||||
parser.add_argument('--force-creation', action='store_true',
|
|
||||||
help='force model creation if model does not exist')
|
|
||||||
parser.add_argument('--board-rep', action='store', dest='board_rep',
|
parser.add_argument('--board-rep', action='store', dest='board_rep',
|
||||||
help='name of board representation to use as input to neural network')
|
help='name of board representation to use as input to neural network')
|
||||||
parser.add_argument('--use-baseline', action='store_true',
|
|
||||||
help='use the baseline model, note, has size 28')
|
|
||||||
parser.add_argument('--verbose', action='store_true',
|
parser.add_argument('--verbose', action='store_true',
|
||||||
help='If set, a lot of stuff will be printed')
|
help='If set, a lot of stuff will be printed')
|
||||||
parser.add_argument('--ply', action='store', dest='ply', default='0',
|
parser.add_argument('--ply', action='store', dest='ply', default='0',
|
||||||
|
@ -46,9 +42,6 @@ parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.model == "baseline_model":
|
|
||||||
print("Model name 'baseline_model' not allowed")
|
|
||||||
exit()
|
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
'model': args.model,
|
'model': args.model,
|
||||||
|
@ -64,8 +57,6 @@ config = {
|
||||||
'model_storage_path': 'models',
|
'model_storage_path': 'models',
|
||||||
'bench_storage_path': 'bench',
|
'bench_storage_path': 'bench',
|
||||||
'board_representation': args.board_rep,
|
'board_representation': args.board_rep,
|
||||||
'force_creation': args.force_creation,
|
|
||||||
'use_baseline': args.use_baseline,
|
|
||||||
'global_step': 0,
|
'global_step': 0,
|
||||||
'verbose': args.verbose,
|
'verbose': args.verbose,
|
||||||
'ply': args.ply,
|
'ply': args.ply,
|
||||||
|
@ -87,6 +78,14 @@ if not os.path.isdir(log_path):
|
||||||
os.mkdir(log_path)
|
os.mkdir(log_path)
|
||||||
|
|
||||||
|
|
||||||
|
def save_config():
|
||||||
|
import yaml
|
||||||
|
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
|
||||||
|
# config_path = os.path.join(checkpoint_path, 'config')
|
||||||
|
# with open(config_path, 'a+') as f:
|
||||||
|
# print("lol")
|
||||||
|
print(yaml.dump(config))
|
||||||
|
|
||||||
# Define helper functions
|
# Define helper functions
|
||||||
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
|
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
|
||||||
format_vars = { 'trained_eps': trained_eps,
|
format_vars = { 'trained_eps': trained_eps,
|
||||||
|
@ -173,6 +172,7 @@ if __name__ == "__main__":
|
||||||
# Set up network
|
# Set up network
|
||||||
from network import Network
|
from network import Network
|
||||||
|
|
||||||
|
save_config()
|
||||||
# Set up variables
|
# Set up variables
|
||||||
episode_count = config['episode_count']
|
episode_count = config['episode_count']
|
||||||
|
|
||||||
|
|
25
network.py
25
network.py
|
@ -9,6 +9,7 @@ from eval import Eval
|
||||||
import glob
|
import glob
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
import tensorflow.contrib.eager as tfe
|
import tensorflow.contrib.eager as tfe
|
||||||
|
from player import Player
|
||||||
|
|
||||||
class Network:
|
class Network:
|
||||||
# board_features_quack has size 28
|
# board_features_quack has size 28
|
||||||
|
@ -562,6 +563,28 @@ class Network:
|
||||||
return outcomes
|
return outcomes
|
||||||
|
|
||||||
|
|
||||||
|
def play_against_network(self):
|
||||||
|
self.restore_model()
|
||||||
|
human_player = Player(-1)
|
||||||
|
cur_player = 1
|
||||||
|
player = 1
|
||||||
|
board = Board.initial_state
|
||||||
|
i = 0
|
||||||
|
while Board.outcome(board) is None:
|
||||||
|
print(Board.pretty(board))
|
||||||
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
|
print("Bot rolled:", roll)
|
||||||
|
|
||||||
|
board, _ = self.make_move(board, roll, player)
|
||||||
|
print(Board.pretty(board))
|
||||||
|
roll = (random.randrange(1, 7), random.randrange(1, 7))
|
||||||
|
print("You rolled:", roll)
|
||||||
|
board = human_player.make_human_move(board, roll)
|
||||||
|
print("DONE "*10)
|
||||||
|
print(Board.pretty(board))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
|
def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -570,7 +593,7 @@ class Network:
|
||||||
:param trained_eps:
|
:param trained_eps:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
with tf.Session() as sess:
|
|
||||||
difference_in_vals = 0
|
difference_in_vals = 0
|
||||||
|
|
||||||
self.restore_model()
|
self.restore_model()
|
||||||
|
|
|
@ -9,8 +9,8 @@ from board import Board
|
||||||
import main
|
import main
|
||||||
|
|
||||||
config = main.config.copy()
|
config = main.config.copy()
|
||||||
config['model'] = "eager_testings"
|
config['model'] = "player_testings"
|
||||||
config['force_creation'] = True
|
config['ply'] = "1"
|
||||||
config['board_representation'] = 'quack-fat'
|
config['board_representation'] = 'quack-fat'
|
||||||
network = Network(config, config['model'])
|
network = Network(config, config['model'])
|
||||||
|
|
||||||
|
@ -40,19 +40,21 @@ boards = {initial_state,
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
board = network.board_trans_func(Board.initial_state, 1)
|
# board = network.board_trans_func(Board.initial_state, 1)
|
||||||
|
|
||||||
|
|
||||||
pair = network.make_move(Board.initial_state, [3,2], 1)
|
# pair = network.make_move(Board.initial_state, [3,2], 1)
|
||||||
|
|
||||||
print(pair[1])
|
# print(pair[1])
|
||||||
|
|
||||||
network.do_backprop(board, 0.9)
|
# network.do_backprop(board, 0.9)
|
||||||
|
|
||||||
|
|
||||||
network.print_variables()
|
# network.print_variables()
|
||||||
|
|
||||||
|
|
||||||
network.save_model(2)
|
# network.save_model(2)
|
||||||
|
|
||||||
print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
|
# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
|
||||||
|
|
||||||
|
network.play_against_network()
|
60
player.py
60
player.py
|
@ -11,19 +11,55 @@ class Player:
|
||||||
def get_sym(self):
|
def get_sym(self):
|
||||||
return self.sym
|
return self.sym
|
||||||
|
|
||||||
def make_move(self, board, sym, roll):
|
def calc_move_sets(self, from_board, roll, player):
|
||||||
print(Board.pretty(board))
|
board = from_board
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
sets = []
|
||||||
if roll[0] == roll[1]:
|
total = 0
|
||||||
print("Example of move: 4/6,6/8,12/14,13/15")
|
for r in roll:
|
||||||
|
# print("Value of r:",r)
|
||||||
|
sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
|
||||||
|
total += r
|
||||||
|
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
|
||||||
|
return sets
|
||||||
|
|
||||||
|
|
||||||
|
def tmp_name(self, from_board, to_board, roll, player, total_moves):
|
||||||
|
sets = self.calc_move_sets(from_board, roll, player)
|
||||||
|
return_board = from_board
|
||||||
|
for idx, board_set in enumerate(sets):
|
||||||
|
|
||||||
|
board_set[0] = list(board_set[0])
|
||||||
|
print(to_board)
|
||||||
|
print(board_set)
|
||||||
|
if to_board in board_set[0]:
|
||||||
|
total_moves -= board_set[1]
|
||||||
|
# if it's not the sum of the moves
|
||||||
|
if idx < 2:
|
||||||
|
roll[idx] = 0
|
||||||
else:
|
else:
|
||||||
print("Example of move: 4/6,13/17")
|
roll = [0,0]
|
||||||
|
return_board = to_board
|
||||||
|
break
|
||||||
|
return total_moves, roll, return_board
|
||||||
|
|
||||||
user_moves = input("Enter your move: ").strip().split(",")
|
def make_human_move(self, board, roll):
|
||||||
board = Board.apply_moves_to_board(board, sym, user_moves)
|
total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
|
||||||
while board not in legal_moves:
|
move = ""
|
||||||
print("Move is invalid, please enter a new move")
|
while total_moves != 0:
|
||||||
user_moves = input("Enter your move: ").strip().split(",")
|
while True:
|
||||||
board = Board.apply_moves_to_board(board, sym, user_moves)
|
print("You have {roll} left!".format(roll=total_moves))
|
||||||
|
move = input("Pick a move!\n")
|
||||||
|
pot_move = move.split("/")
|
||||||
|
if len(pot_move) == 2:
|
||||||
|
try:
|
||||||
|
pot_move[0] = int(pot_move[0])
|
||||||
|
pot_move[1] = int(pot_move[1])
|
||||||
|
move = pot_move
|
||||||
|
break;
|
||||||
|
except TypeError:
|
||||||
|
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
|
||||||
|
|
||||||
|
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
|
||||||
|
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
|
||||||
|
print(Board.pretty(board))
|
||||||
return board
|
return board
|
Loading…
Reference in New Issue
Block a user