Some flags from main.py are gone, rolls now allow a face_value of 0 yet

again, and it is possible to play against the AI. There is no flag
for playing against the AI yet, so one has to be added.
This commit is contained in:
Alexander Munch-Hansen 2018-05-13 23:54:13 +02:00
parent ba4ef86bb5
commit 926a331df0
6 changed files with 244 additions and 132 deletions

View File

@ -170,12 +170,27 @@ class Board:
@staticmethod @staticmethod
def apply_moves_to_board(board, player, moves): def apply_moves_to_board(board, player, move):
for move in moves: from_idx = move[0]
from_idx, to_idx = move.split("/") to_idx = move[1]
board[int(from_idx)] -= int(player) board = list(board)
board[int(to_idx)] += int(player) board[from_idx] -= player
return board
if (to_idx < 1 or to_idx > 24):
return
if (board[to_idx] * player == -1):
if (player == 1):
board[25] -= player
else:
board[0] -= player
board[to_idx] = 0
board[to_idx] += player
return tuple(board)
@staticmethod @staticmethod
def calculate_legal_states(board, player, roll): def calculate_legal_states(board, player, roll):
@ -186,6 +201,8 @@ class Board:
# turn and then do something with the second die # turn and then do something with the second die
def calc_moves(board, face_value): def calc_moves(board, face_value):
if face_value == 0:
return [board]
return quack.calc_moves(board, player, face_value) return quack.calc_moves(board, player, face_value)
# Problem with cal_moves: Method can return empty list (should always contain at least same board). # Problem with cal_moves: Method can return empty list (should always contain at least same board).
@ -200,26 +217,32 @@ class Board:
if not Board.any_move_valid(board, player, roll): if not Board.any_move_valid(board, player, roll):
return { board } return { board }
dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4] dice_permutations = list(itertools.permutations(roll)) if roll[0] != roll[1] else [[roll[0]]*4]
#print("Permuts:",dice_permutations)
# print("Dice permuts:",dice_permutations) # print("Dice permuts:",dice_permutations)
for roll in dice_permutations: for roll in dice_permutations:
# Calculate boards resulting from first move # Calculate boards resulting from first move
#print("initial board: ", board) #print("initial board: ", board)
#print("roll:", roll) #print("roll:", roll)
#print("Rest of roll:",roll[1:])
boards = calc_moves(board, roll[0]) boards = calc_moves(board, roll[0])
#print("Boards:",boards)
#print("Roll:",roll[0])
#print("boards after first die: ", boards) #print("boards after first die: ", boards)
for die in roll[1:]: for die in roll[1:]:
# Calculate boards resulting from second move # if die != 0:
nested_boards = [calc_moves(board, die) for board in boards] if True:
#print("nested boards: ", nested_boards) # Calculate boards resulting from second move
boards = [board for boards in nested_boards for board in boards] nested_boards = [calc_moves(board, die) for board in boards]
# What the fuck #print("nested boards: ", nested_boards)
#for board in boards: boards = [board for boards in nested_boards for board in boards]
# print(board) # What the fuck
# print("type__:",type(board)) #for board in boards:
# Add resulting unique boards to set of legal boards resulting from roll # print(board)
# print("type__:",type(board))
# Add resulting unique boards to set of legal boards resulting from roll
#print("printing boards from calculate_legal_states: ", boards) #print("printing boards from calculate_legal_states: ", boards)
legal_moves = legal_moves | set(boards) legal_moves = legal_moves | set(boards)
# print("legal moves: ", legal_moves) # print("legal moves: ", legal_moves)
if len(legal_moves) == 0: if len(legal_moves) == 0:
@ -245,9 +268,9 @@ class Board:
return """ return """
13 14 15 16 17 18 19 20 21 22 23 24 13 14 15 16 17 18 19 20 21 22 23 24
+--------------------------------------------------------------------------+ +--------------------------------------------------------------------------+
| {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end -1: TODO| | {13}| {14}| {15}| {16}| {17}| {18}| bar -1: {25} | {19}| {20}| {21}| {22}| {23}| {24}| end 1: TODO|
|---|---|---|---|---|---|------------|---|---|---|---|---|---| | |---|---|---|---|---|---|------------|---|---|---|---|---|---| |
| {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end 1: TODO| | {12}| {11}| {10}| {9}| {8}| {7}| bar 1: {0} | {6}| {5}| {4}| {3}| {2}| {1}| end -1: TODO|
+--------------------------------------------------------------------------+ +--------------------------------------------------------------------------+
12 11 10 9 8 7 6 5 4 3 2 1 12 11 10 9 8 7 6 5 4 3 2 1
""".format(*temp) """.format(*temp)

84
bot.py
View File

@ -1,24 +1,8 @@
from cup import Cup
from network import Network
from board import Board from board import Board
import tensorflow as tf
import numpy as np
import random
class Bot: class Bot:
def __init__(self, sym, config = None, name = "unnamed"): def __init__(self, sym):
self.config = config
self.cup = Cup()
self.sym = sym self.sym = sym
self.graph = tf.Graph()
self.network = Network(config, name)
self.network.restore_model()
def restore_model(self):
with self.graph.as_default():
self.network.restore_model()
def get_session(self): def get_session(self):
return self.session return self.session
@ -26,16 +10,60 @@ class Bot:
def get_sym(self): def get_sym(self):
return self.sym return self.sym
def get_network(self):
return self.network
# TODO: DEPRECATE def calc_move_sets(self, from_board, roll, player):
def make_move(self, board, sym, roll): board = from_board
# print(Board.pretty(board)) sets = []
legal_moves = Board.calculate_legal_states(board, sym, roll) total = 0
moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] print("board!:",board)
scores = [ x[1] for x in moves_and_scores ] for r in roll:
best_move_pair = moves_and_scores[np.array(scores).argmax()] # print("Value of r:",r)
#print("Found the best state, being:", np.array(move_scores).argmax()) sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
return best_move_pair total += r
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
return sets
def handle_move(self, from_board, to_board, roll, player):
# print("Cur board:",board)
sets = self.calc_move_sets(from_board, roll, player)
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
# print("My board_set:",board_set)
if to_board in [list(c) for c in board_set[0]]:
self.total_moves -= board_set[1]
if idx < 2:
# print("Roll object:",self.roll)
self.roll[idx] = 0
else:
self.roll = [0,0]
break
print("Total moves left:",self.total_moves)
def tmp_name(self, from_board, to_board, roll, player, total_moves):
sets = self.calc_move_sets(from_board, roll, player)
return_board = from_board
for idx, board_set in enumerate(sets):
board_set = list(board_set[0])
if to_board in [list(board) for board in board_set]:
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < 2:
roll[idx] = 0
else:
roll = [0,0]
return_board = to_board
break
return total_moves, roll, return_board
def make_human_move(self, board, player, roll):
total_moves = roll[0] + roll[1]
previous_board = board
while total_moves != 0:
move = input("Pick a move!\n")
to_board = Board.apply_moves_to_board(previous_board, player, move)
total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)

18
main.py
View File

@ -31,12 +31,8 @@ parser.add_argument('--train-perpetually', action='store_true',
help='start new training session as soon as the previous is finished') help='start new training session as soon as the previous is finished')
parser.add_argument('--list-models', action='store_true', parser.add_argument('--list-models', action='store_true',
help='list all known models') help='list all known models')
parser.add_argument('--force-creation', action='store_true',
help='force model creation if model does not exist')
parser.add_argument('--board-rep', action='store', dest='board_rep', parser.add_argument('--board-rep', action='store', dest='board_rep',
help='name of board representation to use as input to neural network') help='name of board representation to use as input to neural network')
parser.add_argument('--use-baseline', action='store_true',
help='use the baseline model, note, has size 28')
parser.add_argument('--verbose', action='store_true', parser.add_argument('--verbose', action='store_true',
help='If set, a lot of stuff will be printed') help='If set, a lot of stuff will be printed')
parser.add_argument('--ply', action='store', dest='ply', default='0', parser.add_argument('--ply', action='store', dest='ply', default='0',
@ -46,9 +42,6 @@ parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default
args = parser.parse_args() args = parser.parse_args()
if args.model == "baseline_model":
print("Model name 'baseline_model' not allowed")
exit()
config = { config = {
'model': args.model, 'model': args.model,
@ -64,8 +57,6 @@ config = {
'model_storage_path': 'models', 'model_storage_path': 'models',
'bench_storage_path': 'bench', 'bench_storage_path': 'bench',
'board_representation': args.board_rep, 'board_representation': args.board_rep,
'force_creation': args.force_creation,
'use_baseline': args.use_baseline,
'global_step': 0, 'global_step': 0,
'verbose': args.verbose, 'verbose': args.verbose,
'ply': args.ply, 'ply': args.ply,
@ -87,6 +78,14 @@ if not os.path.isdir(log_path):
os.mkdir(log_path) os.mkdir(log_path)
def save_config():
import yaml
# checkpoint_path = os.path.join(config['model_storage_path'], config['model'])
# config_path = os.path.join(checkpoint_path, 'config')
# with open(config_path, 'a+') as f:
# print("lol")
print(yaml.dump(config))
# Define helper functions # Define helper functions
def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")): def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
format_vars = { 'trained_eps': trained_eps, format_vars = { 'trained_eps': trained_eps,
@ -173,6 +172,7 @@ if __name__ == "__main__":
# Set up network # Set up network
from network import Network from network import Network
save_config()
# Set up variables # Set up variables
episode_count = config['episode_count'] episode_count = config['episode_count']

View File

@ -9,6 +9,7 @@ from eval import Eval
import glob import glob
from operator import itemgetter from operator import itemgetter
import tensorflow.contrib.eager as tfe import tensorflow.contrib.eager as tfe
from player import Player
class Network: class Network:
# board_features_quack has size 28 # board_features_quack has size 28
@ -562,6 +563,28 @@ class Network:
return outcomes return outcomes
def play_against_network(self):
self.restore_model()
human_player = Player(-1)
cur_player = 1
player = 1
board = Board.initial_state
i = 0
while Board.outcome(board) is None:
print(Board.pretty(board))
roll = (random.randrange(1, 7), random.randrange(1, 7))
print("Bot rolled:", roll)
board, _ = self.make_move(board, roll, player)
print(Board.pretty(board))
roll = (random.randrange(1, 7), random.randrange(1, 7))
print("You rolled:", roll)
board = human_player.make_human_move(board, roll)
print("DONE "*10)
print(Board.pretty(board))
def train_model(self, episodes=1000, save_step_size=100, trained_eps=0): def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
""" """
@ -570,79 +593,79 @@ class Network:
:param trained_eps: :param trained_eps:
:return: :return:
""" """
with tf.Session() as sess:
difference_in_vals = 0
self.restore_model() difference_in_vals = 0
start_time = time.time() self.restore_model()
def print_time_estimate(eps_completed): start_time = time.time()
cur_time = time.time()
time_diff = cur_time - start_time
eps_per_sec = eps_completed / time_diff
secs_per_ep = time_diff / eps_completed
eps_remaining = (episodes - eps_completed)
sys.stderr.write(
"[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
sys.stderr.write(
"[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size)) def print_time_estimate(eps_completed):
outcomes = [] cur_time = time.time()
for episode in range(1, episodes + 1): time_diff = cur_time - start_time
eps_per_sec = eps_completed / time_diff
secs_per_ep = time_diff / eps_completed
eps_remaining = (episodes - eps_completed)
sys.stderr.write(
"[TRAIN] Averaging {per_sec} episodes per second\n".format(per_sec=round(eps_per_sec, 2)))
sys.stderr.write(
"[TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining\n".format(
eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps)) sys.stderr.write("[TRAIN] Training {} episodes and save_step_size {}\n".format(episodes, save_step_size))
# TODO decide which player should be here outcomes = []
for episode in range(1, episodes + 1):
player = 1 sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
prev_board = Board.initial_state # TODO decide which player should be here
i = 0
while Board.outcome(prev_board) is None: player = 1
i += 1 prev_board = Board.initial_state
self.global_step += 1 i = 0
while Board.outcome(prev_board) is None:
i += 1
self.global_step += 1
cur_board, cur_board_value = self.make_move(prev_board, cur_board, cur_board_value = self.make_move(prev_board,
(random.randrange(1, 7), random.randrange(1, 7)), (random.randrange(1, 7), random.randrange(1, 7)),
player) player)
difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player)))) difference_in_vals += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))
if self.config['verbose']: if self.config['verbose']:
print("Difference in values:", difference_in_vals) print("Difference in values:", difference_in_vals)
print("Current board value :", cur_board_value) print("Current board value :", cur_board_value)
print("Current board is :\n",cur_board) print("Current board is :\n",cur_board)
# adjust weights # adjust weights
if Board.outcome(cur_board) is None: if Board.outcome(cur_board) is None:
self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value) self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
player *= -1 player *= -1
prev_board = cur_board prev_board = cur_board
final_board = prev_board final_board = prev_board
sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i)) sys.stderr.write("\t outcome {}\t turns {}".format(Board.outcome(final_board)[1], i))
outcomes.append(Board.outcome(final_board)[1]) outcomes.append(Board.outcome(final_board)[1])
final_score = np.array([Board.outcome(final_board)[1]]) final_score = np.array([Board.outcome(final_board)[1]])
scaled_final_score = ((final_score + 2) / 4) scaled_final_score = ((final_score + 2) / 4)
self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1)) self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))
sys.stderr.write("\n") sys.stderr.write("\n")
if episode % min(save_step_size, episodes) == 0: if episode % min(save_step_size, episodes) == 0:
sys.stderr.write("[TRAIN] Saving model...\n") sys.stderr.write("[TRAIN] Saving model...\n")
self.save_model(episode + trained_eps) self.save_model(episode + trained_eps)
if episode % 50 == 0: if episode % 50 == 0:
print_time_estimate(episode) print_time_estimate(episode)
sys.stderr.write("[TRAIN] Saving model for final episode...\n") sys.stderr.write("[TRAIN] Saving model for final episode...\n")
self.save_model(episode+trained_eps) self.save_model(episode+trained_eps)
return outcomes, difference_in_vals[0][0] return outcomes, difference_in_vals[0][0]

View File

@ -9,8 +9,8 @@ from board import Board
import main import main
config = main.config.copy() config = main.config.copy()
config['model'] = "eager_testings" config['model'] = "player_testings"
config['force_creation'] = True config['ply'] = "1"
config['board_representation'] = 'quack-fat' config['board_representation'] = 'quack-fat'
network = Network(config, config['model']) network = Network(config, config['model'])
@ -40,19 +40,21 @@ boards = {initial_state,
board = network.board_trans_func(Board.initial_state, 1) # board = network.board_trans_func(Board.initial_state, 1)
pair = network.make_move(Board.initial_state, [3,2], 1) # pair = network.make_move(Board.initial_state, [3,2], 1)
print(pair[1]) # print(pair[1])
network.do_backprop(board, 0.9) # network.do_backprop(board, 0.9)
network.print_variables() # network.print_variables()
network.save_model(2) # network.save_model(2)
print(network.calculate_1_ply(Board.initial_state, [3,2], 1)) # print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
network.play_against_network()

View File

@ -11,19 +11,55 @@ class Player:
def get_sym(self): def get_sym(self):
return self.sym return self.sym
def make_move(self, board, sym, roll): def calc_move_sets(self, from_board, roll, player):
print(Board.pretty(board)) board = from_board
legal_moves = Board.calculate_legal_states(board, sym, roll) sets = []
if roll[0] == roll[1]: total = 0
print("Example of move: 4/6,6/8,12/14,13/15") for r in roll:
else: # print("Value of r:",r)
print("Example of move: 4/6,13/17") sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
total += r
sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
return sets
user_moves = input("Enter your move: ").strip().split(",")
board = Board.apply_moves_to_board(board, sym, user_moves)
while board not in legal_moves:
print("Move is invalid, please enter a new move")
user_moves = input("Enter your move: ").strip().split(",")
board = Board.apply_moves_to_board(board, sym, user_moves)
return board def tmp_name(self, from_board, to_board, roll, player, total_moves):
sets = self.calc_move_sets(from_board, roll, player)
return_board = from_board
for idx, board_set in enumerate(sets):
board_set[0] = list(board_set[0])
print(to_board)
print(board_set)
if to_board in board_set[0]:
total_moves -= board_set[1]
# if it's not the sum of the moves
if idx < 2:
roll[idx] = 0
else:
roll = [0,0]
return_board = to_board
break
return total_moves, roll, return_board
def make_human_move(self, board, roll):
total_moves = roll[0] + roll[1] if roll[0] != roll[1] else int(roll[0])*4
move = ""
while total_moves != 0:
while True:
print("You have {roll} left!".format(roll=total_moves))
move = input("Pick a move!\n")
pot_move = move.split("/")
if len(pot_move) == 2:
try:
pot_move[0] = int(pot_move[0])
pot_move[1] = int(pot_move[1])
move = pot_move
break;
except TypeError:
print("The correct syntax is: 2/5 for a move from index 2 to 5.")
to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves)
print(Board.pretty(board))
return board