2018-02-05 21:31:34 +00:00
from board import Board
2018-03-09 13:19:31 +00:00
from player import Player
2018-02-05 22:50:31 +00:00
from bot import Bot
2018-03-06 10:06:38 +00:00
from restore_bot import RestoreBot
2018-02-07 15:27:03 +00:00
from cup import Cup
2018-03-14 13:02:19 +00:00
from eval import Eval
2018-03-11 19:00:24 +00:00
2018-03-08 15:27:16 +00:00
import numpy as np
import sys
2018-03-11 23:11:40 +00:00
import time
2018-03-06 10:06:38 +00:00
2018-02-05 21:31:34 +00:00
class Game :
2018-03-09 13:19:31 +00:00
2018-03-08 15:27:16 +00:00
def __init__(self, config=None):
    """Create a game on the initial board with no players attached yet.

    Args:
        config: Optional configuration object. It is stored unchanged on the
            instance; ``set_up_bots`` passes it on to the bots it creates.
    """
    self.config = config
    self.board = Board.initial_state

    # Players are filled in later, e.g. by set_up_bots().
    self.p1 = self.p2 = None

    # TODO remove this
    self.cup = Cup()
2018-02-07 14:31:05 +00:00
2018-03-08 15:27:16 +00:00
def set_up_bots(self):
    """Attach two bots, both created with symbol 1 and this game's config.

    p1 is created and stored first, then p2.
    """
    for slot in ("p1", "p2"):
        setattr(self, slot, Bot(1, config=self.config))
2018-02-07 15:27:03 +00:00
def roll(self):
    """Roll the game's cup and return whatever ``self.cup.roll()`` yields."""
    dice = self.cup.roll()
    return dice
2018-02-05 21:31:34 +00:00
2018-03-14 13:02:19 +00:00
def best_move_and_score(self):
    """Let p1 move on a fresh roll and commit the resulting board.

    Returns:
        The object returned by ``self.p1.make_move``. Its element ``[0]`` is
        stored as the new ``self.board`` before returning.
    """
    dice = self.roll()
    mover = self.p1
    result = mover.make_move(self.board, mover.get_sym(), dice)
    self.board = result[0]
    return result
2018-03-06 12:04:47 +00:00
2018-03-04 16:35:36 +00:00
def next_round(self):
    """Play p2's turn on a fresh roll and return the updated board.

    The board is flipped with ``Board.flip`` before it is handed to p2, and
    element ``[0]`` of p2's answer is flipped back before being stored.
    """
    dice = self.roll()
    mirrored = Board.flip(self.board)
    reply = self.p2.make_move(mirrored, self.p2.get_sym(), dice)
    self.board = Board.flip(reply[0])
    return self.board
def board_state(self):
    """Return the board currently held by this game."""
    current = self.board
    return current
2018-03-09 13:19:31 +00:00
def play_against_player(self):
    """Play one interactive game between a human ``Player`` and a ``Bot``.

    A coin flip decides whether the user is asked for a colour. If the user
    is asked and answers 1, the user is white (p1) and the bot is black (p2);
    in every other case the bot is white (p1) and the user is black (p2).
    p1 and p2 then alternate turns, each on a fresh roll, until
    ``Board.outcome`` is no longer ``None``; the outcome is printed.

    Raises:
        ValueError: if the user's answer cannot be converted with ``int``.
    """
    # Local import: the module's import block does not bring in ``random``.
    import random

    self.board = Board.initial_state

    user_is_white = False
    if random.random() > 0.5:
        user_color = input(" Pick a number, 1 (white) or -1 (black) ")
        user_is_white = int(user_color) == 1

    # Remember which object is the bot instead of inspecting ``__name__``,
    # which instances do not carry.
    if user_is_white:
        p1 = Player(1)
        p2 = bot = Bot(-1)
    else:
        p1 = bot = Bot(1)
        p2 = Player(-1)

    def take_turn(player):
        """Roll the dice and apply ``player``'s move to ``self.board``."""
        roll = self.roll()
        sym = player.get_sym()

        if player is not bot:
            # NOTE(review): assumes Player.make_move returns the new board
            # itself (the original code used it that way) - confirm.
            self.board = player.make_move(self.board, sym, roll)
            return

        # Bot.make_move returns a (board, value) pair everywhere else in
        # this class, so only element [0] is the board.
        if sym == 1:
            self.board = player.make_move(self.board, sym, roll)[0]
        else:
            # Since we have to make sure that the Bot always plays as if it
            # is white, the board is flipped for it and flipped back.
            flipped = Board.flip(self.board)
            self.board = Board.flip(player.make_move(flipped, sym, roll)[0])

    while Board.outcome(self.board) is None:
        take_turn(p1)
        take_turn(p2)

    print(Board.outcome(self.board))
2018-03-09 13:19:31 +00:00
2018-03-09 20:05:38 +00:00
def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
    """Train p1's network over a number of self-play episodes.

    Each episode starts from ``Board.initial_state``. p1 moves first; then,
    while ``Board.outcome`` is ``None``, p2 plays a round, p1 moves again and
    p1's network is trained on (previous board, value of the new board).
    When the game is over the network is trained once more on
    (last board, outcome score reshaped to ``(1, 1)``).

    Args:
        episodes: Number of episodes to play. With ``episodes <= 0`` no game
            is played, but the final save/restore still happens.
        save_step_size: The model is saved (and p2's model restored) whenever
            the episode number is a multiple of
            ``min(save_step_size, episodes)``. A step of 0 disables these
            intermediate saves.
        trained_eps: Offset added to the episode number in the log output and
            in the argument passed to ``save_model``.

    Returns:
        A list with element ``[1]`` of ``Board.outcome`` for every episode.
    """
    start_time = time.time()

    def print_time_estimate(eps_completed):
        """Write throughput and remaining-time estimates to stderr."""
        cur_time = time.time()
        time_diff = cur_time - start_time
        eps_per_sec = eps_completed / time_diff
        secs_per_ep = time_diff / eps_completed
        eps_remaining = episodes - eps_completed
        sys.stderr.write(" [TRAIN] Averaging {per_sec} episodes per second \n ".format(per_sec=round(eps_per_sec, 2)))
        sys.stderr.write(" [TRAIN] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining \n ".format(eps_remaining=eps_remaining, time_remaining=int(eps_remaining * secs_per_ep)))

    sys.stderr.write(" [TRAIN] Training {} episodes and save_step_size {} \n ".format(episodes, save_step_size))

    # Guard against a zero modulus below (save_step_size == 0).
    save_every = min(save_step_size, episodes)

    outcomes = []
    for episode in range(1, episodes + 1):
        sys.stderr.write(" [TRAIN] Episode {} ".format(episode + trained_eps))
        self.board = Board.initial_state

        # Find the best move here and make it; the loop below then lets the
        # opponent play before asking for the value of the next position.
        prev_board, _ = self.best_move_and_score()

        while Board.outcome(self.board) is None:
            self.next_round()
            cur_board, cur_board_value = self.best_move_and_score()
            self.p1.get_network().train(prev_board, cur_board_value)
            prev_board = cur_board

        # The board no longer changes in this episode, so look the outcome
        # up once and reuse it.
        result = Board.outcome(self.board)[1]
        sys.stderr.write(" \t outcome {} ".format(result))
        outcomes.append(result)
        final_score = np.array([result]).reshape((1, 1))
        self.p1.get_network().train(prev_board, final_score)
        sys.stderr.write(" \n ")

        if save_every and episode % save_every == 0:
            sys.stderr.write(" [TRAIN] Saving model... \n ")
            self.p1.get_network().save_model(episode + trained_eps)
            sys.stderr.write(" [TRAIN] Loading model for training opponent... \n ")
            self.p2.restore_model()

        if episode % 50 == 0:
            print_time_estimate(episode)

    # Do not rely on the loop variable here: it is unbound when the loop
    # body never ran. After a non-empty loop the last episode equals
    # ``episodes``.
    last_episode = max(episodes, 0)
    sys.stderr.write(" [TRAIN] Saving model for final episode... \n ")
    self.p1.get_network().save_model(last_episode + trained_eps)
    self.p2.restore_model()
    return outcomes
2018-03-06 10:06:38 +00:00
2018-03-04 16:35:36 +00:00
def next_round_test(self):
    """Debug helper: print the board, play one p2 round, print it again."""
    divider = " -------------------------------- "

    print(self.board)
    print()
    self.next_round()
    for line in (divider, self.board, divider):
        print(line)
2018-03-09 20:05:38 +00:00
def eval(self, trained_eps=0):
    """Evaluate p1 against every opponent method named in the configuration.

    For each method in the config's eval-methods entry, the config's
    episode-count entry gives the number of games to play. p1 moves via
    ``make_move``; the opponent's reply depends on the method.

    Args:
        trained_eps: Forwarded to the inner evaluation routine.
            NOTE(review): the routine accepts it but never reads it.

    Returns:
        A list of ``(method, outcomes)`` tuples. ``outcomes`` holds element
        ``[1]`` of ``Board.outcome`` per game, or ``[0]`` when the method is
        not one of the known ones.
    """
    def do_eval(method, episodes=1000, trained_eps=0):
        started = time.time()

        def print_time_estimate(eps_completed):
            # Throughput so far and a linear projection of the time left.
            elapsed = time.time() - started
            rate = eps_completed / elapsed
            per_episode = elapsed / eps_completed
            left = episodes - eps_completed
            sys.stderr.write(" [EVAL ] Averaging {per_sec} episodes per second \n ".format(per_sec=round(rate, 2)))
            sys.stderr.write(" [EVAL ] {eps_remaining} episodes remaining; approx. {time_remaining} seconds remaining \n ".format(eps_remaining=left, time_remaining=int(left * per_episode)))

        def random_reply(dice):
            # The opponent sees a flipped board; the result is flipped back.
            mirrored = Board.flip(self.board)
            return Board.flip(Eval.make_random_move(mirrored, self.p2.get_sym(), dice))

        def pubeval_reply(dice):
            # NOTE(review): unlike the random opponent, the board is NOT
            # flipped before the move is chosen, only afterwards - confirm
            # this is what make_pubeval_move expects.
            chosen = Eval.make_pubeval_move(self.board, self.p2.get_sym(), dice)
            return Board.flip(chosen[0][0:26])

        sys.stderr.write(" [EVAL ] Evaluating {eps} episode(s) with method ' {method} ' \n ".format(eps=episodes, method=method))

        # Pick the opponent and how often progress is reported.
        if method == ' random ':
            opponent_reply, report_every = random_reply, 50
        elif method == ' pubeval ':
            opponent_reply, report_every = pubeval_reply, 10
        else:
            sys.stderr.write(" [EVAL ] Evaluation method ' {} ' is not defined \n ".format(method))
            return [0]

        results = []
        for episode in range(1, episodes + 1):
            sys.stderr.write(" [EVAL ] Episode {} ".format(episode))
            self.board = Board.initial_state
            while Board.outcome(self.board) is None:
                dice = self.roll()
                self.board = self.p1.make_move(self.board, self.p1.get_sym(), dice)[0]
                self.board = opponent_reply(self.roll())
            sys.stderr.write(" \t outcome {} ".format(Board.outcome(self.board)[1]))
            results.append(Board.outcome(self.board)[1])
            sys.stderr.write(" \n ")
            if episode % report_every == 0:
                print_time_estimate(episode)
        return results

    evaluations = []
    for method in self.config[' eval_methods ']:
        scores = do_eval(method, self.config[' episode_count '], trained_eps=trained_eps)
        evaluations.append((method, scores))
    return evaluations
def play(self, episodes=1000):
    """Play ``episodes`` games in which both sides use ``make_random_move``.

    After every game the winner line and the zero-based round index are
    printed.

    Args:
        episodes: Number of games to play.

    Returns:
        A list with element ``[1]`` of ``Board.outcome`` for every game.
    """
    results = []
    for round_index in range(episodes):
        self.board = Board.initial_state

        while Board.outcome(self.board) is None:
            # p1 moves on the board as it is.
            dice = self.roll()
            self.board = self.p1.make_random_move(self.board, self.p1.get_sym(), dice)

            # p2 moves on a flipped board, which is flipped back afterwards.
            dice = self.roll()
            mirrored = Board.flip(self.board)
            self.board = Board.flip(self.p2.make_random_move(mirrored, self.p2.get_sym(), dice))

        white_won = Board.outcome(self.board)[1] > 0
        prefix = " 1: White, " if white_won else " -1: Black "
        print_winner = prefix + str(Board.outcome(self.board))
        results.append(Board.outcome(self.board)[1])
        print(" The winner is {} ! ".format(print_winner))
        print(" Round: ", round_index)
    return results
# return count
2018-03-04 16:35:36 +00:00
# NOTE(review): `highest` is assigned here but never read in the visible code - confirm it is still needed.
highest = 0
2018-02-05 21:31:34 +00:00
2018-03-08 15:27:16 +00:00