spring cleaning 1
This commit is contained in:
parent
fcc373c0d8
commit
5caae5b935
37
bot.py
37
bot.py
|
@ -1,15 +1,12 @@
|
||||||
from cup import Cup
|
from cup import Cup
|
||||||
import tensorflow as tf
|
|
||||||
from network import Network
|
from network import Network
|
||||||
import numpy as np
|
|
||||||
from board import Board
|
from board import Board
|
||||||
import subprocess
|
|
||||||
|
import tensorflow as tf
|
||||||
|
import numpy as np
|
||||||
import random
|
import random
|
||||||
import sys
|
|
||||||
import pubeval
|
|
||||||
|
|
||||||
class Bot:
|
class Bot:
|
||||||
|
|
||||||
def __init__(self, sym, config = None):
|
def __init__(self, sym, config = None):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.cup = Cup()
|
self.cup = Cup()
|
||||||
|
@ -19,17 +16,6 @@ class Bot:
|
||||||
self.session = tf.Session()
|
self.session = tf.Session()
|
||||||
self.network = Network(self.session, config)
|
self.network = Network(self.session, config)
|
||||||
self.network.restore_model()
|
self.network.restore_model()
|
||||||
|
|
||||||
|
|
||||||
def roll(self):
    """Announce this bot's roll on stdout and return the cup's result.

    Prints "<sym> rolled: " and then hands back whatever
    ``self.cup.roll()`` produces, unchanged.
    """
    announcement = "{} rolled: ".format(self.sym)
    print(announcement)
    return self.cup.roll()
|
|
||||||
|
|
||||||
|
|
||||||
def switch(self, cur):
    """Return -1 when ``cur`` equals 1; return 1 for any other value."""
    if cur == 1:
        return -1
    return 1
|
|
||||||
|
|
||||||
def restore_model(self):
|
def restore_model(self):
|
||||||
with self.graph.as_default():
|
with self.graph.as_default():
|
||||||
|
@ -44,22 +30,6 @@ class Bot:
|
||||||
def get_network(self):
|
def get_network(self):
|
||||||
return self.network
|
return self.network
|
||||||
|
|
||||||
def make_random_move(self, board, sym, roll):
    """Pick one of the legal successor states at random.

    The arguments are forwarded unchanged to
    ``Board.calculate_legal_states``; the result is materialised as a list
    so ``random.choice`` can index into it.
    """
    candidates = list(Board.calculate_legal_states(board, sym, roll))
    return random.choice(candidates)
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: Test this, the score results should be deterministic
|
|
||||||
def make_pubeval_move(self, board, sym, roll):
    """Return the ``(state, score)`` pair that pubeval scores highest.

    Each legal successor of ``board`` is converted with
    ``Board.board_features_to_pubeval`` and scored by ``pubeval.eval``.
    On ties, ``argmax`` selects the first candidate in iteration order.
    """
    candidates = Board.calculate_legal_states(tuple(board), sym, roll)

    scored = []
    for state in candidates:
        features = Board.board_features_to_pubeval(state, sym)
        scored.append((state, pubeval.eval(False, features)))

    best_index = np.array([score for _, score in scored]).argmax()
    return scored[best_index]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def make_move(self, board, sym, roll):
|
def make_move(self, board, sym, roll):
|
||||||
# print(Board.pretty(board))
|
# print(Board.pretty(board))
|
||||||
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
legal_moves = Board.calculate_legal_states(board, sym, roll)
|
||||||
|
@ -69,4 +39,3 @@ class Bot:
|
||||||
#print("Found the best state, being:", np.array(move_scores).argmax())
|
#print("Found the best state, being:", np.array(move_scores).argmax())
|
||||||
return best_move_pair
|
return best_move_pair
|
||||||
|
|
||||||
|
|
||||||
|
|
11
cup.py
11
cup.py
|
@ -1,10 +1,5 @@
|
||||||
from dice import Dice
|
# on Christoffer's kill list
|
||||||
|
|
||||||
class Cup:
|
class Cup:
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.dice_1 = Dice
|
|
||||||
self.dice_2 = Dice
|
|
||||||
|
|
||||||
def roll(self):
|
def roll(self):
|
||||||
return [self.dice_1.roll(), self.dice_2.roll()]
|
return ( random.randrange(1,7),
|
||||||
|
random.randrange(1,7) )
|
||||||
|
|
5
dice.py
5
dice.py
|
@ -1,5 +0,0 @@
|
||||||
import random
|
|
||||||
|
|
||||||
class Dice:
    """A single six-sided die."""

    @staticmethod
    def roll():
        """Return a random integer from 1 to 6 inclusive.

        Declared as a static method so it works both on the class
        (``Dice.roll()``) and on an instance (``Dice().roll()``); without the
        decorator the instance call fails because ``roll`` takes no ``self``.
        """
        # randrange excludes the upper bound, so 7 yields faces 1..6.
        return random.randrange(1, 7)
|
|
21
eval.py
Normal file
21
eval.py
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
from board import Board
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pubeval
|
||||||
|
|
||||||
|
|
||||||
|
class Eval:
    """Stateless move choosers: one picks at random, one uses pubeval scores."""

    @staticmethod
    def make_random_move(board, sym, roll):
        """Return one legal successor state chosen at random.

        ``board``, ``sym`` and ``roll`` are passed straight through to
        ``Board.calculate_legal_states``. ``random.choice`` raises IndexError
        when there are no legal states.
        """
        # Imported locally because this module's import block does not bring
        # in ``random``; without it the call below raises NameError.
        import random

        legal_moves = Board.calculate_legal_states(board, sym, roll)
        return random.choice(list(legal_moves))

    # TODO: Test this, the score results should be deterministic
    @staticmethod
    def make_pubeval_move(board, sym, roll):
        """Return the ``(state, score)`` pair that pubeval scores highest.

        Every legal successor of ``board`` is converted with
        ``Board.board_features_to_pubeval`` and scored by ``pubeval.eval``.
        On ties, ``argmax`` selects the first candidate in iteration order.
        """
        legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
        # ``candidate`` (not ``board``) so the loop variable does not shadow
        # the ``board`` parameter.
        moves_and_scores = [
            (candidate,
             pubeval.eval(False, Board.board_features_to_pubeval(candidate, sym)))
            for candidate in legal_moves
        ]
        scores = [score for _, score in moves_and_scores]
        best_move_pair = moves_and_scores[np.array(scores).argmax()]
        return best_move_pair
|
15
game.py
15
game.py
|
@ -3,6 +3,7 @@ from player import Player
|
||||||
from bot import Bot
|
from bot import Bot
|
||||||
from restore_bot import RestoreBot
|
from restore_bot import RestoreBot
|
||||||
from cup import Cup
|
from cup import Cup
|
||||||
|
from eval import Eval
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
|
@ -16,6 +17,7 @@ class Game:
|
||||||
|
|
||||||
self.p1 = None
|
self.p1 = None
|
||||||
self.p2 = None
|
self.p2 = None
|
||||||
|
# TODO remove this
|
||||||
self.cup = Cup()
|
self.cup = Cup()
|
||||||
|
|
||||||
def set_up_bots(self):
|
def set_up_bots(self):
|
||||||
|
@ -25,7 +27,7 @@ class Game:
|
||||||
def roll(self):
|
def roll(self):
|
||||||
return self.cup.roll()
|
return self.cup.roll()
|
||||||
|
|
||||||
def roll_and_find_best_for_bot(self):
|
def best_move_and_score(self):
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
|
move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
|
||||||
self.board = move_and_val[0]
|
self.board = move_and_val[0]
|
||||||
|
@ -82,6 +84,7 @@ class Game:
|
||||||
|
|
||||||
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
|
def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
def print_time_estimate(eps_completed):
|
def print_time_estimate(eps_completed):
|
||||||
cur_time = time.time()
|
cur_time = time.time()
|
||||||
time_diff = cur_time - start_time
|
time_diff = cur_time - start_time
|
||||||
|
@ -98,14 +101,14 @@ class Game:
|
||||||
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
|
||||||
self.board = Board.initial_state
|
self.board = Board.initial_state
|
||||||
|
|
||||||
prev_board, prev_board_value = self.roll_and_find_best_for_bot()
|
prev_board, prev_board_value = self.best_move_and_score()
|
||||||
# find the best move here, make this move, then change turn as the
|
# find the best move here, make this move, then change turn as the
|
||||||
# first thing inside of the while loop and then call
|
# first thing inside of the while loop and then call
|
||||||
# roll_and_find_best_for_bot to get V_t+1
|
# best_move_and_score to get V_t+1
|
||||||
|
|
||||||
while Board.outcome(self.board) is None:
|
while Board.outcome(self.board) is None:
|
||||||
self.next_round()
|
self.next_round()
|
||||||
cur_board, cur_board_value = self.roll_and_find_best_for_bot()
|
cur_board, cur_board_value = self.best_move_and_score()
|
||||||
self.p1.get_network().train(prev_board, cur_board_value)
|
self.p1.get_network().train(prev_board, cur_board_value)
|
||||||
prev_board = cur_board
|
prev_board = cur_board
|
||||||
|
|
||||||
|
@ -166,7 +169,7 @@ class Game:
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
|
@ -184,7 +187,7 @@ class Game:
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
|
||||||
roll = self.roll()
|
roll = self.roll()
|
||||||
self.board = Board.flip(self.p2.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
|
self.board = Board.flip(Eval.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
|
||||||
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
|
||||||
outcomes.append(Board.outcome(self.board)[1])
|
outcomes.append(Board.outcome(self.board)[1])
|
||||||
sys.stderr.write("\n")
|
sys.stderr.write("\n")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user