From 5caae5b935674b06e03504e0a2f96fa3f8fa1c4d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christoffer=20M=C3=BCller=20Madsen?=
Date: Wed, 14 Mar 2018 14:02:19 +0100
Subject: [PATCH] spring cleaning 1

---
Review notes (this section is ignored by `git am`):

 * Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch. Hunk line counts match the @@ headers, but the
   exact position of blank context lines and the indent widths are a
   best-effort reconstruction; regenerate with
   `git format-patch -1 5caae5b` if the patch must apply byte-for-byte.
 * cup.py and eval.py now `import random`. Both call random.* but the
   original patch never imported it there (the only import lived in the
   deleted dice.py), so Cup.roll() and Eval.make_random_move() raised
   NameError. The @@ headers and the diffstat are updated accordingly;
   the `index` blob ids of those two files still name the original blobs.

 bot.py  | 37 +++----------------------------------
 cup.py  | 13 +++++--------
 dice.py |  5 -----
 eval.py | 22 ++++++++++++++++++++++
 game.py | 15 +++++++++------
 5 files changed, 39 insertions(+), 53 deletions(-)
 delete mode 100644 dice.py
 create mode 100644 eval.py

diff --git a/bot.py b/bot.py
index ca2f734..f9ef4bc 100644
--- a/bot.py
+++ b/bot.py
@@ -1,15 +1,12 @@
 from cup import Cup
-import tensorflow as tf
 from network import Network
-import numpy as np
 from board import Board
-import subprocess
+
+import tensorflow as tf
+import numpy as np
 import random
-import sys
-import pubeval
 
 class Bot:
-
     def __init__(self, sym, config = None):
         self.config = config
         self.cup = Cup()
@@ -19,17 +16,6 @@ class Bot:
         self.session = tf.Session()
         self.network = Network(self.session, config)
         self.network.restore_model()
-
-
-    def roll(self):
-        print("{} rolled: ".format(self.sym))
-        roll = self.cup.roll()
-#        print(roll)
-        return roll
-
-
-    def switch(self,cur):
-        return -1 if cur == 1 else 1
 
     def restore_model(self):
         with self.graph.as_default():
@@ -44,22 +30,6 @@ class Bot:
     def get_network(self):
         return self.network
 
-    def make_random_move(self, board, sym, roll):
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        return random.choice(list(legal_moves))
-
-
-    # TODO: Test this, the score results should be deterministic
-    def make_pubeval_move(self, board, sym, roll):
-        legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
-        moves_and_scores = [(board, pubeval.eval(False, Board.board_features_to_pubeval(board, sym))) for board in legal_moves]
-        scores = [ x[1] for x in moves_and_scores ]
-        best_move_pair = moves_and_scores[np.array(scores).argmax()]
-        return best_move_pair
-
-
-
-
     def make_move(self, board, sym, roll):
         # print(Board.pretty(board))
         legal_moves = Board.calculate_legal_states(board, sym, roll)
@@ -69,4 +39,3 @@ class Bot:
 
         #print("Found the best state, being:", np.array(move_scores).argmax())
         return best_move_pair
-
diff --git a/cup.py b/cup.py
index 8d9f273..6c9bcaa 100644
--- a/cup.py
+++ b/cup.py
@@ -1,10 +1,7 @@
-from dice import Dice
-
+import random
+
+# on Christoffer's kill list
 class Cup:
-
-    def __init__(self):
-        self.dice_1 = Dice
-        self.dice_2 = Dice
-
     def roll(self):
-        return [self.dice_1.roll(), self.dice_2.roll()]
+        return ( random.randrange(1,7),
+                 random.randrange(1,7) )
diff --git a/dice.py b/dice.py
deleted file mode 100644
index 8e645b6..0000000
--- a/dice.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import random
-
-class Dice:
-    def roll():
-        return random.randrange(1,7)
diff --git a/eval.py b/eval.py
new file mode 100644
index 0000000..2f645db
--- /dev/null
+++ b/eval.py
@@ -0,0 +1,22 @@
+from board import Board
+
+import numpy as np
+import pubeval
+import random
+
+
+class Eval:
+    @staticmethod
+    def make_random_move(board, sym, roll):
+        legal_moves = Board.calculate_legal_states(board, sym, roll)
+        return random.choice(list(legal_moves))
+
+
+    # TODO: Test this, the score results should be deterministic
+    @staticmethod
+    def make_pubeval_move(board, sym, roll):
+        legal_moves = Board.calculate_legal_states(tuple(board), sym, roll)
+        moves_and_scores = [(board, pubeval.eval(False, Board.board_features_to_pubeval(board, sym))) for board in legal_moves]
+        scores = [ x[1] for x in moves_and_scores ]
+        best_move_pair = moves_and_scores[np.array(scores).argmax()]
+        return best_move_pair
diff --git a/game.py b/game.py
index 2da10ea..cf5594e 100644
--- a/game.py
+++ b/game.py
@@ -3,6 +3,7 @@ from player import Player
 from bot import Bot
 from restore_bot import RestoreBot
 from cup import Cup
+from eval import Eval
 
 import numpy as np
 import sys
@@ -16,6 +17,7 @@ class Game:
 
         self.p1 = None
         self.p2 = None
+        # TODO remove this
         self.cup = Cup()
 
     def set_up_bots(self):
@@ -25,7 +27,7 @@ class Game:
     def roll(self):
         return self.cup.roll()
 
-    def roll_and_find_best_for_bot(self):
+    def best_move_and_score(self):
         roll = self.roll()
         move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
         self.board = move_and_val[0]
@@ -82,6 +84,7 @@ class Game:
 
     def train_model(self, episodes=1000, save_step_size = 100, trained_eps = 0):
         start_time = time.time()
+
         def print_time_estimate(eps_completed):
             cur_time = time.time()
             time_diff = cur_time - start_time
@@ -98,14 +101,14 @@ class Game:
             sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
             self.board = Board.initial_state
 
-            prev_board, prev_board_value = self.roll_and_find_best_for_bot()
+            prev_board, prev_board_value = self.best_move_and_score()
 
             # find the best move here, make this move, then change turn as the
             # first thing inside of the while loop and then call
-            # roll_and_find_best_for_bot to get V_t+1
+            # best_move_and_score to get V_t+1
             while Board.outcome(self.board) is None:
                 self.next_round()
-                cur_board, cur_board_value = self.roll_and_find_best_for_bot()
+                cur_board, cur_board_value = self.best_move_and_score()
                 self.p1.get_network().train(prev_board, cur_board_value)
 
                 prev_board = cur_board
@@ -166,7 +169,7 @@ class Game:
                         roll = self.roll()
                         self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
                         roll = self.roll()
-                        self.board = Board.flip(self.p2.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
+                        self.board = Board.flip(Eval.make_random_move(Board.flip(self.board), self.p2.get_sym(), roll))
                     sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
                     outcomes.append(Board.outcome(self.board)[1])
                     sys.stderr.write("\n")
@@ -184,7 +187,7 @@ class Game:
                         roll = self.roll()
                         self.board = (self.p1.make_move(self.board, self.p1.get_sym(), roll))[0]
                         roll = self.roll()
-                        self.board = Board.flip(self.p2.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
+                        self.board = Board.flip(Eval.make_pubeval_move(self.board, self.p2.get_sym(), roll)[0][0:26])
                     sys.stderr.write("\t outcome {}".format(Board.outcome(self.board)[1]))
                     outcomes.append(Board.outcome(self.board)[1])
                     sys.stderr.write("\n")