diff --git a/bot.py b/bot.py index 7dd6747..75fab80 100644 --- a/bot.py +++ b/bot.py @@ -38,11 +38,10 @@ class Bot: def make_move(self, board, sym, roll): # print(Board.pretty(board)) legal_moves = Board.calculate_legal_states(board, sym, roll) - legal_list = list(legal_moves) - lol = [] - for move in legal_list: - lol.append(self.network.eval_state(np.array(move).reshape(1,26))) - print("Found the best state, being:", np.array(lol).argmax()) - return [legal_list[np.array(lol).argmax()], max(lol)] + moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] + scores = [ x[1] for x in moves_and_scores ] + best_move = moves_and_scores[np.array(scores).argmax()][0] + #print("Found the best state, being:", np.array(move_scores).argmax()) + return best_move # return random.choice(list(legal_moves)) diff --git a/game.py b/game.py index 5ea989e..4f3d6d5 100644 --- a/game.py +++ b/game.py @@ -9,9 +9,9 @@ from cup import Cup class Game: def __init__(self): self.board = Board.initial_state - + self.p1 = Bot(1) - self.p2 = RestoreBot(-1) + self.p2 = RestoreBot(1) self.cup = Cup() def roll(self): @@ -22,11 +22,11 @@ class Game: move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll) self.board = move_and_val[0] return move_and_val - + def next_round(self): roll = self.roll() print(roll) - self.board = self.p2.make_move(self.board, self.p2.get_sym(),roll) + self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll)) return self.board def board_state(self): @@ -43,6 +43,9 @@ class Game: self.p1.get_network().train(prev_board, cur_board_value) prev_board = cur_board self.next_round() + # print("-"*30) + # print(Board.pretty(self.board)) + # print("/"*30) print("Outcome:", Board.outcome(self.board)[1]) outcomes.append(Board.outcome(self.board)[1]) final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1)) @@ -51,6 +54,7 @@ class Game: if episode % 10 == 0: print("Saving...") self.p1.get_network().save_model() + print(outcomes) def next_round_test(self): diff --git a/restore_bot.py b/restore_bot.py index b238815..131f8c9 100644 --- a/restore_bot.py +++ b/restore_bot.py @@ -15,17 +15,7 @@ class RestoreBot: with self.graph.as_default(): self.session = tf.Session(graph = self.graph) self.network = Network(self.session) - self.network.restore_model() - - def roll(self): - print("{} rolled: ".format(self.sym)) - roll = self.cup.roll() - print(roll) - return roll - - - def switch(self,cur): - return -1 if cur == 1 else 1 + self.network.restore_model() def get_sym(self): return self.sym @@ -33,9 +23,10 @@ class RestoreBot: def make_move(self, board, sym, roll): # print(Board.pretty(board)) legal_moves = Board.calculate_legal_states(board, sym, roll) - legal_list = list(legal_moves) - move_scores = [ self.network.eval_state(np.array(move).reshape(1,26)) for move in legal_list ] - print("Found the best state, being:", np.array(move_scores).argmax()) - return legal_list[np.array(move_scores).argmax()] + moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ] + scores = [ x[1] for x in moves_and_scores ] + best_move = moves_and_scores[np.array(scores).argmax()][0] + #print("Found the best state, being:", np.array(move_scores).argmax()) + return best_move # return random.choice(list(legal_moves))