flips

2018-03-06 13:04:47 +01:00 · 2018-03-06 13:04:47 +01:00 · d2b1f5d523
commit d2b1f5d523
parent 73f10158e5
3 changed files with 19 additions and 25 deletions
--- a/bot.py
+++ b/bot.py
@@ -38,11 +38,10 @@ class Bot:
    def make_move(self, board, sym, roll):
        # print(Board.pretty(board))
        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        legal_list = list(legal_moves)
-        lol = []
-        for move in legal_list:
-            lol.append(self.network.eval_state(np.array(move).reshape(1,26)))
-        print("Found the best state, being:", np.array(lol).argmax())
-        return [legal_list[np.array(lol).argmax()], max(lol)]
+        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
+        scores = [ x[1] for x in moves_and_scores ]
+        best_move = moves_and_scores[np.array(scores).argmax()][0]
+        #print("Found the best state, being:", np.array(move_scores).argmax())
+        return best_move
        
 #        return random.choice(list(legal_moves))
--- a/game.py
+++ b/game.py
@@ -9,9 +9,9 @@ from cup import Cup
 class Game:
    def __init__(self):
        self.board = Board.initial_state
-        
+
        self.p1 = Bot(1)
-        self.p2 = RestoreBot(-1)
+        self.p2 = RestoreBot(1)
        self.cup = Cup()

    def roll(self):
@@ -22,11 +22,11 @@ class Game:
        move_and_val = self.p1.make_move(self.board, self.p1.get_sym(), roll)
        self.board = move_and_val[0]
        return move_and_val
-    
+
    def next_round(self):
        roll = self.roll()
        print(roll)
-        self.board = self.p2.make_move(self.board, self.p2.get_sym(),roll)
+        self.board = Board.flip(self.p2.make_move(Board.flip(self.board), self.p2.get_sym(), roll))
        return self.board

    def board_state(self):
@@ -43,6 +43,9 @@ class Game:
                self.p1.get_network().train(prev_board, cur_board_value)
                prev_board = cur_board
                self.next_round()
+                # print("-"*30)
+                # print(Board.pretty(self.board))
+                # print("/"*30)
            print("Outcome:", Board.outcome(self.board)[1])
            outcomes.append(Board.outcome(self.board)[1])
            final_score = np.array([ Board.outcome(self.board)[1] ]).reshape((1, 1))
@@ -51,6 +54,7 @@ class Game:
            if episode % 10 == 0:
                print("Saving...")
                self.p1.get_network().save_model()
+
        print(outcomes)

    def next_round_test(self):
--- a/restore_bot.py
+++ b/restore_bot.py
@@ -15,17 +15,7 @@ class RestoreBot:
        with self.graph.as_default():
            self.session = tf.Session(graph = self.graph)
            self.network = Network(self.session)
-            self.network.restore_model()
-
-    def roll(self):
-        print("{} rolled: ".format(self.sym))
-        roll = self.cup.roll()
-        print(roll)
-        return roll
-        
-
-    def switch(self,cur):
-        return -1 if cur == 1 else 1
+            self.network.restore_model()        

    def get_sym(self):
        return self.sym
@@ -33,9 +23,10 @@ class RestoreBot:
    def make_move(self, board, sym, roll):
        # print(Board.pretty(board))
        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        legal_list = list(legal_moves)
-        move_scores = [ self.network.eval_state(np.array(move).reshape(1,26)) for move in legal_list ]
-        print("Found the best state, being:", np.array(move_scores).argmax())
-        return legal_list[np.array(move_scores).argmax()]
+        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
+        scores = [ x[1] for x in moves_and_scores ]
+        best_move = moves_and_scores[np.array(scores).argmax()][0]
+        #print("Found the best state, being:", np.array(move_scores).argmax())
+        return best_move
        
 #        return random.choice(list(legal_moves))