added network_test and some comments

This commit is contained in:
Alexander Munch-Hansen 2018-04-29 12:14:14 +02:00
parent afa6504b05
commit 3f6849048e
2 changed files with 66 additions and 22 deletions

View File

@ -190,7 +190,10 @@ class Network:
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
# find all values for the above boards
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# list.reverse() works in place and returns None, so it cannot be chained with [:15]; reverse the list first, then slice it.
@ -201,6 +204,7 @@ class Network:
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
if player == 1:
best_fifteen.reverse()
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
@ -331,25 +335,37 @@ class Network:
# --- body of Network.do_ply (the `def` line sits outside this diff hunk) ---
# Presumably gen_21_rolls() yields the 21 distinct unordered two-dice rolls — TODO confirm.
all_rolls = gen_21_rolls()
# Collects one expected score per input board, in the same order as `boards`.
all_rolls_scores = []
# loop over boards
for a_board in boards:
a_board_scores = []
# loop over all rolls, for each board
for roll in all_rolls:
# find all states we can get to, given the board and roll and the opposite player
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
# find scores for each board found above
spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
for new_board in all_rolls_boards]
# We need 1-score for the -1 player
# if the original player is the -1 player, then we need to find (1-value)
spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
# find the best score
# NOTE(review): max() raises ValueError if calculate_legal_states returns no
# states for some roll — confirm a legal state (e.g. a no-op move) always exists.
best_score = max(spec_roll_scores)
# append the best score to a_board_scores, where we keep track of the best score for each board
a_board_scores.append(best_score)
# save the expected average of board scores
# NOTE(review): this plain average weights the 21 distinct rolls equally, but
# doubles occur with probability 1/36 vs 2/36 for non-doubles — confirm the
# uniform weighting is intended.
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
# return all the average scores
return all_rolls_scores
def eval(self, episode_count, trained_eps = 0, tf_session = None):
"""
Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
@ -545,3 +561,5 @@ class Network:
writer.close()
return outcomes

View File

@ -3,30 +3,56 @@ import tensorflow as tf
# network_test.py — ad-hoc manual smoke-test script for the Network class.
# (Viewed through a diff hunk; context lines such as the `Network` import may be
# outside the visible range — TODO confirm against the full file.)
import random
import numpy as np
from board import Board
import main
# Start from the project-wide config and force creation of a fresh model.
config = main.config.copy()
config['model'] = "tesauro_blah"
config['force_creation'] = True
# NOTE(review): `Network` is used here but not imported in the visible lines —
# confirm the import exists in the unshown context.
network = Network(config, config['model'])
session = tf.Session()
graph_lol = tf.Graph()
session.run(tf.global_variables_initializer())
network.restore_model(session)
# Three boards to evaluate: the standard starting position plus two hand-built ones.
initial_state = Board.initial_state
initial_state_1 = ( 0,
0, 0, 0, 2, 0, -5,
0, -3, 0, 0, 0, 0,
-5, 0, 0, 0, 3, 5,
0, 0, 0, 0, 5, -2,
0 )
initial_state_2 = ( 0,
-5, -5, -3, -2, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 15, 0, 0,
0, 0, 0, 0, 0, 0,
0 )
# NOTE(review): a set iterates in nondeterministic order, so the score lists
# printed below are not guaranteed to line up with the literal order above.
boards = {initial_state,
initial_state_1,
initial_state_2 }
print("-"*30)
print(network.do_ply(session, boards, 1))
# NOTE(review): `network` is rebound with a single-argument constructor call,
# unlike Network(config, config['model']) above — confirm which signature is
# the current one.
network = Network(session)
print(" "*10 + "network_test")
print(" "*20 + "Depth 1")
print(network.n_ply(1, session, boards, 1))
# A single board reshaped to (1, 26) for direct eval_state calls.
initial_state = np.array(( 0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0 )).reshape((1,26))
#print(x.shape)
# Evaluate the same state from a second session/graph for comparison.
# NOTE(review): restore_model() is called with no session here, unlike
# network.restore_model(session) above — confirm the intended API.
with graph_lol.as_default():
session_2 = tf.Session(graph = graph_lol)
network_2 = Network(session_2)
network_2.restore_model()
print(network_2.eval_state(initial_state))
print(network.eval_state(initial_state))
print(" "*20 + "Depth 2")
print(network.n_ply(2, session, boards, 1))
# Earlier experiment kept for reference (duplicates the graph_lol block above).
# #print(x.shape)
# with graph_lol.as_default():
# session_2 = tf.Session(graph = graph_lol)
# network_2 = Network(session_2)
# network_2.restore_model()
# print(network_2.eval_state(initial_state))
# print(network.eval_state(initial_state))