added network_test and some comments

2018-04-29 12:14:14 +02:00 · 2018-04-29 12:14:14 +02:00 · 3f6849048e
commit 3f6849048e
parent afa6504b05
2 changed files with 66 additions and 22 deletions
--- a/network.py
+++ b/network.py
@ -190,7 +190,10 @@ class Network:
        """
        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
        # find all values for the above boards
        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
        # Python's list.reverse() works in place, so [:15] cannot be chained onto the call; the list has to be bound to a name first.
@ -201,6 +204,7 @@ class Network:
        # Player 1 wishes to maximize, so the list is reversed for that player. The reversal is not needed for player -1, since that player seeks to minimize.
        if player == 1:
            best_fifteen.reverse()
        best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
@ -331,25 +335,37 @@ class Network:
        all_rolls = gen_21_rolls()
        all_rolls_scores = []
        # loop over boards
        for a_board in boards:
            a_board_scores = []
            # loop over all rolls, for each board
            for roll in all_rolls:
                # find all states we can get to, given the board and roll and the opposite player
                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
                # find scores for each board found above
                spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
                                    for new_board in all_rolls_boards]
-                # We need 1-score for the -1 player
+                # if the original player is the -1 player, then we need to find (1-value)
                spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
                # find the best score
                best_score = max(spec_roll_scores)
                # append the best score to a_board_scores, where we keep track of the best score for each board
                a_board_scores.append(best_score)
            # save the unweighted mean of the per-roll best scores for this board
            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
        # return all the average scores
        return all_rolls_scores
    def eval(self, episode_count, trained_eps = 0, tf_session = None):
        """
        Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
@ -545,3 +561,5 @@ class Network:
            writer.close()
            return outcomes
--- a/network_test.py
+++ b/network_test.py
@ -3,30 +3,56 @@ import tensorflow as tf
 import random
 import numpy as np
 from board import Board
 import main
 config = main.config.copy()
 config['model'] = "tesauro_blah"
 config['force_creation'] = True
 network = Network(config, config['model'])
 session = tf.Session()
-graph_lol = tf.Graph()
+
 session.run(tf.global_variables_initializer())
 network.restore_model(session)
 initial_state = Board.initial_state
 initial_state_1 = ( 0,
                    0, 0, 0, 2, 0, -5,
                    0, -3, 0, 0, 0, 0,
                    -5, 0, 0, 0, 3, 5,
                    0, 0, 0, 0, 5, -2,
                    0 )
 initial_state_2 = ( 0,
                    -5, -5, -3, -2, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0, 0, 0, 15, 0, 0,
                    0, 0, 0, 0, 0, 0,
                    0 )
 boards = {initial_state,
          initial_state_1,
          initial_state_2 }
 print("-"*30)
 print(network.do_ply(session, boards, 1))
-network = Network(session)
+print(" "*10 + "network_test")
 print(" "*20 + "Depth 1")
 print(network.n_ply(1, session, boards, 1))
-initial_state = np.array(( 0,
+print(" "*20 + "Depth 2")
-                  2, 0, 0, 0, 0, -5,
+print(network.n_ply(2, session, boards, 1))
                  0, -3, 0, 0, 0, 5,
                  -5, 0, 0, 0, 3, 0,
                  5, 0, 0, 0, 0, -2,
                  0 )).reshape((1,26))
 #print(x.shape)
 with graph_lol.as_default():
    session_2 = tf.Session(graph = graph_lol)
    network_2 = Network(session_2)
    network_2.restore_model()
    print(network_2.eval_state(initial_state))
 print(network.eval_state(initial_state))
 # #print(x.shape)
 # with graph_lol.as_default():
 #     session_2 = tf.Session(graph = graph_lol)
 #     network_2 = Network(session_2)
 #     network_2.restore_model()
 #     print(network_2.eval_state(initial_state))
 # print(network.eval_state(initial_state))