diff --git a/network.py b/network.py
index ebe7d8f..65d7e9e 100644
--- a/network.py
+++ b/network.py
@@ -190,7 +190,10 @@ class Network:
         """
+        # find all legal states from the given board and the given roll
         init_legal_states = Board.calculate_legal_states(board, player, roll)
+
+        # find all values for the above boards
         zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
 
         # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
@@ -201,6 +204,7 @@ class Network:
         # player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
         if player == 1:
             best_fifteen.reverse()
+
         best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
 
         all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
@@ -331,25 +335,37 @@ class Network:
         all_rolls = gen_21_rolls()
 
         all_rolls_scores = []
+
+        # loop over boards
         for a_board in boards:
             a_board_scores = []
+
+            # loop over all rolls, for each board
             for roll in all_rolls:
+
+                # find all states we can get to, given the board and roll and the opposite player
                 all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
 
+                # find scores for each board found above
                 spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards]
 
-                # We need 1-score for the -1 player
+                # if the original player is the -1 player, then we need to find (1-value)
                 spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
 
+                # find the best score
                 best_score = max(spec_roll_scores)
 
+                # append the best score to a_board_scores, where we keep track of the best score for each board
                 a_board_scores.append(best_score)
 
+            # save the expected average of board scores
             all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
 
+        # return all the average scores
         return all_rolls_scores
 
+
     def eval(self, episode_count, trained_eps = 0, tf_session = None):
         """
         Used to evaluate a model.
         Can either use pubeval, a model playing at an intermediate level, or dumbeval
@@ -545,3 +561,5 @@ class Network:
         writer.close()
 
         return outcomes
+
+
diff --git a/network_test.py b/network_test.py
index 01ceba5..fb343aa 100644
--- a/network_test.py
+++ b/network_test.py
@@ -3,30 +3,56 @@
 import tensorflow as tf
 import random
 import numpy as np
+
+from board import Board
+
+import main
+
+config = main.config.copy()
+config['model'] = "tesauro_blah"
+config['force_creation'] = True
+network = Network(config, config['model'])
+
 session = tf.Session()
-graph_lol = tf.Graph()
+
+session.run(tf.global_variables_initializer())
+network.restore_model(session)
+initial_state = Board.initial_state
+
+initial_state_1 = ( 0,
+                    0, 0, 0, 2, 0, -5,
+                    0, -3, 0, 0, 0, 0,
+                    -5, 0, 0, 0, 3, 5,
+                    0, 0, 0, 0, 5, -2,
+                    0 )
+
+initial_state_2 = ( 0,
+                    -5, -5, -3, -2, 0, 0,
+                    0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 15, 0, 0,
+                    0, 0, 0, 0, 0, 0,
+                    0 )
+
+boards = {initial_state,
+          initial_state_1,
+          initial_state_2 }
+print("-"*30)
+print(network.do_ply(session, boards, 1))
 
-network = Network(session)
+print(" "*10 + "network_test")
+print(" "*20 + "Depth 1")
+print(network.n_ply(1, session, boards, 1))
 
-initial_state = np.array(( 0,
-                           2, 0, 0, 0, 0, -5,
-                           0, -3, 0, 0, 0, 5,
-                           -5, 0, 0, 0, 3, 0,
-                           5, 0, 0, 0, 0, -2,
-                           0 )).reshape((1,26))
+print(" "*20 + "Depth 2")
+print(network.n_ply(2, session, boards, 1))
+
+# #print(x.shape)
+# with graph_lol.as_default():
+#     session_2 = tf.Session(graph = graph_lol)
+#     network_2 = Network(session_2)
+#     network_2.restore_model()
+#     print(network_2.eval_state(initial_state))
-
-
-
-#print(x.shape)
-with graph_lol.as_default():
-    session_2 = tf.Session(graph = graph_lol)
-    network_2 = Network(session_2)
-    network_2.restore_model()
-    print(network_2.eval_state(initial_state))
-
-print(network.eval_state(initial_state))
-
-
+# print(network.eval_state(initial_state))
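
The loop that the new comments in do_ply describe boils down to the standalone sketch below. It is not code from the repository: legal_states and evaluate are hypothetical stand-ins for Board.calculate_legal_states and the network's eval_state/board_trans_func pair, and gen_21_rolls is re-derived from its name (the 21 distinct two-die rolls).

# Minimal sketch (assumptions noted above): for every board, average over the
# 21 distinct rolls of the best value among the states the opposing player
# (player * -1) can reach with that roll.

def gen_21_rolls():
    # 21 distinct backgammon rolls; the order of the two dice does not matter.
    return [(i, j) for i in range(1, 7) for j in range(i, 7)]

def one_ply_scores(boards, player, legal_states, evaluate):
    scores = []
    for board in boards:
        per_roll_best = []
        for roll in gen_21_rolls():
            reachable = legal_states(board, player * -1, roll)
            values = [evaluate(b, player * -1) for b in reachable]
            # Mirror the patched code: flip values with (1 - v) when the
            # original player is -1, so max() always favours that player.
            values = [v if player == 1 else (1 - v) for v in values]
            per_roll_best.append(max(values))
        scores.append(sum(per_roll_best) / len(per_roll_best))
    return scores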