added network_test and some comments

This commit is contained in:
Alexander Munch-Hansen 2018-04-29 12:14:14 +02:00
parent afa6504b05
commit 3f6849048e
2 changed files with 66 additions and 22 deletions

View File

@ -190,7 +190,10 @@ class Network:
"""
# find all legal states from the given board and the given roll
init_legal_states = Board.calculate_legal_states(board, player, roll)
# find all values for the above boards
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
# list.reverse() works in place and returns None, so it cannot be chained with [:15]; reverse the list first, then slice it.
@ -201,6 +204,7 @@ class Network:
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
if player == 1:
best_fifteen.reverse()
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
@ -331,25 +335,37 @@ class Network:
# --- body of Network.do_ply (the `def` line sits outside this diff hunk) ---
# Presumably gen_21_rolls() yields the 21 distinct unordered two-dice rolls — TODO confirm.
all_rolls = gen_21_rolls()
# Collects one expected score per input board, in the same order as `boards`.
all_rolls_scores = []
# loop over boards
for a_board in boards:
a_board_scores = []
# loop over all rolls, for each board
for roll in all_rolls:
# find all states we can get to, given the board and roll and the opposite player
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
# find scores for each board found above
spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
for new_board in all_rolls_boards]
# We need 1-score for the -1 player
# if the original player is the -1 player, then we need to find (1-value)
spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
# find the best score
# NOTE(review): max() raises ValueError if calculate_legal_states returns no
# states for some roll — confirm a legal state (e.g. a no-op move) always exists.
best_score = max(spec_roll_scores)
# append the best score to a_board_scores, where we keep track of the best score for each board
a_board_scores.append(best_score)
# save the expected average of board scores
# NOTE(review): this plain average weights the 21 distinct rolls equally, but
# doubles occur with probability 1/36 vs 2/36 for non-doubles — confirm the
# uniform weighting is intended.
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
# return all the average scores
return all_rolls_scores
def eval(self, episode_count, trained_eps = 0, tf_session = None):
"""
Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
@ -545,3 +561,5 @@ class Network:
writer.close()
return outcomes

View File

@ -3,30 +3,56 @@ import tensorflow as tf
# network_test.py — ad-hoc manual smoke-test script for the Network class.
# (Viewed through a diff hunk; context lines such as the `Network` import may be
# outside the visible range — TODO confirm against the full file.)
import random
import numpy as np
from board import Board
import main
# Start from the project-wide config and force creation of a fresh model.
config = main.config.copy()
config['model'] = "tesauro_blah"
config['force_creation'] = True
# NOTE(review): `Network` is used here but not imported in the visible lines —
# confirm the import exists in the unshown context.
network = Network(config, config['model'])
session = tf.Session()
graph_lol = tf.Graph()
session.run(tf.global_variables_initializer())
network.restore_model(session)
# Three boards to evaluate: the standard starting position plus two hand-built ones.
initial_state = Board.initial_state
initial_state_1 = ( 0,
0, 0, 0, 2, 0, -5,
0, -3, 0, 0, 0, 0,
-5, 0, 0, 0, 3, 5,
0, 0, 0, 0, 5, -2,
0 )
initial_state_2 = ( 0,
-5, -5, -3, -2, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 15, 0, 0,
0, 0, 0, 0, 0, 0,
0 )
# NOTE(review): a set iterates in nondeterministic order, so the score lists
# printed below are not guaranteed to line up with the literal order above.
boards = {initial_state,
initial_state_1,
initial_state_2 }
print("-"*30)
print(network.do_ply(session, boards, 1))
# NOTE(review): `network` is rebound with a single-argument constructor call,
# unlike Network(config, config['model']) above — confirm which signature is
# the current one.
network = Network(session)
print(" "*10 + "network_test")
print(" "*20 + "Depth 1")
print(network.n_ply(1, session, boards, 1))
# A single board reshaped to (1, 26) for direct eval_state calls.
initial_state = np.array(( 0,
2, 0, 0, 0, 0, -5,
0, -3, 0, 0, 0, 5,
-5, 0, 0, 0, 3, 0,
5, 0, 0, 0, 0, -2,
0 )).reshape((1,26))
#print(x.shape)
# Evaluate the same state from a second session/graph for comparison.
# NOTE(review): restore_model() is called with no session here, unlike
# network.restore_model(session) above — confirm the intended API.
with graph_lol.as_default():
session_2 = tf.Session(graph = graph_lol)
network_2 = Network(session_2)
network_2.restore_model()
print(network_2.eval_state(initial_state))
print(network.eval_state(initial_state))
print(" "*20 + "Depth 2")
print(network.n_ply(2, session, boards, 1))
# Earlier experiment kept for reference (duplicates the graph_lol block above).
# #print(x.shape)
# with graph_lol.as_default():
# session_2 = tf.Session(graph = graph_lol)
# network_2 = Network(session_2)
# network_2.restore_model()
# print(network_2.eval_state(initial_state))
# print(network.eval_state(initial_state))