added network_test and some comments
This commit is contained in:
parent
afa6504b05
commit
3f6849048e
20
network.py
20
network.py
|
@ -190,7 +190,10 @@ class Network:
|
|||
|
||||
"""
|
||||
|
||||
# find all legal states from the given board and the given roll
|
||||
init_legal_states = Board.calculate_legal_states(board, player, roll)
|
||||
|
||||
# find all values for the above boards
|
||||
zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
|
||||
|
||||
# Python's list.reverse() works in place and returns None, so [:15] cannot be chained onto the call; reverse the list first, then slice it.
|
||||
|
@ -201,6 +204,7 @@ class Network:
|
|||
# player 1 wishes to maximize. It's not needed for player -1, since that player seeks to minimize.
|
||||
if player == 1:
|
||||
best_fifteen.reverse()
|
||||
|
||||
best_fifteen_boards = [x[0] for x in best_fifteen[:15]]
|
||||
|
||||
all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
|
||||
|
@ -331,25 +335,37 @@ class Network:
|
|||
all_rolls = gen_21_rolls()
|
||||
|
||||
all_rolls_scores = []
|
||||
|
||||
# loop over boards
|
||||
for a_board in boards:
|
||||
a_board_scores = []
|
||||
|
||||
# loop over all rolls, for each board
|
||||
for roll in all_rolls:
|
||||
|
||||
# find all states we can get to, given the board and roll and the opposite player
|
||||
all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
|
||||
|
||||
# find scores for each board found above
|
||||
spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
|
||||
for new_board in all_rolls_boards]
|
||||
|
||||
# We need 1-score for the -1 player
|
||||
# if the original player is the -1 player, then we need to find (1-value)
|
||||
spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
|
||||
|
||||
# find the best score
|
||||
best_score = max(spec_roll_scores)
|
||||
|
||||
# append the best score to a_board_scores, where we keep track of the best score for each board
|
||||
a_board_scores.append(best_score)
|
||||
|
||||
# save the expected average of board scores
|
||||
all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
|
||||
|
||||
# return all the average scores
|
||||
return all_rolls_scores
|
||||
|
||||
|
||||
def eval(self, episode_count, trained_eps = 0, tf_session = None):
|
||||
"""
|
||||
Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
|
||||
|
@ -545,3 +561,5 @@ class Network:
|
|||
writer.close()
|
||||
|
||||
return outcomes
|
||||
|
||||
|
||||
|
|
|
@ -3,30 +3,56 @@ import tensorflow as tf
|
|||
import random
|
||||
import numpy as np
|
||||
|
||||
|
||||
from board import Board
|
||||
|
||||
import main
|
||||
|
||||
config = main.config.copy()
|
||||
config['model'] = "tesauro_blah"
|
||||
config['force_creation'] = True
|
||||
network = Network(config, config['model'])
|
||||
|
||||
session = tf.Session()
|
||||
graph_lol = tf.Graph()
|
||||
|
||||
session.run(tf.global_variables_initializer())
|
||||
network.restore_model(session)
|
||||
initial_state = Board.initial_state
|
||||
|
||||
initial_state_1 = ( 0,
|
||||
0, 0, 0, 2, 0, -5,
|
||||
0, -3, 0, 0, 0, 0,
|
||||
-5, 0, 0, 0, 3, 5,
|
||||
0, 0, 0, 0, 5, -2,
|
||||
0 )
|
||||
|
||||
initial_state_2 = ( 0,
|
||||
-5, -5, -3, -2, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 15, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0 )
|
||||
|
||||
boards = {initial_state,
|
||||
initial_state_1,
|
||||
initial_state_2 }
|
||||
|
||||
|
||||
print("-"*30)
|
||||
print(network.do_ply(session, boards, 1))
|
||||
|
||||
network = Network(session)
|
||||
print(" "*10 + "network_test")
|
||||
print(" "*20 + "Depth 1")
|
||||
print(network.n_ply(1, session, boards, 1))
|
||||
|
||||
initial_state = np.array(( 0,
|
||||
2, 0, 0, 0, 0, -5,
|
||||
0, -3, 0, 0, 0, 5,
|
||||
-5, 0, 0, 0, 3, 0,
|
||||
5, 0, 0, 0, 0, -2,
|
||||
0 )).reshape((1,26))
|
||||
|
||||
|
||||
|
||||
|
||||
#print(x.shape)
|
||||
with graph_lol.as_default():
|
||||
session_2 = tf.Session(graph = graph_lol)
|
||||
network_2 = Network(session_2)
|
||||
network_2.restore_model()
|
||||
print(network_2.eval_state(initial_state))
|
||||
|
||||
print(network.eval_state(initial_state))
|
||||
print(" "*20 + "Depth 2")
|
||||
print(network.n_ply(2, session, boards, 1))
|
||||
|
||||
# #print(x.shape)
|
||||
# with graph_lol.as_default():
|
||||
# session_2 = tf.Session(graph = graph_lol)
|
||||
# network_2 = Network(session_2)
|
||||
# network_2.restore_model()
|
||||
# print(network_2.eval_state(initial_state))
|
||||
|
||||
# print(network.eval_state(initial_state))
|
||||
|
|
Loading…
Reference in New Issue
Block a user