From f170bad9b1cdc853a0a5a080bd1d70eb1f9fd1b8 Mon Sep 17 00:00:00 2001
From: Alexander Munch-Hansen
Date: Tue, 22 May 2018 15:39:14 +0200
Subject: [PATCH] tesauro fat and diffs in values

---
 board.py   | 12 +-----------
 network.py | 12 +++++-------
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/board.py b/board.py
index c341a7a..c32340d 100644
--- a/board.py
+++ b/board.py
@@ -268,23 +268,13 @@ class Board:
         # print("Dice permuts:",dice_permutations)
         for roll in dice_permutations:
             # Calculate boards resulting from first move
-            #print("initial board: ", board)
-            #print("roll:", roll)
-            #print("Rest of roll:",roll[1:])
             boards = calc_moves(board, roll[0])
-            #print("Boards:",boards)
-            #print("Roll:",roll[0])
-            #print("boards after first die: ", boards)
 
             for die in roll[1:]:
                 # Calculate boards resulting from second move
                 nested_boards = [calc_moves(board, die) for board in boards]
-                #print("nested boards: ", nested_boards)
                 boards = [board for boards in nested_boards for board in boards]
 
-            # What the fuck
-            #for board in boards:
-            #    print(board)
-            #    print("type__:",type(board))
+
             # Add resulting unique boards to set of legal boards resulting from roll
             #print("printing boards from calculate_legal_states: ", boards)
diff --git a/network.py b/network.py
index 1dbbbc1..a1b5149 100644
--- a/network.py
+++ b/network.py
@@ -114,15 +114,14 @@ class Network:
 
         with tf.GradientTape() as tape:
             value = self.model(prev_state.reshape(1,-1))
+        grads = tape.gradient(value, self.model.variables)
 
         difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
 
-        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
-        with tf.variable_scope('apply_gradients'):
-            for grad, train_var in zip(grads, self.model.variables):
-                backprop_calc = self.learning_rate * difference_in_values * grad
-                train_var.assign_add(backprop_calc)
+        for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)
 
@@ -299,7 +298,7 @@ class Network:
         length_list = []
         test_list = []
         # Prepping of data
-        start = time.time()
+        # start = time.time()
         for board in boards:
             length = 0
             for roll in all_rolls:
@@ -478,7 +477,6 @@ class Network:
 
         for episode in range(1, episodes + 1):
             sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-            # TODO decide which player should be here
             # player = 1
             player = random.choice([-1,1])
 
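The network.py hunk above drops the graph-mode apply_gradients block and applies the TD-style update directly in eager mode: each weight moves by learning_rate * (V(s') - V(s)) * dV(s)/dw, with dV(s)/dw taken from the GradientTape recorded around the forward pass on the previous state. A minimal standalone sketch of that update follows; the 198-unit Tesauro-style input width, the 40-unit hidden layer, and the random placeholder states are assumptions made for illustration, not values taken from this patch.

    # Sketch of a TD(0)-style eager update (layer sizes and states are illustrative).
    import numpy as np
    import tensorflow as tf

    learning_rate = 0.01

    # Small value network: sigmoid output approximating V(s) in [0, 1].
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(198,)),               # assumed Tesauro-style feature width
        tf.keras.layers.Dense(40, activation="sigmoid"),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

    prev_state = np.random.rand(198).astype(np.float32)   # stand-in for the previous board features
    next_state = np.random.rand(198).astype(np.float32)   # stand-in for the successor board features

    value_next = model(next_state.reshape(1, -1))          # TD target V(s'), no gradient needed through it

    # Record the forward pass on the previous state so dV(s)/dw can be taken afterwards.
    with tf.GradientTape() as tape:
        value = model(prev_state.reshape(1, -1))
    grads = tape.gradient(value, model.trainable_variables)

    # delta = V(s') - V(s); each weight is nudged by learning_rate * delta * dV(s)/dw.
    difference_in_values = tf.reshape(value_next - value, [])
    for grad, var in zip(grads, model.trainable_variables):
        var.assign_add(learning_rate * difference_in_values * grad)

Applying the update with assign_add rather than an optimizer keeps the sign convention of the TD rule explicit, which is what the patched train() code does with self.model.variables.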