tesauro fat and diffs in values
commit f170bad9b1
parent 6e061171da
board.py (12 changed lines)
@@ -268,23 +268,13 @@ class Board:
         # print("Dice permuts:",dice_permutations)
         for roll in dice_permutations:
             # Calculate boards resulting from first move
-            #print("initial board: ", board)
-            #print("roll:", roll)
-            #print("Rest of roll:",roll[1:])
             boards = calc_moves(board, roll[0])
-            #print("Boards:",boards)
-            #print("Roll:",roll[0])
-            #print("boards after first die: ", boards)
 
             for die in roll[1:]:
                 # Calculate boards resulting from second move
                 nested_boards = [calc_moves(board, die) for board in boards]
-                #print("nested boards: ", nested_boards)
                 boards = [board for boards in nested_boards for board in boards]
-                # What the fuck
-                #for board in boards:
-                # print(board)
-                # print("type__:",type(board))
             # Add resulting unique boards to set of legal boards resulting from roll
 
         #print("printing boards from calculate_legal_states: ", boards)
 
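
Note: the logic kept by this hunk generates every board reachable from a roll by applying calc_moves for the first die, then fanning each remaining die out over all intermediate boards and flattening. A minimal sketch of that pattern, assuming a calc_moves(board, die) that returns the boards reachable with one die and hashable (e.g. tuple) board states; the helper names below are illustrative, not the repository's API:

    from itertools import permutations

    def boards_for_roll(board, roll, calc_moves):
        # First die: moves available from the starting board.
        boards = calc_moves(board, roll[0])
        # Remaining dice: apply each die to every intermediate board, then flatten one level.
        for die in roll[1:]:
            nested_boards = [calc_moves(b, die) for b in boards]
            boards = [b for bs in nested_boards for b in bs]
        return boards

    def calculate_legal_states(board, dice, calc_moves):
        # The two orderings of the dice can reach different boards, so union over permutations.
        legal = set()
        for roll in set(permutations(dice)):
            legal.update(boards_for_roll(board, roll, calc_moves))
        return legal
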
network.py (12 changed lines)
@@ -114,15 +114,14 @@ class Network:
 
         with tf.GradientTape() as tape:
             value = self.model(prev_state.reshape(1,-1))
 
         grads = tape.gradient(value, self.model.variables)
 
         difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
-        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
 
-        with tf.variable_scope('apply_gradients'):
-            for grad, train_var in zip(grads, self.model.variables):
-                backprop_calc = self.learning_rate * difference_in_values * grad
-                train_var.assign_add(backprop_calc)
+        for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)
 
 
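
Note: the update kept by this hunk is the TD(0) / TD-Gammon style rule w <- w + alpha * (V(s') - V(s)) * dV(s)/dw, applied directly to the model's variables in eager mode; the graph-style tf.variable_scope / tf.summary.scalar lines appear to be dropped. A self-contained sketch of that update, assuming eager execution, a Keras-style model mapping a board vector to a scalar value, and a plain dense network where every variable receives a gradient; the function name and arguments are illustrative:

    import numpy as np
    import tensorflow as tf  # assumes eager execution (TF 2.x, or TF 1.x with tf.enable_eager_execution())

    def td_update(model, prev_state, value_next, learning_rate=0.01):
        prev_state = np.asarray(prev_state, dtype=np.float32)
        with tf.GradientTape() as tape:
            value = model(prev_state.reshape(1, -1))          # V(s_t), recorded on the tape
        grads = tape.gradient(value, model.variables)          # dV(s_t)/dw for every weight
        difference_in_values = tf.reshape(value_next - value, [])  # V(s_{t+1}) - V(s_t), treated as a constant
        for grad, train_var in zip(grads, model.variables):
            train_var.assign_add(learning_rate * difference_in_values * grad)  # w += alpha * delta * grad
        return difference_in_values
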
@@ -299,7 +298,7 @@ class Network:
         length_list = []
         test_list = []
         # Prepping of data
-        start = time.time()
+        # start = time.time()
         for board in boards:
             length = 0
             for roll in all_rolls:
@@ -478,7 +477,6 @@ class Network:
         for episode in range(1, episodes + 1):
 
             sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-            # TODO decide which player should be here
 
             # player = 1
             player = random.choice([-1,1])
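
Note: with the TODO removed, the starting player is simply drawn at random each episode, so neither side is systematically the first mover during self-play. A hypothetical skeleton of the surrounding loop; play_episode is an assumed helper, not code from this repository:

    import random
    import sys

    def train(network, episodes, trained_eps=0):
        for episode in range(1, episodes + 1):
            sys.stderr.write("[TRAIN] Episode {}\n".format(episode + trained_eps))
            player = random.choice([-1, 1])                  # random starting player instead of a fixed one
            network.play_episode(starting_player=player)     # assumed helper for one self-play game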