From 160f5bd737b2147d6b07dc1f4d8358f6795c4f0e Mon Sep 17 00:00:00 2001
From: Pownie
Date: Sun, 22 Apr 2018 19:13:46 +0200
Subject: [PATCH] Added some comments and removed some old code

---
 network.py | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/network.py b/network.py
index 6c38216..5945eb9 100644
--- a/network.py
+++ b/network.py
@@ -1,5 +1,4 @@
 import tensorflow as tf
-from cup import Cup
 import numpy as np
 from board import Board
 import os
@@ -77,10 +76,6 @@ class Network:
 
         self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
 
-        # tf.reduce_sum basically finds the sum of its input, so this gives the
-        # difference between the two values, in case they should be lists, which
-        # they might be if our input changes
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
         difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
 
         tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
@@ -95,7 +90,6 @@ class Network:
 
         with tf.variable_scope('apply_gradients'):
             for gradient, trainable_var in zip(gradients, trainable_vars):
-                # Hopefully this is Δw_t = α(V_t+1 - V_t)▿_wV_t.
                 backprop_calc = self.learning_rate * difference_in_values * gradient
                 grad_apply = trainable_var.assign_add(backprop_calc)
                 apply_gradients.append(grad_apply)
@@ -148,6 +142,10 @@ class Network:
 
     def gen_21_rolls(self):
+        """
+        Calculate all possible rolls: [[1,1], [1,2], ...]
+        :return: All possible rolls
+        """
         a = []
         for x in range(1,7):
             for y in range(1,7):
@@ -187,7 +185,9 @@ class Network:
 
                 spec_roll_scores = []
                 all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
 
-                spec_roll_scores.append([self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards])
+                spec_roll_scores.append(
+                    [self.eval_state(sess, self.board_trans_func(new_board, player*-1)) for new_board in all_rolls_boards]
+                )
 
                 best_score = max(spec_roll_scores)
@@ -201,8 +201,27 @@ class Network:
 
         return [best_board, max(all_rolls_scores)]
 
     def eval(self, episode_count, trained_eps = 0, tf_session = None):
+        """
+        Used to evaluate a model. Can use either pubeval, a model playing at an intermediate level, or dumbeval,
+        a model which has been given random weights, so it plays deterministically but essentially at random.
+
+        :param episode_count: The number of episodes to run
+        :param trained_eps: The number of episodes the model being evaluated has trained for
+        :param tf_session:
+        :return: outcomes: The outcomes of the evaluation session
+        """
         def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+            """
+            Do the actual evaluation
+
+            :param sess: The TensorFlow session to run the evaluation in
+            :param method: Either pubeval or dumbeval
+            :param episodes: The number of episodes to use in the evaluation
+            :param trained_eps: The number of episodes the model being evaluated has trained for
+            :return: outcomes: Described above
+            """
+
             start_time = time.time()
 
             def print_time_estimate(eps_completed):
@@ -337,6 +356,9 @@ class Network:
                                                        (random.randrange(1, 7), random.randrange(1, 7)),
                                                        player)
 
+                    # print("The evaluation of the previous state:\n", self.eval_state(sess, self.board_trans_func(prev_board, player)))
+                    # print("The evaluation of the current_state:\n", cur_board_value)
+
                     # adjust weights
                     sess.run(self.training_op,
                              feed_dict={self.x: self.board_trans_func(prev_board, player),
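
The comment removed in the apply_gradients hunk spelled out the intended update, delta_w_t = alpha * (V_{t+1} - V_t) * grad_w V_t, which is what backprop_calc = self.learning_rate * difference_in_values * gradient followed by assign_add applies to each trainable variable. A minimal NumPy sketch of that TD(0)-style rule for a linear value function follows; the names and feature vectors are illustrative only and are not taken from network.py.

import numpy as np

def td0_update(w, state_t, state_t1, alpha=0.1):
    # V_t and V_{t+1} for a linear value function V(s) = w . s
    v_t = float(np.dot(w, state_t))
    v_t1 = float(np.dot(w, state_t1))
    # For a linear model, grad_w V_t is simply the state vector itself
    grad = state_t
    # delta_w = alpha * (V_{t+1} - V_t) * grad_w V_t
    return w + alpha * (v_t1 - v_t) * grad

# Tiny usage example with made-up feature vectors
w = np.array([0.1, -0.2, 0.3, 0.0])
s_t = np.array([1.0, 0.0, 0.5, 0.0])
s_t1 = np.array([0.0, 1.0, 0.5, 0.0])
w = td0_update(w, s_t, s_t1)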
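
The reformatted spec_roll_scores call scores every board reachable under one of the opponent's rolls and keeps the best value. A self-contained sketch of that per-roll step is given below; legal_states and eval_state are placeholder callables standing in for Board.calculate_legal_states and the network's eval_state, not the real API.

def best_board_for_roll(board, player, roll, legal_states, eval_state):
    # All boards reachable from `board` when `player` plays `roll`
    boards = list(legal_states(board, player, roll))
    # Value assigned to each resulting board by the evaluator
    scores = [eval_state(b) for b in boards]
    # Keep the board with the highest value and its score
    best_score, best_board = max(zip(scores, boards), key=lambda pair: pair[0])
    return best_board, best_score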