Add yet another input argument, "--ply": 0 for no look-ahead, 1 for a single look-ahead.
2018-05-10 23:22:41 +02:00 · 2018-05-10 23:22:41 +02:00 · 504308a9af
commit 504308a9af
parent 3b57c10b5a
2 changed files with 24 additions and 12 deletions
--- a/main.py
+++ b/main.py
@ -40,6 +40,8 @@ parser.add_argument('--use-baseline', action='store_true',
                    help='use the baseline model, note, has size 28')
 parser.add_argument('--verbose', action='store_true',
                    help='If set, a lot of stuff will be printed')
+parser.add_argument('--ply', action='store', dest='ply',
+                    help='defines the amount of ply used when deciding what move to make')

 args = parser.parse_args()

@ -64,7 +66,8 @@ config = {
    'force_creation': args.force_creation,
    'use_baseline': args.use_baseline,
    'global_step': 0,
-    'verbose': args.verbose
+    'verbose': args.verbose,
+    'ply': args.ply

 }

--- a/network.py
+++ b/network.py
@ -23,6 +23,7 @@ class Network:
        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }

+
    def custom_tanh(self, x, name=None):
        """
        Scaled hyperbolic tangent: computes 2.0 * tanh(x).

        :param x: value passed straight to tf.tanh
        :param name: optional op name; it is forwarded to the inner tf.tanh call,
                     not to the outer scalar multiplication
        :return: result of tf.scalar_mul(2.0, tf.tanh(x))
        """
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

@ -31,6 +32,12 @@ class Network:
        :param config:
        :param name:
        """
+
+        move_options = {
+            '1': self.make_move_1_ply,
+            '0': self.make_move_0_ply
+        }
+
        tf.enable_eager_execution()

        xavier_init = tf.contrib.layers.xavier_initializer()
@ -40,6 +47,10 @@ class Network:

        self.name = name

+        self.make_move = move_options[
+            self.config['ply']
+        ]
+
        # Set board representation from config
        self.input_size, self.board_trans_func = Network.board_reps[
            self.config['board_representation']
@ -191,7 +202,7 @@ class Network:



-    def make_move(self, board, roll, player):
+    def make_move_0_ply(self, board, roll, player):
        """
        Find the best move given a board, roll and a player, by finding all possible states one can go to
        and then picking the best, by using the network to evaluate each state. The highest score is picked
@ -218,17 +229,16 @@ class Network:

        return [best_move, best_score]

-    def make_move_n_ply(self, sess, board, roll, player, n = 1):
+    def make_move_1_ply(self, board, roll, player):
        """
-
-        :param sess:
        :param board:
        :param roll:
        :param player:
-        :param n:
        :return:
        """
-        best_pair = self.calc_n_ply(n, sess, board, player, roll)
+        start = time.time()
+        best_pair = self.calculate_1_ply(board, roll, player)
+        print(time.time() - start)
        return best_pair


@ -303,7 +313,7 @@ class Network:

        all_rolls = gen_21_rolls()

-        start = time.time()
+        # start = time.time()

        list_of_moves = []

@ -318,9 +328,8 @@ class Network:
            list_of_moves.append(np.array(all_board_moves))


-        print(time.time() - start)
-
-        start = time.time()
+        # print(time.time() - start)
+        # start = time.time()

        # Running data through networks
        all_scores = [self.model.predict_on_batch(board) for board in list_of_moves]
@ -328,7 +337,7 @@ class Network:

        transformed_means = [x if player == 1 else (1-x) for x in scores_means]

-        print(time.time() - start)
+        # print(time.time() - start)
        return ([scores_means, transformed_means])