Updated server code.

restore restore_model
tesauro fat and diffs in values
2018-06-07 21:36:06 +02:00 · 2018-05-22 20:49:10 +02:00 · 2018-05-22 15:39:14 +02:00 · 2018-05-22 15:38:04 +02:00 · 2018-05-22 15:36:23 +02:00 · 2018-05-22 13:16:10 +00:00
7 changed files with 75 additions and 45 deletions
--- a/app.py
+++ b/app.py
@@ -2,6 +2,7 @@ from flask import Flask, request, jsonify
 from flask_json import FlaskJSON, as_json_p
 from flask_cors import CORS
 from board import Board
+from eval import Eval
 import main
 import random
 from network import Network
@@ -17,8 +18,8 @@ CORS(app)

 config = main.config.copy()
 config['model'] = "player_testings"
-config['ply'] = "1"
-config['board_representation'] = 'quack-fat'
+config['ply'] = "0"
+config['board_representation'] = 'tesauro'
 network = Network(config, config['model'])

 network.restore_model()
@@ -90,11 +91,16 @@ def bot_move():
    data = request.get_json(force=True)

    board = [int(x) for x in data['board'].split(',')]
+    use_pubeval = bool(data['pubeval'])

-    roll = (random.randrange(1,7), random.randrange(1,7))
-    # print(roll)
-    board, _ = network.make_move(tuple(board), roll, 1)
-    # print("Boards!:",board)
+    roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+    if use_pubeval:
+        board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
+    else:
+        board, _ = network.make_move(tuple(board), roll, 1)
+
+    # print("Board!:",board)

    return ",".join([str(x) for x in list(board)])

--- a/bin/0-ply-tests.rb
+++ b/bin/0-ply-tests.rb
@@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)

 board_rep = "quack-norm"
 model_name = "quack-norm_test_0_ply"
-ply = 1
+ply = 0

 run_stuff(board_rep, model_name, ply)

@@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)

 board_rep = "tesauro"
 model_name = "tesauro_test_0_ply"
-ply = 1
+ply = 0

 run_stuff(board_rep, model_name, ply)

--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@@ -1,30 +1,30 @@
 #!/usr/bin/env ruby
+MODELS_DIR = 'models'
+
 def save(model_name)
  require 'date'

-  models_dir = 'models'
-  model_path = File.join(models_dir, model_name)
-  if not File.exists? model_path then
-    return false
-  end
+  model_path = File.join(MODELS_DIR, model_name)

  episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i

  puts "Found model #{model_name} with episodes #{episode_count} trained!"

  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
-  save_path = File.join(models_dir, 'saves', file_name)
+  save_path = File.join(MODELS_DIR, 'saves', file_name)
  puts "Saving to #{save_path}"
  
-  system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
-
-  return true
+  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
 end

 def train(model, episodes)
  system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
 end

+def force_train(model, episodes)
+  system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
+end
+
 def evaluate(model, episodes, method)
  system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
 end
@@ -33,11 +33,9 @@ model = ARGV[0]

 if model.nil? then raise "no model specified" end

-while true do
+if not File.exists? File.join(MODELS_DIR, model) then
+  force_train model, 10
  save model
-  train model, 1000
-  save model
-  train model, 1000
  3.times do
    evaluate model, 250, "pubeval"
  end
@@ -45,3 +43,27 @@ while true do
    evaluate model, 250, "dumbeval"
  end
 end
+
+# while true do
+#   save model
+#   train model, 1000
+#   save model
+#   train model, 1000
+#   3.times do
+#     evaluate model, 250, "pubeval"
+#   end
+#   3.times do
+#     evaluate model, 250, "dumbeval"
+#   end
+# end
+
+while true do
+  save model
+  train model, 500
+  5.times do
+    evaluate model, 250, "pubeval"
+  end
+  5.times do
+    evaluate model, 250, "dumbeval"
+  end
+end
--- a/board.py
+++ b/board.py
@@ -15,9 +15,7 @@ class Board:
    def idxs_with_checkers_of_player(board, player):
        return quack.idxs_with_checkers_of_player(board, player)

-
-    # TODO: Write a test for this
-    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
+    
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):
@@ -249,9 +247,6 @@ class Board:
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll

-        # TODO: make sure that it is not possible to do nothing on first part of
-        #       turn and then do something with the second die
-        
        def calc_moves(board, face_value):
            if face_value == 0:
                return [board]
@@ -273,23 +268,13 @@ class Board:
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
-            #print("initial board: ", board)
-            #print("roll:", roll)
-            #print("Rest of roll:",roll[1:])
            boards = calc_moves(board, roll[0])
-            #print("Boards:",boards)
-            #print("Roll:",roll[0])
-            #print("boards after first die: ", boards)

            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
-                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]
-                # What the fuck
-                #for board in boards:
-                #    print(board)
-                #    print("type__:",type(board))
+
                # Add resulting unique boards to set of legal boards resulting from roll

                #print("printing boards from calculate_legal_states: ", boards)
--- a/main.py
+++ b/main.py
@@ -209,6 +209,8 @@ if __name__ == "__main__":

    elif args.eval:
        network = Network(config, config['model'])
+        network.restore_model()
+
        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
--- a/network.py
+++ b/network.py
@@ -114,15 +114,14 @@ class Network:

        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))
+
        grads = tape.gradient(value, self.model.variables)

        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
-        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))

-        with tf.variable_scope('apply_gradients'):
-            for grad, train_var in zip(grads, self.model.variables):
-                backprop_calc = self.learning_rate * difference_in_values * grad
-                train_var.assign_add(backprop_calc)
+        for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)



@@ -299,7 +298,7 @@ class Network:
        length_list = []
        test_list = []
        # Prepping of data
-        start = time.time()
+        # start = time.time()
        for board in boards:
            length = 0
            for roll in all_rolls:
@@ -478,7 +477,6 @@ class Network:
        for episode in range(1, episodes + 1):

            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-            # TODO decide which player should be here

            # player = 1
            player = random.choice([-1,1])
--- a/test.py
+++ b/test.py
@@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())

+    def test_pubeval_features(self):
+        board = Board.initial_state
+
+        expected = (0,
+                    2, 0, 0, 0, 0, -5,
+                    0, -3, 0, 0, 0, 5,
+                    -5, 0, 0, 0, 3, 0,
+                    5, 0, 0, 0, 0, -2,
+                    0,
+                    0, 0)
+
+        import numpy as np
+        self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
+                         np.array(expected).reshape(1, 28)).all())
+        self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
+                         np.array(expected).reshape(1, 28)).all())
+
    def test_tesauro_bars(self):
        board = list(Board.initial_state)
        board[1] = 0
Author	SHA1	Message	Date
Alexander Munch-Hansen	ea4efc5a2b	Updated server code.	2018-06-07 21:36:06 +02:00
Christoffer Müller Madsen	26c0b469eb	restore restore_model	2018-05-22 20:49:10 +02:00
Alexander Munch-Hansen	f170bad9b1	tesauro fat and diffs in values	2018-05-22 15:39:14 +02:00
Christoffer Müller Madsen	6e061171da	rm TODO	2018-05-22 15:38:04 +02:00
Christoffer Müller Madsen	40c228ef01	pubeval tests	2018-05-22 15:36:23 +02:00
Christoffer Müller Madsen	c2c6c89e9f	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!8	2018-05-22 13:16:10 +00:00
Christoffer Müller Madsen	b7708b3675	train-evaluate-save	2018-05-22 15:15:36 +02:00
Christoffer Müller Madsen	bad870c27a	update 0-ply-tests	2018-05-22 15:15:15 +02:00