Updated server code.

restore restore_model
tesauro fat and diffs in values
2018-06-07 21:36:06 +02:00 · 2018-05-22 20:49:10 +02:00 · 2018-05-22 15:39:14 +02:00 · 2018-05-22 15:38:04 +02:00 · 2018-05-22 15:36:23 +02:00 · 2018-05-22 13:16:10 +00:00
7 changed files with 75 additions and 45 deletions
--- a/app.py
+++ b/app.py
@@ -2,6 +2,7 @@ from flask import Flask, request, jsonify
 from flask_json import FlaskJSON, as_json_p
 from flask_cors import CORS
 from board import Board
 from eval import Eval
 import main
 import random
 from network import Network
@@ -17,8 +18,8 @@ CORS(app)
 config = main.config.copy()
 config['model'] = "player_testings"
-config['ply'] = "1"
+config['ply'] = "0"
-config['board_representation'] = 'quack-fat'
+config['board_representation'] = 'tesauro'
 network = Network(config, config['model'])
 network.restore_model()
@@ -90,11 +91,16 @@ def bot_move():
    data = request.get_json(force=True)
    board = [int(x) for x in data['board'].split(',')]
    use_pubeval = bool(data['pubeval'])
-    roll = (random.randrange(1,7), random.randrange(1,7))
+    roll = (random.randrange(1, 7), random.randrange(1, 7))
-    # print(roll)
+
-    board, _ = network.make_move(tuple(board), roll, 1)
+    if use_pubeval:
-    # print("Boards!:",board)
+        board, value = Eval.make_pubeval_move(tuple(board), 1, roll)
    else:
        board, _ = network.make_move(tuple(board), roll, 1)
    # print("Board!:",board)
    return ",".join([str(x) for x in list(board)])
--- a/bin/0-ply-tests.rb
+++ b/bin/0-ply-tests.rb
@@ -50,7 +50,7 @@ run_stuff(board_rep, model_name, ply)
 board_rep = "quack-norm"
 model_name = "quack-norm_test_0_ply"
-ply = 1
+ply = 0
 run_stuff(board_rep, model_name, ply)
@@ -67,7 +67,7 @@ run_stuff(board_rep, model_name, ply)
 board_rep = "tesauro"
 model_name = "tesauro_test_0_ply"
-ply = 1
+ply = 0
 run_stuff(board_rep, model_name, ply)
--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@@ -1,30 +1,30 @@
 #!/usr/bin/env ruby
 MODELS_DIR = 'models'
 def save(model_name)
  require 'date'
-  models_dir = 'models'
+  model_path = File.join(MODELS_DIR, model_name)
  model_path = File.join(models_dir, model_name)
  if not File.exists? model_path then
    return false
  end
  episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i
  puts "Found model #{model_name} with episodes #{episode_count} trained!"
  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
-  save_path = File.join(models_dir, 'saves', file_name)
+  save_path = File.join(MODELS_DIR, 'saves', file_name)
  puts "Saving to #{save_path}"
-  system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
+  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
  return true
 end
 def train(model, episodes)
  system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
 end
 def force_train(model, episodes)
  system("python3", "main.py", "--train", "--force-creation", "--model", model, "--episodes", episodes.to_s)
 end
 def evaluate(model, episodes, method)
  system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
 end
@@ -33,11 +33,9 @@ model = ARGV[0]
 if model.nil? then raise "no model specified" end
-while true do
+if not File.exists? File.join(MODELS_DIR, model) then
  force_train model, 10
  save model
  train model, 1000
  save model
  train model, 1000
  3.times do
    evaluate model, 250, "pubeval"
  end
@@ -45,3 +43,27 @@ while true do
    evaluate model, 250, "dumbeval"
  end
 end
 # while true do
 #   save model
 #   train model, 1000
 #   save model
 #   train model, 1000
 #   3.times do
 #     evaluate model, 250, "pubeval"
 #   end
 #   3.times do
 #     evaluate model, 250, "dumbeval"
 #   end
 # end
 while true do
  save model
  train model, 500
  5.times do
    evaluate model, 250, "pubeval"
  end
  5.times do
    evaluate model, 250, "dumbeval"
  end
 end
--- a/board.py
+++ b/board.py
@@ -16,8 +16,6 @@ class Board:
        return quack.idxs_with_checkers_of_player(board, player)
    # TODO: Write a test for this
    # TODO: Make sure that the bars fit, 0 represents the -1 player and 25 represents the 1 player
    # index 26 is player 1 home, index 27 is player -1 home
    @staticmethod
    def board_features_to_pubeval(board, player):
@@ -249,9 +247,6 @@ class Board:
        # Find all points with checkers on them belonging to the player
        # Iterate through each index and check if it's a possible move given the roll
        # TODO: make sure that it is not possible to do nothing on first part of
        #       turn and then do something with the second die
        def calc_moves(board, face_value):
            if face_value == 0:
                return [board]
@@ -273,23 +268,13 @@ class Board:
        # print("Dice permuts:",dice_permutations)
        for roll in dice_permutations:
            # Calculate boards resulting from first move
            #print("initial board: ", board)
            #print("roll:", roll)
            #print("Rest of roll:",roll[1:])
            boards = calc_moves(board, roll[0])
            #print("Boards:",boards)
            #print("Roll:",roll[0])
            #print("boards after first die: ", boards)
            for die in roll[1:]:
                # Calculate boards resulting from second move
                nested_boards = [calc_moves(board, die) for board in boards]
                #print("nested boards: ", nested_boards)
                boards = [board for boards in nested_boards for board in boards]
-                # What the fuck
+
                #for board in boards:
                #    print(board)
                #    print("type__:",type(board))
                # Add resulting unique boards to set of legal boards resulting from roll
                #print("printing boards from calculate_legal_states: ", boards)
--- a/main.py
+++ b/main.py
@@ -209,6 +209,8 @@ if __name__ == "__main__":
    elif args.eval:
        network = Network(config, config['model'])
        network.restore_model()
        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
--- a/network.py
+++ b/network.py
@@ -114,15 +114,14 @@ class Network:
        with tf.GradientTape() as tape:
            value = self.model(prev_state.reshape(1,-1))
        grads = tape.gradient(value, self.model.variables)
        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
-        with tf.variable_scope('apply_gradients'):
+        for grad, train_var in zip(grads, self.model.variables):
-            for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
-                backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)
                train_var.assign_add(backprop_calc)
@@ -299,7 +298,7 @@ class Network:
        length_list = []
        test_list = []
        # Prepping of data
-        start = time.time()
+        # start = time.time()
        for board in boards:
            length = 0
            for roll in all_rolls:
@@ -478,7 +477,6 @@ class Network:
        for episode in range(1, episodes + 1):
            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
            # TODO decide which player should be here
            # player = 1
            player = random.choice([-1,1])
--- a/test.py
+++ b/test.py
@@ -737,6 +737,23 @@ class TestBoardFlip(unittest.TestCase):
        self.assertTrue((Board.board_features_tesauro(board, 1) ==
                         np.array(expected).reshape(1, 198)).all())
    def test_pubeval_features(self):
        board = Board.initial_state
        expected = (0,
                    2, 0, 0, 0, 0, -5,
                    0, -3, 0, 0, 0, 5,
                    -5, 0, 0, 0, 3, 0,
                    5, 0, 0, 0, 0, -2,
                    0,
                    0, 0)
        import numpy as np
        self.assertTrue((Board.board_features_to_pubeval(board, 1) ==
                         np.array(expected).reshape(1, 28)).all())
        self.assertTrue((Board.board_features_to_pubeval(board, -1) ==
                         np.array(expected).reshape(1, 28)).all())
    def test_tesauro_bars(self):
        board = list(Board.initial_state)
        board[1] = 0
Author	SHA1	Message	Date
Alexander Munch-Hansen	ea4efc5a2b	Updated server code.	2018-06-07 21:36:06 +02:00
Christoffer Müller Madsen	26c0b469eb	restore restore_model	2018-05-22 20:49:10 +02:00
Alexander Munch-Hansen	f170bad9b1	tesauro fat and diffs in values	2018-05-22 15:39:14 +02:00
Christoffer Müller Madsen	6e061171da	rm TODO	2018-05-22 15:38:04 +02:00
Christoffer Müller Madsen	40c228ef01	pubeval tests	2018-05-22 15:36:23 +02:00
Christoffer Müller Madsen	c2c6c89e9f	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!8	2018-05-22 13:16:10 +00:00
Christoffer Müller Madsen	b7708b3675	train-evaluate-save	2018-05-22 15:15:36 +02:00
Christoffer Müller Madsen	bad870c27a	update 0-ply-tests	2018-05-22 15:15:15 +02:00