Updated server code.

restore restore_model
tesauro fat and diffs in values
2018-06-07 21:36:06 +02:00 · 2018-05-22 20:49:10 +02:00 · 2018-05-22 15:39:14 +02:00 · 2018-05-22 15:38:04 +02:00 · 2018-05-22 15:36:23 +02:00 · 2018-05-22 13:16:10 +00:00
14 changed files with 1452 additions and 501 deletions
--- a/app.py
+++ b/app.py
@ -0,0 +1,141 @@
+from flask import Flask, request, jsonify
+from flask_json import FlaskJSON, as_json_p
+from flask_cors import CORS
+from board import Board
+from eval import Eval
+import main
+import random
+from network import Network
+
+# Flask application object; the route decorators further down register on it.
+app = Flask(__name__)
+
+
+# Flask-JSON options (see the Flask-JSON configuration docs):
+# JSON_ADD_STATUS controls whether a status field is added to JSON responses,
+# JSON_JSONP_OPTIONAL controls whether a JSONP callback may be omitted.
+app.config['JSON_ADD_STATUS'] = False
+app.config['JSON_JSONP_OPTIONAL'] = False
+
+# Hook Flask-JSON and CORS handling into the app.
+json = FlaskJSON(app)
+CORS(app)
+
+# Start from a copy of main.py's config dict and override the entries the
+# server needs. NOTE(review): importing `main` appears to run its argument
+# parsing at import time - confirm that is acceptable for the server process.
+config = main.config.copy()
+config['model'] = "player_testings"
+# 'ply' is kept as a string: Network selects its move function with string keys.
+config['ply'] = "0"
+config['board_representation'] = 'tesauro'
+network = Network(config, config['model'])
+
+# Load the stored checkpoint for the model (if one exists) before serving.
+network.restore_model()
+
+
+def calc_move_sets(from_board, roll, player):
+    """
+    Build the candidate state sets for a roll.
+
+    :param from_board: board the moves start from
+    :param roll: dice values of the roll
+    :param player: player the legal states are generated for
+    :return: list of [legal_states, pips] pairs; one pair per die (played on
+             its own as [die, 0]) followed by one pair for the whole roll,
+             whose pips value is the sum of all dice
+    """
+    per_die = [[Board.calculate_legal_states(from_board, player, [die, 0]), die]
+               for die in roll]
+    whole_roll = [Board.calculate_legal_states(from_board, player, roll), sum(roll)]
+    return per_die + [whole_roll]
+
+
+def tmp_name(from_board, to_board, roll, player, total_moves, is_quad=False):
+    """
+    Check whether to_board is reachable from from_board and account for the dice used.
+
+    :param from_board: board before the move
+    :param to_board: board the caller wants to move to
+    :param roll: mutable list of remaining dice; a used die is zeroed in place
+    :param player: player making the move
+    :param total_moves: pips still available before this move
+    :param is_quad: True when the roll holds four dice instead of two
+    :return: (total_moves, roll, board); board is to_board when the move was
+             found in one of the move sets, otherwise from_board unchanged
+    """
+    sets = calc_move_sets(from_board, roll, player)
+    print("To board:\n",to_board)
+    print("All sets:\n",sets)
+
+    # The leading entries of `sets` each belong to a single die; the last
+    # entry is the whole roll played at once.
+    single_die_sets = 4 if is_quad else 2
+    for position, (states, pips) in enumerate(sets):
+        if to_board not in list(states):
+            continue
+        total_moves -= pips
+        if position < single_die_sets:
+            roll[position] = 0
+        else:
+            roll = [0, 0]
+        return total_moves, roll, to_board
+
+    return total_moves, roll, from_board
+
+def calc_move_stuff(from_board, to_board, roll, player, total_roll, is_quad):
+    """
+    Run tmp_name on a private copy of the roll and reorder its result.
+
+    :return: (board, total_moves, roll)
+    """
+    remaining, dice_left, new_board = tmp_name(from_board, to_board, list(roll), player, total_roll, is_quad)
+    return new_board, remaining, dice_left
+
+
+@app.route('/get_board', methods=['GET'])
+@as_json_p
+def get_board():
+    """Return the hard-coded 26-entry board as a comma-separated string under the key 'board'."""
+    fixed_board = '0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0'
+    return {'board': fixed_board}
+
+
+
+def check_move(prev, curr):
+    """
+    Tell whether curr is one of the legal states reachable from prev.
+
+    The player (-1) and the roll ([1,2]) are currently hard-coded.
+
+    :param prev: board before the move
+    :param curr: board after the move
+    :return: True if curr equals one of the legal states, else False
+    """
+    # TODO: Decide on player system and implement roll properly
+    return any(
+        list(curr) == list(state)
+        for state in Board.calculate_legal_states(tuple(prev), -1, [1,2])
+    )
+
+
+
+def _flag_is_set(raw):
+    """Interpret a JSON field as a boolean, accepting real booleans and strings such as "true"/"false"."""
+    if isinstance(raw, str):
+        # bool("false") is True, so textual flags must be compared explicitly.
+        return raw.strip().lower() not in ("", "false", "0", "no")
+    return bool(raw)
+
+
+@app.route('/bot_move', methods=['POST'])
+def bot_move():
+    """
+    Let the bot (player 1) make a move on the posted board.
+
+    Expects a JSON body with 'board' (comma-separated integers) and 'pubeval'
+    (flag choosing pubeval instead of the network). The dice are rolled here.
+
+    :return: the resulting board as a comma-separated string
+    """
+    data = request.get_json(force=True)
+
+    board = [int(x) for x in data['board'].split(',')]
+    use_pubeval = _flag_is_set(data['pubeval'])
+
+    roll = (random.randrange(1, 7), random.randrange(1, 7))
+
+    # Both move functions return a (board, score) pair; only the board is sent back.
+    if use_pubeval:
+        board, _ = Eval.make_pubeval_move(tuple(board), 1, roll)
+    else:
+        board, _ = network.make_move(tuple(board), roll, 1)
+
+    return ",".join(str(x) for x in board)
+
+
+
+@app.route('/post_board', methods=['POST'])
+def post_board():
+    """
+    Validate a move posted by the client and report what is left of the roll.
+
+    Expects a JSON body with 'board', 'prevBoard' and 'roll' (comma-separated
+    integers), 'quad' (the string "true" for a four-dice roll) and 'totalRoll'.
+
+    :return: "<board>#<total moves left>#<remaining roll>", where board and
+             roll are comma-separated
+    """
+    def parse_ints(csv):
+        return [int(part) for part in csv.split(',')]
+
+    payload = request.get_json(force=True)
+
+    # TODO: Fix hardcoded player
+    player = -1
+
+    board = parse_ints(payload['board'])
+    prev_board = parse_ints(payload['prevBoard'])
+    print(payload['roll'])
+    roll = parse_ints(payload['roll'])
+    print(roll)
+    quad = payload['quad'] == "true"
+
+    total_roll = int(payload['totalRoll'])
+    print("total roll is:", total_roll)
+    return_board, total_moves, roll = calc_move_stuff(tuple(prev_board), tuple(board), tuple(roll), player, total_roll, quad)
+
+    return_string = "#".join([
+        ",".join(map(str, return_board)),
+        str(total_moves),
+        ",".join(map(str, roll)),
+    ])
+    print(return_string)
+
+    return return_string
+
+# Start the Flask server only when this file is executed directly.
+# host 0.0.0.0 makes it listen on every network interface, on port 35270.
+if __name__ == '__main__':
+    app.run(host = '0.0.0.0', port=35270)
--- a/bin/0-ply-tests.rb
+++ b/bin/0-ply-tests.rb
@ -0,0 +1,78 @@
+# Creates the model with a single training episode, then alternates between
+# evaluating it (dumbeval, then pubeval) and training it for 2000 more
+# episodes until 200000 episodes have been scheduled.
+def run_stuff(board_rep, model_name, ply)
+  system("python3 main.py --train --model #{model_name} --board-rep #{board_rep} --episodes 1 --ply #{ply}")
+
+  scheduled = 0
+  until scheduled >= 200000
+    %w[dumbeval pubeval].each do |eval_method|
+      system("python3 main.py --eval --model #{model_name} --eval-methods #{eval_method} --episodes 250 --ply #{ply} --repeat-eval 3")
+    end
+    system("python3 main.py --train --model #{model_name} --episodes 2000 --ply #{ply}")
+    scheduled += 2000
+  end
+end
+
+
+### ///////////////////////////////////////////////////////////////
+# QUACK TESTINGS
+### ///////////////////////////////////////////////////////////////
+
+board_rep = "quack"
+model_name = "quack_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+
+# board_rep = "quack"
+# model_name = "quack_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
+
+### ///////////////////////////////////////////////////////////////
+# QUACK-FAT TESTING
+### ///////////////////////////////////////////////////////////////
+
+board_rep = "quack-fat"
+model_name = "quack-fat_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+# board_rep = "quack-fat"
+# model_name = "quack-fat_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
+
+### ///////////////////////////////////////////////////////////////
+# QUACK-NORM TESTING
+### ///////////////////////////////////////////////////////////////
+
+
+board_rep = "quack-norm"
+model_name = "quack-norm_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+# board_rep = "quack-norm"
+# model_name = "quack-norm_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
+
+### ///////////////////////////////////////////////////////////////
+# TESAURO TESTING
+### ///////////////////////////////////////////////////////////////
+
+
+board_rep = "tesauro"
+model_name = "tesauro_test_0_ply"
+ply = 0
+
+run_stuff(board_rep, model_name, ply)
+
+# board_rep = "tesauro"
+# model_name = "tesauro_test_1_ply"
+# ply = 1
+
+# run_stuff(board_rep, model_name, ply)
--- a/bin/train-evaluate-save
+++ b/bin/train-evaluate-save
@ -1,30 +1,30 @@
 #!/usr/bin/env ruby
+MODELS_DIR = 'models'
+
 def save(model_name)
  require 'date'

-  models_dir = 'models'
-  model_path = File.join(models_dir, model_name)
-  if not File.exists? model_path then
-    return false
-  end
+  model_path = File.join(MODELS_DIR, model_name)

  episode_count = (File.read File.join(model_path, 'episodes_trained')).to_i

  puts "Found model #{model_name} with episodes #{episode_count} trained!"

  file_name = "model-#{model_name}-#{episode_count}-#{Time.now.strftime('%Y%m%d-%H%M%S')}.tar.gz"
-  save_path = File.join(models_dir, 'saves', file_name)
+  save_path = File.join(MODELS_DIR, 'saves', file_name)
  puts "Saving to #{save_path}"
  
-  system("tar", "-cvzf", save_path, "-C", models_dir, model_name)
-
-  return true
+  system("tar", "-cvzf", save_path, "-C", MODELS_DIR, model_name)
 end

 def train(model, episodes)
  system("python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s)
 end

+# Trains a model that does not exist yet.
+#
+# main.py no longer defines --force-creation, and argparse aborts on
+# unrecognized arguments, so the flag is not passed any more. A model without
+# a board_representation file needs --board-rep; pass board_rep to supply it.
+def force_train(model, episodes, board_rep = nil)
+  command = ["python3", "main.py", "--train", "--model", model, "--episodes", episodes.to_s]
+  command += ["--board-rep", board_rep] unless board_rep.nil?
+  system(*command)
+end
+
 def evaluate(model, episodes, method)
  system("python3", "main.py", "--eval" , "--model", model, "--episodes", episodes.to_s, "--eval-methods", method)
 end
@ -33,11 +33,9 @@ model = ARGV[0]

 if model.nil? then raise "no model specified" end

-while true do
+if not File.exists? File.join(MODELS_DIR, model) then
+  force_train model, 10
  save model
-  train model, 1000
-  save model
-  train model, 1000
  3.times do
    evaluate model, 250, "pubeval"
  end
@ -45,3 +43,27 @@ while true do
    evaluate model, 250, "dumbeval"
  end
 end
+
+# while true do
+#   save model
+#   train model, 1000
+#   save model
+#   train model, 1000
+#   3.times do
+#     evaluate model, 250, "pubeval"
+#   end
+#   3.times do
+#     evaluate model, 250, "dumbeval"
+#   end
+# end
+
+while true do
+  save model
+  train model, 500
+  5.times do
+    evaluate model, 250, "pubeval"
+  end
+  5.times do
+    evaluate model, 250, "dumbeval"
+  end
+end
--- a/bot.py
+++ b/bot.py
@ -1,24 +1,8 @@
-from cup import Cup
-from network import Network
 from board import Board

-import tensorflow as tf
-import numpy as np
-import random
-
 class Bot:
-    def __init__(self, sym, config = None, name = "unnamed"):
-        self.config = config
-        self.cup = Cup()
+    def __init__(self, sym):
        self.sym = sym
-        self.graph = tf.Graph()
-
-        self.network = Network(config, name)
-        self.network.restore_model()
-
-    def restore_model(self):
-        with self.graph.as_default():
-            self.network.restore_model()

    def get_session(self):
        return self.session
@ -26,16 +10,60 @@ class Bot:
    def get_sym(self):
        return self.sym

-    def get_network(self):
-        return self.network

-    # TODO: DEPRECATE
-    def make_move(self, board, sym, roll):
-        # print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        moves_and_scores = [ (move, self.network.eval_state(np.array(move).reshape(1,26))) for move in legal_moves ]
-        scores = [ x[1] for x in moves_and_scores ]
-        best_move_pair = moves_and_scores[np.array(scores).argmax()]
-        #print("Found the best state, being:", np.array(move_scores).argmax())
-        return best_move_pair
+    def calc_move_sets(self, from_board, roll, player):
+        """
+        Build the candidate state sets for a roll.
+
+        :param from_board: board the moves start from
+        :param roll: dice values of the roll
+        :param player: player the legal states are generated for
+        :return: list of [legal_states, pips] pairs; one per die (as [die,0])
+                 followed by one for the summed dice (as [sum,0])
+        """
+        print("board!:",from_board)
+        move_sets = [[Board.calculate_legal_states(from_board, player, [die,0]), die]
+                     for die in roll]
+        pip_sum = sum(roll)
+        move_sets.append([Board.calculate_legal_states(from_board, player, [pip_sum,0]), pip_sum])
+        return move_sets
+
+
+    def handle_move(self, from_board, to_board, roll, player):
+        """
+        Account for a move on the instance's bookkeeping.
+
+        If to_board is found in one of the move sets, self.total_moves is
+        reduced by that set's pips and the used die (or, for the combined
+        set, the whole of self.roll) is zeroed.
+
+        :param from_board: board before the move
+        :param to_board: board after the move, as a list
+        :param roll: dice values used to build the move sets
+        :param player: player making the move
+        :return: None
+        """
+        for position, (states, pips) in enumerate(self.calc_move_sets(from_board, roll, player)):
+            if to_board not in [list(state) for state in states]:
+                continue
+            self.total_moves -= pips
+            if position >= 2:
+                self.roll = [0,0]
+            else:
+                self.roll[position] = 0
+            break
+        print("Total moves left:",self.total_moves)
+
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves):
+        """
+        Check whether to_board is reachable from from_board and account for the dice used.
+
+        :param from_board: board before the move
+        :param to_board: board the caller wants to move to, as a list
+        :param roll: mutable list of remaining dice; a used die is zeroed in place
+        :param player: player making the move
+        :param total_moves: pips still available before this move
+        :return: (total_moves, roll, board); board is to_board when the move
+                 was found in one of the move sets, otherwise from_board
+        """
+        sets = self.calc_move_sets(from_board, roll, player)
+        return_board = from_board
+        # Unpack each [states, pips] pair instead of overwriting it, so the
+        # pip count stays available after the states are turned into lists.
+        for idx, (states, pips) in enumerate(sets):
+            if to_board in [list(state) for state in states]:
+                total_moves -= pips
+                # if it's not the sum of the moves
+                if idx < 2:
+                    roll[idx] = 0
+                else:
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board
+
+    def make_human_move(self, board, player, roll):
+        """
+        Prompt on stdin for moves until the pips of the roll are used up.
+
+        Each entered move is applied to the current board and checked with
+        tmp_name; a move that is not found leaves the board and the counters
+        unchanged, so the prompt is simply repeated.
+
+        :param board: board before the human moves
+        :param player: the human player
+        :param roll: the two dice values; used dice are zeroed by tmp_name
+        :return: the board after all accepted moves
+        """
+        total_moves = roll[0] + roll[1]
+        while total_moves != 0:
+            move = input("Pick a move!\n")
+            # Apply the move to the board reached so far, not to the board
+            # the turn started from, so follow-up moves build on earlier ones.
+            to_board = Board.apply_moves_to_board(board, player, move)
+            total_moves, roll, board = self.tmp_name(board, to_board, roll, player, total_moves)
+        return board
+
        
--- a/main.py
+++ b/main.py
@ -2,6 +2,7 @@ import argparse
 import sys
 import os
 import time
+import subprocess

 # Parse command line arguments
 parser = argparse.ArgumentParser(description="Backgammon games")
@ -31,19 +32,17 @@ parser.add_argument('--train-perpetually', action='store_true',
                    help='start new training session as soon as the previous is finished')
 parser.add_argument('--list-models', action='store_true',
                    help='list all known models')
-parser.add_argument('--force-creation', action='store_true',
-                    help='force model creation if model does not exist')
 parser.add_argument('--board-rep', action='store', dest='board_rep',
-                    default='tesauro',
                    help='name of board representation to use as input to neural network')
-parser.add_argument('--use-baseline', action='store_true',
-                    help='use the baseline model, note, has size 28')
+parser.add_argument('--verbose', action='store_true',
+                    help='If set, a lot of stuff will be printed')
+parser.add_argument('--ply', action='store', dest='ply', default='0',
+                    help='defines the amount of ply used when deciding what move to make')
+parser.add_argument('--repeat-eval', action='store', dest='repeat_eval', default='1',
+                    help='the amount of times the evaluation method should be repeated')

 args = parser.parse_args()

-if args.model == "baseline_model":
-    print("Model name 'baseline_model' not allowed")
-    exit()

 config = {
    'model': args.model,
@ -59,10 +58,13 @@ config = {
    'model_storage_path': 'models',
    'bench_storage_path': 'bench',
    'board_representation': args.board_rep,
-    'force_creation': args.force_creation,
-    'use_baseline': args.use_baseline
+    'global_step': 0,
+    'verbose': args.verbose,
+    'ply': args.ply,
+    'repeat_eval': args.repeat_eval
 }

+
 # Create models folder
 if not os.path.exists(config['model_storage_path']):
    os.makedirs(config['model_storage_path'])
@ -76,19 +78,20 @@ if not os.path.isdir(model_path()):
 if not os.path.isdir(log_path):
    os.mkdir(log_path)

-
 # Define helper functions
 def log_train_outcome(outcome, diff_in_values, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "train.log")):
+    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    format_vars = { 'trained_eps': trained_eps,
                    'count': len(outcome),
                    'sum': sum(outcome),
                    'mean': sum(outcome) / len(outcome),
                    'time': int(time.time()),
-                    'average_diff_in_vals': diff_in_values/len(outcome)
+                    'average_diff_in_vals': diff_in_values,
+                    'commit': commit
    }

    with open(log_path, 'a+') as f:
-        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals}".format(**format_vars) + "\n")
+        f.write("{time};{trained_eps};{count};{sum};{mean};{average_diff_in_vals};{commit}".format(**format_vars) + "\n")
    

 def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_path(), 'logs', "eval.log")):
@ -99,9 +102,12 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
    :param log_path:
    :return:
    """
+    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
+    
    for outcome in outcomes:
        scores = outcome[1]
-        format_vars = { 'trained_eps': trained_eps,
+        format_vars = { 'commit': commit,
+                        'trained_eps': trained_eps,
                        'method': outcome[0],
                        'count': len(scores),
                        'sum': sum(scores),
@ -109,9 +115,10 @@ def log_eval_outcomes(outcomes, trained_eps = 0, log_path = os.path.join(model_p
                        'time': int(time.time())
        }
        with open(log_path, 'a+') as f:
-            f.write("{time};{method};{trained_eps};{count};{sum};{mean}".format(**format_vars) + "\n")
+            f.write("{time};{method};{trained_eps};{count};{sum};{mean};{commit}".format(**format_vars) + "\n")

 def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
+    commit = subprocess.run(['git', 'describe', '--first-parent', '--always'], stdout=subprocess.PIPE).stdout.decode('utf-8').rstrip()
    for outcome in outcomes:
        scores = outcome[1]
        format_vars = { 'trained_eps': trained_eps,
@ -121,9 +128,28 @@ def log_bench_eval_outcomes(outcomes, log_path, index, time, trained_eps = 0):
                        'mean': sum(scores) / len(scores),
                        'time': time,
                        'index': index,
+                        'commit': commit
        }
        with open(log_path, 'a+') as f:
-            f.write("{method};{count};{index};{time};{sum};{mean}".format(**format_vars) + "\n")
+            f.write("{method};{count};{index};{time};{sum};{mean};{commit}".format(**format_vars) + "\n")
+
+def find_board_rep():
+    """Return the contents of the "board_representation" file stored in the model's directory."""
+    board_rep_path = os.path.join(config['model_storage_path'], config['model'], "board_representation")
+    with open(board_rep_path, 'r') as rep_file:
+        contents = rep_file.read()
+    return contents
+
+
+def board_rep_file_exists():
+    """Return True when the model's directory contains a "board_representation" file."""
+    return os.path.isfile(
+        os.path.join(config['model_storage_path'], config['model'], "board_representation")
+    )
+
+def create_board_rep():
+    """Append the configured board representation to the model's "board_representation" file, creating the file if needed."""
+    model_dir = os.path.join(config['model_storage_path'], config['model'])
+    with open(os.path.join(model_dir, "board_representation"), 'a+') as rep_file:
+        rep_file.write(config['board_representation'])

 # Do actions specified by command-line
 if args.list_models:
@ -148,6 +174,22 @@ if __name__ == "__main__":
    # Set up variables
    episode_count = config['episode_count']

+    if config['board_representation'] is None:
+        if board_rep_file_exists():
+            config['board_representation'] = find_board_rep()
+        else:
+            sys.stderr.write("Was not given a board_rep and was unable to find a board_rep file\n")
+            exit()
+    else:
+        if not board_rep_file_exists():
+            create_board_rep()
+        else:
+            if config['board_representation'] != find_board_rep():
+                sys.stderr.write("Board representation \"{given}\", does not match one in board_rep file, \"{board_rep}\"\n".
+                                 format(given = config['board_representation'], board_rep = find_board_rep()))
+                exit()
+
+                  
    if args.train:
        network = Network(config, config['model'])
        start_episode = network.episodes_trained
@ -161,9 +203,15 @@ if __name__ == "__main__":
            if not config['train_perpetually']:
                break

+    elif args.play:
+        network = Network(config, config['model'])
+        network.play_against_network()

    elif args.eval:
        network = Network(config, config['model'])
+        network.restore_model()
+
+        for i in range(int(config['repeat_eval'])):
            start_episode = network.episodes_trained
            # Evaluation measures are described in `config`
            outcomes = network.eval(config['episode_count'])
@ -191,7 +239,7 @@ if __name__ == "__main__":
        episode_counts = [25, 50, 100, 250, 500, 1000, 2500, 5000,
                          10000, 20000]

-        def do_eval(sess):
+        def do_eval():
            for eval_method in config['eval_methods']:
                result_path = os.path.join(config['bench_storage_path'],
                                           eval_method) + "-{}.log".format(int(time.time()))
@ -199,8 +247,7 @@ if __name__ == "__main__":
                    for i in range(sample_count):
                        start_time = time.time()
                        # Evaluation measure to be benchmarked are described in `config`
-                        outcomes = network.eval(episode_count = n,
-                                                tf_session = sess)
+                        outcomes = network.eval(episode_count = n)
                        time_diff = time.time() - start_time
                        log_bench_eval_outcomes(outcomes,
                                                time = time_diff,
@ -210,8 +257,8 @@ if __name__ == "__main__":

        # CMM: oh no
        import tensorflow as tf
-        with tf.Session() as session:
-            network.restore_model(session)
-            do_eval(session)
+
+        network.restore_model()
+        do_eval()
        
        
--- a/network.py
+++ b/network.py
@ -8,6 +8,8 @@ import random
 from eval import Eval
 import glob
 from operator import itemgetter
+import tensorflow.contrib.eager as tfe
+from player import Player

 class Network:
    # board_features_quack has size 28
@ -18,18 +20,43 @@ class Network:
        'quack-fat'   : (30, Board.board_features_quack_fat),
        'quack'       : (28, Board.board_features_quack),
        'tesauro'     : (198, Board.board_features_tesauro),
-        'quack-norm': (30, Board.board_features_quack_norm)
+        'quack-norm'  : (30, Board.board_features_quack_norm),
+        'tesauro-fat' : (726, Board.board_features_tesauro_fat),
+        'tesauro-poop': (198, Board.board_features_tesauro_wrong)
    }

    def custom_tanh(self, x, name=None):
        return tf.scalar_mul(tf.constant(2.00), tf.tanh(x, name))

    def __init__(self, config, name):
+        """
+        :param config:
+        :param name:
+        """
+
+        move_options = {
+            '1': self.make_move_1_ply,
+            '0': self.make_move_0_ply
+        }
+
+        self.max_or_min = {
+            1: np.argmax,
+            -1: np.argmin
+        }
+
+        tf.enable_eager_execution()
+
+        xavier_init = tf.contrib.layers.xavier_initializer()
+
        self.config = config
        self.checkpoint_path = os.path.join(config['model_storage_path'], config['model'])

        self.name = name

+        self.make_move = move_options[
+            self.config['ply']
+        ]
+
        # Set board representation from config
        self.input_size, self.board_trans_func = Network.board_reps[
            self.config['board_representation']
@ -39,16 +66,6 @@ class Network:
        self.max_learning_rate = 0.1
        self.min_learning_rate = 0.001

-        self.global_step = tf.Variable(0, trainable=False, name="global_step")
-        self.learning_rate = tf.maximum(self.min_learning_rate,
-                                        tf.train.exponential_decay(self.max_learning_rate,
-                                                                   self.global_step, 50000,
-                                                                   0.96,
-                                                                   staircase=True),
-                                        name="learning_rate")
-
-        
-        
        # Restore trained episode count for model
        episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
        if os.path.isfile(episode_count_path):
@ -57,336 +74,261 @@ class Network:
        else:
            self.episodes_trained = 0

-        self.x = tf.placeholder('float', [1, self.input_size], name='input')
-        self.value_next = tf.placeholder('float', [1, self.output_size], name="value_next")
-
-        xavier_init = tf.contrib.layers.xavier_initializer()
-
-        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
-                              initializer=xavier_init)
-        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
-                              initializer=xavier_init)
-
-        b_1 = tf.get_variable("b_1", (self.hidden_size,),
-                              initializer=tf.zeros_initializer)
-        b_2 = tf.get_variable("b_2", (self.output_size,),
-                              initializer=tf.zeros_initializer)
+        global_step_path = os.path.join(self.checkpoint_path, "global_step")
+        if os.path.isfile(global_step_path):
+            with open(global_step_path, 'r') as f:
+                self.global_step = int(f.read())
+        else:
+            self.global_step = 0


-        value_after_input = tf.sigmoid(tf.matmul(self.x, W_1) + b_1, name='hidden_layer')
-
-        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
-
-        # TODO: Alexander thinks that self.value will be computed twice (instead of once)
-        difference_in_values = tf.reshape(tf.subtract(self.value_next, self.value, name='difference_in_values'), [])
-        tf.summary.scalar("difference_in_values", tf.abs(difference_in_values))
-
-        trainable_vars = tf.trainable_variables()
-        gradients = tf.gradients(self.value, trainable_vars)
-
-        apply_gradients = []
-
-        global_step_op = self.global_step.assign_add(1)
+        self.model = tf.keras.Sequential([
+            tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=xavier_init,
+                                  input_shape=(1,self.input_size)),
+            tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=xavier_init)
+        ])


-        with tf.variable_scope('apply_gradients'):
-            for gradient, trainable_var in zip(gradients, trainable_vars):
-                backprop_calc = self.learning_rate * difference_in_values * gradient
-                grad_apply = trainable_var.assign_add(backprop_calc)
-                apply_gradients.append(grad_apply)
+    def exp_decay(self, max_lr, global_step, decay_rate, decay_steps):
+        """
+        Staircase exponential decay of a learning rate.
+
+        :param max_lr: learning rate before any decay has been applied
+        :param global_step: number of steps taken so far
+        :param decay_rate: factor applied once per completed decay period
+        :param decay_steps: length of one decay period, in steps
+        :return: max_lr * decay_rate ** (number of completed decay periods)
+        """
+        completed_periods = global_step // decay_steps
+        return max_lr * decay_rate ** completed_periods
+
+    def do_backprop(self, prev_state, value_next):
+        """
+        Performs the Temporal-difference backpropagation step on the model.
+
+        Every model variable is updated in place by
+        learning_rate * (value_next - value) * d(value)/d(variable), where
+        value is the model's output for prev_state. self.global_step is read
+        but not changed here.
+
+        :param prev_state: The previous state of the game, this has its value recalculated
+        :param value_next: The value of the current move
+        :return: Nothing, the calculation is performed on the model of the network
+        """
+        # Decayed learning rate (factor 0.96 per 50000 steps), never below the minimum.
+        self.learning_rate = tf.maximum(self.min_learning_rate,
+                                        self.exp_decay(self.max_learning_rate, self.global_step, 0.96, 50000),
+                                        name="learning_rate")
+
+        # Record the forward pass so the output can be differentiated below.
+        with tf.GradientTape() as tape:
+            value = self.model(prev_state.reshape(1,-1))
+
+        # Gradient of the network output (not of a loss) w.r.t. each variable.
+        grads = tape.gradient(value, self.model.variables)
+
+        # Reshape the difference to a scalar so it scales every gradient uniformly.
+        difference_in_values = tf.reshape(tf.subtract(value_next, value, name='difference_in_values'), [])
+
+        for grad, train_var in zip(grads, self.model.variables):
+            backprop_calc = self.learning_rate * difference_in_values * grad
+            train_var.assign_add(backprop_calc)


-        with tf.control_dependencies([global_step_op]):

-            self.training_op = tf.group(*apply_gradients, name='training_op')
+    def print_variables(self):
+        """
+        Print every variable of the model, one per print call.
+        :return: None
+        """
+        for variable in self.model.variables:
+            print(variable)

-        self.saver = tf.train.Saver(max_to_keep=1)
+    def eval_state(self, state):
+        """
+        Evaluate a single state with the model.
+        :param state: array-like with a reshape method; it is flattened into one row
+        :return: whatever the model returns for the (1, -1)-shaped input
+        """
+        single_row = state.reshape(1,-1)
+        return self.model(single_row)

-    def eval_state(self, sess, state):
-        return sess.run(self.value, feed_dict={self.x: state})
+    def save_model(self, episode_count):
+        """
+        Saves the model of the network, it references global_step as self.global_step
+        :param episode_count:
+        :return:
+        """
+
+        tfe.Saver(self.model.variables).save(os.path.join(self.checkpoint_path, 'model.ckpt'))

-    def save_model(self, sess, episode_count, global_step):
-        self.saver.save(sess, os.path.join(self.checkpoint_path, 'model.ckpt'), global_step=global_step)
        with open(os.path.join(self.checkpoint_path, "episodes_trained"), 'w+') as f:
            print("[NETWK] ({name}) Saving model to:".format(name=self.name),
                  os.path.join(self.checkpoint_path, 'model.ckpt'))
            f.write(str(episode_count) + "\n")

-    def restore_model(self, sess):
+        with open(os.path.join(self.checkpoint_path, "global_step"), 'w+') as f:
+            print("[NETWK] ({name}) Saving global step to:".format(name=self.name),
+                  os.path.join(self.checkpoint_path, 'model.ckpt'))
+            f.write(str(self.global_step) + "\n")
+        if self.config['verbose']:
+            self.print_variables()
+
+
+    def calc_vals(self, states):
+        """
+        Score a batch of states in a single model call.
+        :param states: the states to score; they must already be transformed
+                       into the network's input representation
+        :return: the result of the model's predict_on_batch for these states
+        """
+        scores = self.model.predict_on_batch(states)
+        return scores
+
+
+    def restore_model(self):
        """
        Restore a model for a session, such that a trained model and either be further trained or
        used for evaluation
        
-        :param sess: Current session
        :return: Nothing. It's a side-effect that a model gets restored for the network.
        """

+
        if glob.glob(os.path.join(self.checkpoint_path, 'model.ckpt*.index')):
        
            latest_checkpoint = tf.train.latest_checkpoint(self.checkpoint_path)
            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
                  str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
+            tfe.Saver(self.model.variables).restore(latest_checkpoint)

            # Restore trained episode count for model
            episode_count_path = os.path.join(self.checkpoint_path, "episodes_trained")
            if os.path.isfile(episode_count_path):
                with open(episode_count_path, 'r') as f:
                    self.config['start_episode'] = int(f.read())
-        elif self.config['use_baseline'] and glob.glob(os.path.join(os.path.join(self.config['model_storage_path'], "baseline_model"), 'model.ckpt*.index')):
-            checkpoint_path = os.path.join(self.config['model_storage_path'], "baseline_model")
-            latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
-            print("[NETWK] ({name}) Restoring model from:".format(name=self.name),
-                  str(latest_checkpoint))
-            self.saver.restore(sess, latest_checkpoint)

-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
-        elif not self.config['force_creation']:
-            print("You need to have baseline_model inside models")
-            exit()
+            global_step_path = os.path.join(self.checkpoint_path, "global_step")
+            if os.path.isfile(global_step_path):
+                with open(global_step_path, 'r') as f:
+                    self.config['global_step'] = int(f.read())
+
+            if self.config['verbose']:
+                self.print_variables()


-    def make_move(self, sess, board, roll, player):
+
+    def make_move_0_ply(self, board, roll, player):
        """
        Find the best move given a board, roll and a player, by finding all possible states one can go to
-        and then picking the best, by using the network to evaluate each state. The highest score is picked
-        for the 1-player and the max(1-score) is picked for the -1-player.
+        and then picking the best, by using the network to evaluate each state. This is 0-ply, i.e. no look-ahead.
+        The highest score is picked for the 1-player and the max(1-score) is picked for the -1-player.

-        :param sess:
        :param board: Current board
        :param roll:  Current roll
        :param player: Current player
        :return: A pair of the best state to go to, together with the score of that state
        """
-        legal_moves = Board.calculate_legal_states(board, player, roll)
-        moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in legal_moves]
-        scores = [x[1] if np.sign(player) > 0 else 1-x[1] for x in moves_and_scores]
-        best_score_index = np.array(scores).argmax()
-        best_move_pair = moves_and_scores[best_score_index]
-        return best_move_pair
+        legal_moves = list(Board.calculate_legal_states(board, player, roll))
+        legal_states = np.array([self.board_trans_func(move, player)[0] for move in legal_moves])

-    def make_move_n_ply(self, sess, board, roll, player, n = 1):
-        best_pair = self.calc_n_ply(n, sess, board, player, roll)
+        scores = self.model.predict_on_batch(legal_states)
+
+        best_score_idx = self.max_or_min[player](scores)
+
+        best_move, best_score = legal_moves[best_score_idx], scores[best_score_idx]
+
+        return (best_move, best_score)
+
+    def make_move_1_ply(self, board, roll, player):
+        """
+        Return the best board and best score based on a 1-ply look-ahead.
+        :param board:
+        :param roll:
+        :param player:
+        :return:
+        """
+        start = time.time()
+        best_pair = self.calculate_1_ply(board, roll, player)
+        #print(time.time() - start)
        return best_pair


-    def calculate_1_ply(self, sess, board, roll, player):
+    def calculate_1_ply(self, board, roll, player):
        """
-        Find the best move based on a 1-ply look-ahead. First the best move is found for a single ply and then an
-        exhaustive search is performed on the best 15 moves from the single ply.
+        Find the best move based on a 1-ply look-ahead. First the x best moves are picked from a 0-ply and then
+        all moves and scores are found for them. The expected score is then calculated for each of the boards from the
+        0-ply.

-        :param sess:
        :param board:
        :param roll: The original roll
        :param player: The current player
        :return: Best possible move based on 1-ply look-ahead
-
        """

        # find all legal states from the given board and the given roll
        init_legal_states = Board.calculate_legal_states(board, player, roll)
+        legal_states = np.array([self.board_trans_func(state, player)[0] for state in init_legal_states])

-        # find all values for the above boards
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        best_fifteen = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
-
-        best_fifteen_boards = [x[0] for x in best_fifteen[:10]]
-
-        all_rolls_scores = self.do_ply(sess, best_fifteen_boards, player)
-
-
-        best_score_index = np.array(all_rolls_scores).argmax()
-        best_board = best_fifteen_boards[best_score_index]
-
-        return [best_board, max(all_rolls_scores)]
-
-    def calc_n_ply(self, n_init, sess, board, player, roll):
-
-        # find all legal states from the given board and the given roll
-        init_legal_states = Board.calculate_legal_states(board, player, roll)
-
-        # find all values for the above boards
-        zero_ply_moves_and_scores = [(move, self.eval_state(sess, self.board_trans_func(move, player))) for move in init_legal_states]
-
-        # pythons reverse is in place and I can't call [:15] on it, without applying it to an object like so. Fuck.
-        sorted_moves_and_scores = sorted(zero_ply_moves_and_scores, key=itemgetter(1), reverse=player==1)
-
+        scores = [ score.numpy()
+                   for score
+                   in  self.calc_vals(legal_states) ]

+        moves_and_scores = list(zip(init_legal_states, scores))
+        sorted_moves_and_scores = sorted(moves_and_scores, key=itemgetter(1), reverse=(player == 1))
        best_boards = [ x[0] for x in sorted_moves_and_scores[:10] ]

-        best_move_score_pair = self.n_ply(n_init, sess, best_boards, player)
+        scores = self.do_ply(best_boards, player)

-        return best_move_score_pair
+        best_score_idx = self.max_or_min[player](scores)
+        # best_score_idx = np.array(trans_scores).argmax()

+        return (best_boards[best_score_idx], scores[best_score_idx])

-    def n_ply(self, n_init, sess, boards_init, player_init):
-        def ply(n, boards, player):
-            def calculate_possible_states(board):
-                possible_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
-                                   (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
-                                   (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
-                                   (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
-                                   (6, 6) ]
-
-                # for roll in possible_rolls:
-                #     print(len(Board.calculate_legal_states(board, player, roll)))
-
-                return [ Board.calculate_legal_states(board, player, roll)
-                         for roll
-                         in  possible_rolls ]
-
-            def find_best_state_score(boards):
-                score_pairs = [ (board, self.eval_state(sess, self.board_trans_func(board, player)))
-                                for board
-                                in  boards ]
-                scores = [ pair[1]
-                           for pair
-                           in score_pairs ]
-                best_score_pair = score_pairs[np.array(scores).argmax()]
-
-                return best_score_pair
-
-            def average_score(boards):
-                return sum(boards)/len(boards)
-
-            def average_ply_score(board):
-                states_for_rolls = calculate_possible_states(board)
-
-                best_state_score_for_each_roll = [
-                    find_best_state_score(states)
-                    for states
-                    in  states_for_rolls ]
-                best_score_for_each_roll = [ x[1]
-                                             for x
-                                             in best_state_score_for_each_roll ]
-
-                average_score_var = average_score(best_score_for_each_roll)
-                return average_score_var
-
-
-            if n == 1:
-                average_score_pairs = [ (board, average_ply_score(board))
-                                        for board
-                                        in  boards ]
-                return average_score_pairs
-            elif n > 1: # n != 1
-                def average_for_score_pairs(score_pairs):
-                    scores = [ pair[1]
-                               for pair
-                               in score_pairs ]
-                    return sum(scores)/len(scores)
-
-                def average_plain(scores):
-                    return sum(scores)/len(scores)
-
-                print("+"*20)
-                print(n)
-                print(type(boards))
-                print(boards)
-                possible_states_for_boards = [
-                    (board, calculate_possible_states(board))
-                    for board
-                    in  boards ]
-
-                average_score_pairs = [
-                    (inner_boards[0], average_plain([ average_for_score_pairs(ply(n - 1, inner_board, player * -1 if n == 1 else player))
-                                                      for inner_board
-                                                      in  inner_boards[1] ]))
-                    for inner_boards
-                    in  possible_states_for_boards ]
-
-                return average_score_pairs
-
-            else:
-                assert False
-
-        if n_init < 1: print("Unexpected argument n = {}".format(n_init)); exit()
-
-        boards_with_scores = ply(n_init, boards_init, -1 * player_init)
-        #print("Boards with scores:",boards_with_scores)
-        scores = [ ( pair[1] if player_init == 1 else (1 - pair[1]) )
-                   for pair
-                   in boards_with_scores ]
-        #print("All the scores:",scores)
-        best_score_pair = boards_with_scores[np.array(scores).argmax()]
-        return best_score_pair
-
-    def do_ply(self, sess, boards, player):
+    def do_ply(self, boards, player):
        """
        Calculates a single extra ply, resulting in a larger search space for our best move.
        This is somewhat hardcoded to only do a single ply, seeing that it calls max on all scores, rather than
        allowing the function to search deeper, which could result in an even larger search space. If we wish
        to have more than 2-ply, this should be fixed, so we could extend this method to allow for 3-ply.

-        :param sess:
        :param boards: The boards to try all rolls on
        :param player: The player of the previous ply
        :return: An array of scores where each index describes one of the boards which was given as param
        to this function.
        """

-        def gen_21_rolls():
-            """
-            Calculate all possible rolls, [[1,1], [1,2] ..]
-            :return: All possible rolls
-            """
-            a = []
-            for x in range(1, 7):
-                for y in range(1, 7):
-                    if not [x, y] in a and not [y, x] in a:
-                        a.append([x, y])
+        all_rolls = [ (1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
+                      (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
+                      (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
+                      (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
+                      (6, 6) ]

-            return a

-        all_rolls = gen_21_rolls()
+        # start = time.time()

-        all_rolls_scores = []
-        count = 0
-        # loop over boards
-        for a_board in boards:
-            a_board_scores = []
-
-            # loop over all rolls, for each board
+        # print("/"*50)
+        length_list = []
+        test_list = []
+        # Prepping of data
+        # start = time.time()
+        for board in boards:
+            length = 0
            for roll in all_rolls:
+                all_states = Board.calculate_legal_states(board, player*-1, roll)
+                for state in all_states:
+                    state = np.array(self.board_trans_func(state, player*-1)[0])
+                    test_list.append(state)
+                    length += 1
+            length_list.append(length)

-                # find all states we can get to, given the board and roll and the opposite player
-                all_rolls_boards = Board.calculate_legal_states(a_board, player*-1, roll)
-                count += len(all_rolls_boards)
-                # find scores for each board found above
-                spec_roll_scores = [self.eval_state(sess, self.board_trans_func(new_board, player*-1))
-                                    for new_board in all_rolls_boards]
+        # print(time.time() - start)

-                # if the original player is the -1 player, then we need to find (1-value)
-                spec_roll_scores = [x if player == 1 else (1-x) for x in spec_roll_scores]
+        start = time.time()

-                # find the best score
-                best_score = max(spec_roll_scores)
+        all_scores = self.model.predict_on_batch(np.array(test_list))

-                # append the best score to a_board_scores, where we keep track of the best score for each board
-                a_board_scores.append(best_score)
+        split_scores = []
+        from_idx = 0
+        for length in length_list:
+            split_scores.append(all_scores[from_idx:from_idx+length])
+            from_idx += length

-            # save the expected average of board scores
-            all_rolls_scores.append(sum(a_board_scores)/len(a_board_scores))
+        means_splits = [tf.reduce_mean(scores) for scores in split_scores]

-        # return all the average scores
-        print(count)
-        return all_rolls_scores
+        # print(time.time() - start)
+        # print("/"*50)
+        return means_splits


-    def eval(self, episode_count, trained_eps = 0, tf_session = None):
+    def eval(self, episode_count, trained_eps = 0):
        """
        Used to evaluate a model. Can either use pubeval, a model playing at an intermediate level, or dumbeval
        a model which has been given random weights, so it acts deterministically random.
@ -397,11 +339,10 @@ class Network:
        :return: outcomes:    The outcomes of the evaluation session
        """

-        def do_eval(sess, method, episodes = 1000, trained_eps = 0):
+        def do_eval(method, episodes = 1000, trained_eps = 0):
            """
            Do the actual evaluation

-            :param sess:
            :param method:     Either pubeval or dumbeval
            :param episodes:   Amount of episodes to use in the evaluation
            :param trained_eps:
@ -425,7 +366,6 @@ class Network:
            sys.stderr.write(
                "[EVAL ] Evaluating {eps} episode(s) with method '{method}'\n".format(eps=episodes, method=method))

-
            if method == 'pubeval':
                outcomes = []
                for i in range(1, episodes + 1):
@ -433,11 +373,9 @@ class Network:
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-
-                        board = (self.make_move(sess, board, roll, 1))[0]
+                        board = (self.make_move(board, roll, 1))[0]

                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-
                        board = Eval.make_pubeval_move(board, -1, roll)[0][0:26]

                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -456,11 +394,9 @@ class Network:
                    board = Board.initial_state
                    while Board.outcome(board) is None:
                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-
-                        board = (self.make_move(sess, board, roll, 1))[0]
+                        board = (self.make_move(board, roll, 1))[0]

                        roll = (random.randrange(1, 7), random.randrange(1, 7))
-
                        board = Eval.make_dumbeval_move(board, -1, roll)[0][0:26]

                    sys.stderr.write("\t outcome {}".format(Board.outcome(board)[1]))
@ -476,41 +412,52 @@ class Network:
                sys.stderr.write("[EVAL ] Evaluation method '{}' is not defined\n".format(method))
                return [0]
            
-        if tf_session == None:
-            with tf.Session() as session:
-                session.run(tf.global_variables_initializer())
-                self.restore_model(session)
-                outcomes = [ (method, do_eval(session,
-                                              method,
-                                              episode_count,
-                                              trained_eps = trained_eps))
-                             for method
-                             in self.config['eval_methods'] ]
-                return outcomes
-        else:
-            outcomes = [ (method, do_eval(tf_session,
-                                          method,
+
+        outcomes = [ (method, do_eval(method,
                                      episode_count,
                                      trained_eps = trained_eps))
                     for method
                     in self.config['eval_methods'] ]
        return outcomes

+
+    def play_against_network(self):
+        """
+        Play an interactive game on stdin/stdout: the restored network is player 1, the human is player -1.
+        :return: Nothing. Boards and rolls are printed as the game progresses.
+        """
+        self.restore_model()
+        human_player = Player(-1)
+        player = 1
+        board = Board.initial_state
+        while Board.outcome(board) is None:
+            print(Board.pretty(board))
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("Bot rolled:", roll)
+            board, _ = self.make_move(board, roll, player)
+            print(Board.pretty(board))
+            # The bot's move may have ended the game; never ask the human to move on a finished board.
+            if Board.outcome(board) is not None:
+                break
+            roll = (random.randrange(1, 7), random.randrange(1, 7))
+            print("You rolled:", roll)
+            board = human_player.make_human_move(board, roll)
+        print("DONE "*10)
+        print(Board.pretty(board))
+
+
+
    def train_model(self, episodes=1000, save_step_size=100, trained_eps=0):
-        with tf.Session() as sess:
-            difference_in_vals = 0
-            writer = tf.summary.FileWriter("/tmp/log/tf", sess.graph)
-
-            sess.run(tf.global_variables_initializer())
-            self.restore_model(sess)
-
-            variables_names = [v.name for v in tf.trainable_variables()]
-            values = sess.run(variables_names)
-            for k, v in zip(variables_names, values):
-                print("Variable: ", k)
-                print("Shape: ", v.shape)
-                print(v)
+        """
+        Train a model by self-learning.
+        :param episodes:
+        :param save_step_size:
+        :param trained_eps:
+        :return:
+        """

+        self.restore_model()
+        average_diffs = 0
        start_time = time.time()

        def print_time_estimate(eps_completed):
@ -530,27 +477,30 @@ class Network:
        for episode in range(1, episodes + 1):

            sys.stderr.write("[TRAIN] Episode {}".format(episode + trained_eps))
-                # TODO decide which player should be here

-                player = 1
+            # player = 1
+            player = random.choice([-1,1])
            prev_board = Board.initial_state
            i = 0
+            difference_in_values = 0
            while Board.outcome(prev_board) is None:
                i += 1
+                self.global_step += 1

-                    cur_board, cur_board_value = self.make_move(sess,
-                                                                prev_board,
+                cur_board, cur_board_value = self.make_move(prev_board,
                                                            (random.randrange(1, 7), random.randrange(1, 7)),
                                                            player)

-                    difference_in_vals += abs((cur_board_value - self.eval_state(sess, self.board_trans_func(prev_board, player))))
+                difference_in_values += abs((cur_board_value - self.eval_state(self.board_trans_func(prev_board, player))))

+                if self.config['verbose']:
+                    print("Difference in values:", difference_in_values)  # running total for this episode
+                    print("Current board value :", cur_board_value)
+                    print("Current board is    :\n",cur_board)

                # adjust weights
-                    sess.run(self.training_op,
-                             feed_dict={self.x: self.board_trans_func(prev_board, player),
-                                        self.value_next: cur_board_value})
-
+                if Board.outcome(cur_board) is None:
+                    self.do_backprop(self.board_trans_func(prev_board, player), cur_board_value)
                    player *= -1

                prev_board = cur_board
@ -561,27 +511,25 @@ class Network:
            final_score = np.array([Board.outcome(final_board)[1]])
            scaled_final_score = ((final_score + 2) / 4)

-                with tf.name_scope("final"):
-                    merged = tf.summary.merge_all()
-                    global_step, summary, _ = sess.run([self.global_step, merged, self.training_op],
-                                          feed_dict={self.x: self.board_trans_func(prev_board, player),
-                                                     self.value_next: scaled_final_score.reshape((1, 1))})
-                    writer.add_summary(summary, episode + trained_eps)
+            difference_in_values += abs(scaled_final_score-cur_board_value)
+
+            average_diffs += (difference_in_values[0][0] / (i+1))
+
+            self.do_backprop(self.board_trans_func(prev_board, player), scaled_final_score.reshape(1,1))

            sys.stderr.write("\n")

            if episode % min(save_step_size, episodes) == 0:
                sys.stderr.write("[TRAIN] Saving model...\n")
-                    self.save_model(sess, episode + trained_eps, global_step)
+                self.save_model(episode + trained_eps)

            if episode % 50 == 0:
                print_time_estimate(episode)

        sys.stderr.write("[TRAIN] Saving model for final episode...\n")
-            self.save_model(sess, episode+trained_eps, global_step)

-            writer.close()
+        self.save_model(episode+trained_eps)

-            return outcomes, difference_in_vals[0][0]
+        return outcomes, average_diffs/len(outcomes)


--- a/network_test.py
+++ b/network_test.py
@ -9,14 +9,12 @@ from board import Board
 import main

 config = main.config.copy()
-config['model'] = "tesauro_blah"
-config['force_creation'] = True
+config['model'] = "player_testings"
+config['ply'] = "1"
+config['board_representation'] = 'quack-fat'
 network = Network(config, config['model'])

-session = tf.Session()
-
-session.run(tf.global_variables_initializer())
-network.restore_model(session)
+network.restore_model()
 initial_state = Board.initial_state

 initial_state_1 = ( 0,
@ -38,65 +36,32 @@ boards = {initial_state,
          initial_state_2 }


-def gen_21_rolls():
-    """
-    Calculate all possible rolls, [[1,1], [1,2] ..]
-    :return: All possible rolls
-    """
-    a = []
-    for x in range(1, 7):
-        for y in range(1, 7):
-            if not [x, y] in a and not [y, x] in a:
-                a.append([x, y])
-
-    return a
-
-def calc_all_scores(board, player):
-    scores = []
-    trans_board = network.board_trans_func(board, player)
-    rolls = gen_21_rolls()
-    for roll in rolls:
-        score = network.eval_state(session, trans_board)
-        scores.append(score)
-    return scores
-
-
-def calculate_possible_states(board):
-    possible_rolls = [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5),
-                      (1, 6), (2, 2), (2, 3), (2, 4), (2, 5),
-                      (2, 6), (3, 3), (3, 4), (3, 5), (3, 6),
-                      (4, 4), (4, 5), (4, 6), (5, 5), (5, 6),
-                      (6, 6)]
-
-    for roll in possible_rolls:
-        meh = Board.calculate_legal_states(board, -1, roll)
-        print(len(meh))
-    return [Board.calculate_legal_states(board, -1, roll)
-            for roll
-            in possible_rolls]



-#for board in boards:
-#    calculate_possible_states(board)

-#print("-"*30)
-#print(network.calculate_1_ply(session, Board.initial_state, [2,4], 1))
+# board = network.board_trans_func(Board.initial_state, 1)

-#print(" "*10 + "network_test")
-print(" "*20 + "Depth 1")
-print(network.calc_n_ply(2, session, Board.initial_state, 1, [2, 4]))

-#print(scores)
+# pair = network.make_move(Board.initial_state, [3,2], 1)

-#print(" "*20 + "Depth 2")
-#print(network.n_ply(2, session, boards, 1))
+# print(pair[1])

-# #print(x.shape)
-# with graph_lol.as_default():
-#     session_2 = tf.Session(graph = graph_lol)
-#     network_2 = Network(session_2)
-#     network_2.restore_model()
-#     print(network_2.eval_state(initial_state))
+# network.do_backprop(board, 0.9)

-# print(network.eval_state(initial_state))
+
+# network.print_variables()
+
+
+# network.save_model(2)
+
+# print(network.calculate_1_ply(Board.initial_state, [3,2], 1))
+
+
+diff = [0, 0]
+val = network.eval_state(Board.board_features_quack_fat(initial_state, 1))
+print(val)
+diff[0] += abs(-1-val)
+diff[1] += 1
+
+print(diff[1])
--- a/player.py
+++ b/player.py
@ -11,19 +11,59 @@ class Player:
    def get_sym(self):
        return self.sym
    
-    def make_move(self, board, sym, roll):
-        print(Board.pretty(board))
-        legal_moves = Board.calculate_legal_states(board, sym, roll)
-        if roll[0] == roll[1]:
-            print("Example of move: 4/6,6/8,12/14,13/15")
+    def calc_move_sets(self, from_board, roll, player):
+        board = from_board
+        sets = []
+        total = 0
+        for r in roll:
+            # print("Value of r:",r)
+            sets.append([Board.calculate_legal_states(board, player, [r,0]), r])
+            total += r
+        sets.append([Board.calculate_legal_states(board, player, [total,0]), total])
+        print(sets)
+        return sets
+
+
+    def tmp_name(self, from_board, to_board, roll, player, total_moves, is_quad = False):
+        sets = self.calc_move_sets(from_board, roll, player)
+        return_board = from_board
+        for idx, board_set in enumerate(sets):
+
+            board_set[0] = list(board_set[0])
+            # print(to_board)
+            # print(board_set)
+            if to_board in board_set[0]:
+                total_moves -= board_set[1]
+                # if it's not the sum of the moves
+                if idx < (4 if is_quad else 2):
+                    roll[idx] = 0
                else:
-            print("Example of move: 4/6,13/17")
+                    roll = [0,0]
+                return_board = to_board
+                break
+        return total_moves, roll, return_board

-        user_moves = input("Enter your move: ").strip().split(",")
-        board = Board.apply_moves_to_board(board, sym, user_moves)
-        while board not in legal_moves:
-            print("Move is invalid, please enter a new move")
-            user_moves = input("Enter your move: ").strip().split(",")
-            board = Board.apply_moves_to_board(board, sym, user_moves)
+    def make_human_move(self, board, roll):
+        is_quad = roll[0] == roll[1]
+        total_moves = roll[0] + roll[1] if not is_quad else int(roll[0])*4
+        if is_quad:
+            roll = [roll[0]]*4
        
+        # Keep prompting until every pip of the roll has been spent on a legal move.
+        while total_moves != 0:
+            while True:
+                print("You have {roll} left!".format(roll=total_moves))
+                pot_move = input("Pick a move!\n").split("/")
+                try:
+                    # int() signals non-numeric text with ValueError, not TypeError
+                    move = [int(idx) for idx in pot_move]
+                except ValueError:
+                    move = []
+                if len(move) == 2:
+                    break
+                print("The correct syntax is: 2/5 for a move from index 2 to 5.")
+
+            to_board = Board.apply_moves_to_board(board, self.get_sym(), move)
+            total_moves, roll, board = self.tmp_name(board, to_board, list(roll), self.get_sym(), total_moves, is_quad)
+            print(Board.pretty(board))
        return board
--- a/quack/quack.c
+++ b/quack/quack.c
@ -0,0 +1,484 @@
+#include <Python.h>
+
+static PyObject* QuackError;
+
+typedef struct board_list board_list;
+struct board_list {
+  int size;
+  PyObject* list[16];
+};
+
+/* Utility functions */
+/* Return 1 for a positive x, -1 for a negative x and 0 for zero. */
+int sign(int x) {
+  if (x > 0) return 1;
+  if (x < 0) return -1;
+  return 0;
+}
+
+/* Return the magnitude of x. */
+int abs(int x) {
+  return (x < 0) ? -x : x;
+}
+/* end utility functions */
+
+/* Helper functions */
+
+/*
+ * Collect the indexes of every entry of the 26-entry board for which
+ * board[i] * player >= 1, i.e. entries holding checkers of `player`.
+ *
+ * Returns a malloc'ed array that the caller must free(): element 0 is the
+ * number of indexes found and elements 1..count are the indexes in
+ * ascending order.  Aborts the process if the allocation fails.
+ */
+int *idxs_with_checkers_of_player(int board[], int player) {
+  int found[26];
+  int count = 0;
+
+  for (int pos = 0; pos < 26; pos++) {
+    if (board[pos] * player >= 1) found[count++] = pos;
+  }
+
+  int *result = malloc((1 + count) * sizeof(int));
+  if (result == NULL) {
+    PyErr_NoMemory();
+    abort();
+  }
+
+  /* Slot 0 carries the length so callers need no separate out-parameter. */
+  result[0] = count;
+  for (int k = 1; k <= count; k++) {
+    result[k] = found[k - 1];
+  }
+
+  return result;
+}
+
+/* Return 1 when the move direction equals the player value, else 0. */
+int is_forward_move(int direction, int player) {
+  return (player == direction) ? 1 : 0;
+}
+
+/* Return 1 when the distance moved (|delta|) equals the die face, else 0. */
+int face_value_match_move_length(int delta, int face_value) {
+  int distance = abs(delta);
+  return distance == face_value;
+}
+
+/*
+ * Enforce that a move starts from the bar while the player's bar entry is
+ * non-zero.  The bar is index 0 for player 1 and index 25 otherwise.
+ * Returns 1 when the bar is empty or `from_idx` is the bar, else 0.
+ */
+int bear_in_if_checker_on_bar(int board[], int player, int from_idx) {
+  int bar = (player == 1) ? 0 : 25;
+
+  if (board[bar] == 0) return 1;
+  return from_idx == bar;
+}
+
+/* Return 1 when the sign of `from_state` equals `player`, else 0. */
+int checkers_at_from_idx(int from_state, int player) {
+  int owner = sign(from_state);
+  return owner == player;
+}
+
+/*
+ * Return 1 when the destination can be landed on: either its sign is not
+ * the opposite of `player`, or it holds exactly one checker of the
+ * opposite sign.  Returns 0 otherwise.
+ */
+int no_block_at_to_idx(int to_state, int player) {
+  int held_by_opponent = (-sign(to_state) == player);
+
+  if (!held_by_opponent) return 1;
+  return abs(to_state) == 1;
+}
+
+
+/*
+ * Decide whether `player` may bear a checker off with the move
+ * from_idx -> to_idx.
+ *
+ * Returns 1 only when every index holding the player's checkers lies in
+ * the last quadrant (>= 19 for player 1, <= 6 otherwise) AND the move
+ * either lands exactly on the off-board index (25 for player 1, 0 for
+ * player -1) or no checker of the player sits behind from_idx.
+ * Returns 0 otherwise.
+ */
+int can_bear_off(int board[], int player, int from_idx, int to_idx) {
+  /* Element 0 is the count, elements 1..count are the occupied indexes. */
+  int* checker_idxs = idxs_with_checkers_of_player(board, player);
+
+  int moving_backmost_checker = 1;
+  int bearing_directly_off = 0;
+  int all_checkers_in_last_quadrant = 1;
+
+  /* Check if bearing directly off */
+  if      (player ==  1 && to_idx == 25) bearing_directly_off = 1;
+  else if (player == -1 && to_idx == 0)  bearing_directly_off = 1;
+  
+  for (int i = 1; i <= checker_idxs[0]; i++) {
+    if (player == 1 ) {
+      /* Check if all checkers are in last quadrant */
+      if (checker_idxs[i] < 19) {
+	all_checkers_in_last_quadrant = 0;
+	break;
+      }
+      
+      /* Check if moving backmost checker */
+      if (checker_idxs[i] < from_idx) {
+	moving_backmost_checker = 0;
+	/* When bearing directly off the scan must continue, because a later
+	   index could still fail the last-quadrant test. */
+	if (!bearing_directly_off) break;
+      }
+    } else {
+      /* Mirror of the branch above for the player moving towards index 0. */
+      if (checker_idxs[i] > 6) {
+	all_checkers_in_last_quadrant = 0;
+	break;
+      }
+
+      if (checker_idxs[i] > from_idx) {
+	moving_backmost_checker = 0;
+	if (!bearing_directly_off) break;
+      }
+    }
+  }
+
+  free(checker_idxs);
+  
+  if (all_checkers_in_last_quadrant &&
+      (bearing_directly_off || moving_backmost_checker))  return 1;
+  else                                                    return 0;
+}
+
+
+
+/* end helper functions */
+
+/*
+ * Check whether moving one checker of `player` along `move`
+ * (move[0] = from index, move[1] = to index) is legal for a die showing
+ * `face_value` on the 26-entry board.
+ *
+ * A destination outside 1..24 is treated as bearing off and is subject to
+ * can_bear_off().  Returns 1 when every rule holds, 0 otherwise; a source
+ * index outside the board is never valid.
+ */
+int is_move_valid(int board[], int player, int face_value, int move[]) {
+  int from_idx = move[0];
+  int to_idx = move[1];
+
+  /* The move may come straight from Python; reject a source outside the
+     board before it is used as an array index. */
+  if (from_idx < 0 || from_idx > 25) return 0;
+
+  int to_state;
+  int from_state = board[from_idx];
+  int delta = to_idx - from_idx;
+  int direction = sign(delta);
+  int bearing_off;
+
+  if (to_idx >= 1 && to_idx <= 24) {
+    to_state = board[to_idx];
+    bearing_off = 0;
+  } else {
+    /* Off-board destination: nothing can block it. */
+    to_state = 0;
+    bearing_off = 1;
+  }
+
+  return is_forward_move(direction, player)
+    && face_value_match_move_length(delta, face_value)
+    && bear_in_if_checker_on_bar(board, player, from_idx)
+    && checkers_at_from_idx(from_state, player)
+    && no_block_at_to_idx(to_state, player)
+    && (!bearing_off || can_bear_off(board, player, from_idx, to_idx))
+    ;
+}
+
+/*
+ * Apply `move` (move[0] = from index, move[1] = to index) for `player` to
+ * `board` in place.  No legality check is performed here.
+ * NOTE(review): move[0] is used as an index without a bounds check, so
+ * callers presumably must pass a value in 0..25 -- confirm at call sites.
+ */
+void do_move(int board[], int player, int move[]) {
+  int from_idx = move[0];
+  int to_idx   = move[1];
+
+  /* "lift" checker */
+  board[from_idx] -= player;
+
+  /* Return early if bearing off */
+  if (to_idx < 1 || to_idx > 24) return;
+
+  /* Hit opponent checker */
+  if (board[to_idx] * player == -1) {
+    /* Move checker to bar */
+    /* Subtracting `player` adds one checker of the opposite sign to the
+       bar entry: index 25 when player 1 hits, index 0 otherwise. */
+    if (player == 1) board[25] -= player;
+    else             board[0]  -= player;
+
+    board[to_idx] = 0;
+  }
+
+  /* Put down checker */
+  board[to_idx] += player;
+
+  return;
+}
+
+/*
+ * Return a freshly malloc'ed 26-entry copy of `board` with `move` applied
+ * for `player`; the input board is left untouched.  The caller owns the
+ * result and must free() it.  Aborts the process if allocation fails.
+ */
+int* do_move_clone(int board[], int player, int move[]) {
+  int* copy = malloc(26 * sizeof(int));
+  if (copy == NULL) {
+    PyErr_NoMemory();
+    abort();
+  }
+
+  int pos = 0;
+  while (pos < 26) {
+    copy[pos] = board[pos];
+    pos++;
+  }
+
+  do_move(copy, player, move);
+  return copy;
+}
+
+/*
+ * Build a new Python tuple holding the first `size` ints of `board`.
+ * Returns a new reference, or NULL with a Python exception set when an
+ * allocation fails.
+ */
+PyObject* store_board_to_pytuple(int board[], int size) {
+  PyObject* board_tuple = PyTuple_New(size);
+  if (board_tuple == NULL) return NULL;
+
+  for (int i = 0; i < size; i++) {
+    PyObject* value = Py_BuildValue("i", board[i]);
+    if (value == NULL) {
+      Py_DECREF(board_tuple);
+      return NULL;
+    }
+    /* PyTuple_SetItem steals the reference to value. */
+    PyTuple_SetItem(board_tuple, i, value);
+  }
+  return board_tuple;
+}
+
+/*
+ * Compute every board reachable by moving a single checker of `player`
+ * by `face_value`.
+ *
+ * Returns a board_list of Python tuples (new references).  When the player
+ * has no checkers on the board the list holds the unchanged board; when no
+ * move is legal the list is empty.  At most as many boards as the
+ * board_list can hold are produced.
+ */
+board_list calc_moves(int board[], int player, int face_value) {
+  int* checker_idxs = idxs_with_checkers_of_player(board, player);
+  board_list boards = { .size = 0 };
+  const int capacity = (int)(sizeof(boards.list) / sizeof(boards.list[0]));
+
+  if (checker_idxs[0] == 0) {
+    boards.size = 1;
+    PyObject* board_tuple = store_board_to_pytuple(board, 26);
+    boards.list[0] = board_tuple;
+    free(checker_idxs);
+    return boards;
+  }
+
+  int ctr = 0;
+  /* The board arrives unvalidated from Python and may have more occupied
+     entries than the list has slots, so stop before writing past the
+     fixed-size list. */
+  for (int i = 1; i <= checker_idxs[0] && ctr < capacity; i++) {
+    int move[2];
+    move[0] = checker_idxs[i];
+    move[1] = checker_idxs[i] + (face_value * player);
+
+    if (is_move_valid(board, player, face_value, move)) {
+      int* new_board = do_move_clone(board, player, move);
+      PyObject* board_tuple = store_board_to_pytuple(new_board, 26);
+
+      /* The tuple holds its own int objects, so the scratch copy can go. */
+      free(new_board);
+
+      boards.list[ctr] = board_tuple;
+      ctr++;
+    }
+  }
+
+  free(checker_idxs);
+
+  boards.size = ctr;
+  return boards;
+}
+
+/*
+ * Build the 30-entry "quack-fat" feature vector for `board`:
+ *   [0..25]  copy of the board
+ *   [26]     15 minus the sum of all positive entries
+ *   [27]     -15 minus the sum of all negative entries
+ *   [28,29]  (1, 0) when player == 1, otherwise (0, 1)
+ *
+ * Returns a malloc'ed array the caller must free().  Aborts the process if
+ * allocation fails.
+ */
+int* board_features_quack_fat(int board[], int player) {
+  int* new_board = malloc(sizeof(int) * 30);
+  if (new_board == NULL) {
+    PyErr_NoMemory();
+    abort();
+  }
+
+  int pos_sum = 0;
+  int neg_sum = 0;
+  for (int i = 0; i < 26; i++) {
+    new_board[i] = board[i];
+    /* Plain comparison; the former sign(x > 0) only worked by accident. */
+    if (new_board[i] > 0) pos_sum += new_board[i];
+    else                  neg_sum += new_board[i];
+  }
+
+  new_board[26] = 15 - pos_sum;
+  new_board[27] = -15 - neg_sum;
+  if (player == 1) {
+    new_board[28] = 1;
+    new_board[29] = 0;
+  } else {
+    new_board[28] = 0;
+    new_board[29] = 1;
+  }
+
+  return new_board;
+}
+
+/* Meta definitions */
+/*
+ * Copy the 26 integers of the Python tuple `board_tuple_obj` into `board`.
+ * Returns 0 on success.  Returns 1 with a Python exception set when the
+ * tuple does not have exactly 26 entries or an entry cannot be converted
+ * to an integer.
+ */
+int extract_board(int *board, PyObject* board_tuple_obj) {
+  long numValuesBoard;
+  numValuesBoard = PyTuple_Size(board_tuple_obj);
+  if (numValuesBoard != 26) {
+    PyErr_SetString(QuackError, "Board tuple must have 26 entries");
+    return 1;
+  }
+
+  PyObject* board_val_obj;
+  // Iterate over tuple to retrieve positions
+  for (int i=0; i<numValuesBoard; i++) {
+    board_val_obj = PyTuple_GetItem(board_tuple_obj, i);
+    long value = PyLong_AsLong(board_val_obj);
+    /* -1 is also a legal value, so a failure is only signalled by a
+       pending exception. */
+    if (value == -1 && PyErr_Occurred()) return 1;
+    board[i] = (int) value;
+  }
+
+  return 0;
+}
+
+/*
+ * Copy the two integers of the Python tuple `move_tuple_obj` into `move`.
+ * Returns 0 on success.  Returns 1 with a Python exception set when the
+ * tuple does not have exactly 2 entries or an entry cannot be converted
+ * to an integer.
+ */
+int extract_move(int *move, PyObject* move_tuple_obj) {
+  long numValuesMove;
+  numValuesMove = PyTuple_Size(move_tuple_obj);
+  if (numValuesMove != 2) {
+    PyErr_SetString(QuackError, "Move tuple must have exactly 2 entries");
+    return 1;
+  }
+  PyObject* move_val_obj;
+  for (int i=0; i<numValuesMove; i++) {
+    move_val_obj = PyTuple_GetItem(move_tuple_obj, i);
+    long value = PyLong_AsLong(move_val_obj);
+    /* -1 is also a legal value, so a failure is only signalled by a
+       pending exception. */
+    if (value == -1 && PyErr_Occurred()) return 1;
+    move[i] = (int) value;
+  }
+
+  return 0;
+}
+
+/*
+ * Python entry point: is_move_valid(board, player, face_value, move).
+ * `board` must be a 26-tuple and `move` a 2-tuple of ints.  Returns True
+ * or False, or raises when the arguments cannot be parsed.
+ */
+static PyObject*
+quack_is_move_valid(PyObject *self, PyObject *args) {
+  PyObject* board_tuple_obj;
+  PyObject* move_tuple_obj;
+  int player;
+  int face_value;
+
+  if (! PyArg_ParseTuple(args, "O!iiO!",
+			 &PyTuple_Type, &board_tuple_obj,
+			 &player,
+			 &face_value,
+			 &PyTuple_Type, &move_tuple_obj))
+    return NULL;
+
+  int board[26];
+  int move[2];
+  if (extract_board(board, board_tuple_obj)) return NULL;
+  if (extract_move(move, move_tuple_obj))    return NULL;
+
+  int valid = is_move_valid(board, player, face_value, move);
+  if (valid) Py_RETURN_TRUE;
+  else       Py_RETURN_FALSE;
+}
+
+/*
+ * Python entry point: idxs_with_checkers_of_player(board, player).
+ * Returns a new list with the board indexes that hold checkers of
+ * `player`, or NULL with an exception set on failure.
+ */
+static PyObject*
+quack_idxs_with_checkers_of_player(PyObject *self, PyObject *args) {
+
+  int board[26];
+  int player;
+
+  int* idxs;
+
+  PyObject* board_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!i",
+			 &PyTuple_Type, &board_tuple_obj,
+			 &player))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+
+  /* idxs[0] is the count, idxs[1..count] are the indexes. */
+  idxs = idxs_with_checkers_of_player(board, player);
+  PyObject* idxs_list = PyList_New(idxs[0]);
+  if (idxs_list == NULL) {
+    free(idxs);
+    return NULL;
+  }
+
+  for (int i = 0; i < idxs[0]; i++) {
+    PyObject* idx_obj = Py_BuildValue("i", idxs[i+1]);
+    if (idx_obj == NULL) {
+      free(idxs);
+      Py_DECREF(idxs_list);
+      return NULL;
+    }
+    /* PyList_SetItem steals the reference to idx_obj. */
+    PyList_SetItem(idxs_list, i, idx_obj);
+  }
+  free(idxs);
+
+  /* The list is already a new reference owned by us; hand it to the caller. */
+  return idxs_list;
+}
+
+/*
+ * Python entry point: do_move(board, player, move).
+ * Returns a new 26-tuple with `move` applied for `player`; no legality
+ * check is made.  Raises quack.error when the move does not start on a
+ * board index.
+ */
+static PyObject*
+quack_do_move(PyObject *self, PyObject *args) {
+  int board[26];
+  int player;
+  int move[2];
+
+  PyObject* board_tuple_obj;
+  PyObject* move_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!iO!",
+			 &PyTuple_Type, &board_tuple_obj,
+			 &player,
+			 &PyTuple_Type, &move_tuple_obj))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+  if (extract_move(move, move_tuple_obj))    return NULL;
+
+  /* do_move writes to board[move[0]] unconditionally, so the source index
+     must be validated here.  The destination is range-checked by do_move. */
+  if (move[0] < 0 || move[0] > 25) {
+    PyErr_SetString(QuackError, "Move must start on a board index between 0 and 25");
+    return NULL;
+  }
+
+  do_move(board, player, move);
+
+  /* `board` is a plain C array on the stack, not a Python object, so it
+     must not be DECREF'ed.  The tuple is a new reference we return as is. */
+  return store_board_to_pytuple(board, 26);
+}
+
+/*
+ * Python entry point: calc_moves(board, player, face_value).
+ * Returns a new list of 26-tuples, one per board produced by calc_moves(),
+ * or NULL with an exception set on failure.
+ */
+static PyObject*
+quack_calc_moves(PyObject *self, PyObject *args) {
+  int board[26];
+  int player;
+  int face_value;
+
+  PyObject* board_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!ii",
+			 &PyTuple_Type, &board_tuple_obj,
+			 &player,
+			 &face_value))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+
+  board_list boards = calc_moves(board, player, face_value);
+  PyObject* boards_list = PyList_New(boards.size);
+  if (boards_list == NULL) {
+    /* We still own every tuple; release them before reporting the error. */
+    for (int i = 0; i < boards.size; i++) Py_XDECREF(boards.list[i]);
+    return NULL;
+  }
+
+  for (int i = 0; i < boards.size; i++) {
+    /* PyList_SetItem steals the reference to the tuple, even on failure.
+       A NULL tuple means its creation failed with an exception set. */
+    if (boards.list[i] == NULL
+        || PyList_SetItem(boards_list, i, boards.list[i])) {
+      for (int j = i + 1; j < boards.size; j++) Py_XDECREF(boards.list[j]);
+      Py_DECREF(boards_list);
+      return NULL;
+    }
+  }
+
+  /* The list is already a new reference owned by us; hand it to the caller. */
+  return boards_list;
+}
+
+/*
+ * Python entry point: board_features_quack_fat(board, player).
+ * Returns a new 30-tuple with the quack-fat representation of `board`, or
+ * NULL with an exception set on failure.
+ */
+static PyObject*
+quack_board_features_quack_fat(PyObject *self, PyObject *args) {
+  int board[26];
+  int player;
+
+  PyObject* board_tuple_obj;
+
+  if (! PyArg_ParseTuple(args, "O!i",
+			 &PyTuple_Type, &board_tuple_obj,
+			 &player))
+    return NULL;
+
+  if (extract_board(board, board_tuple_obj)) return NULL;
+
+  int* new_board = board_features_quack_fat(board, player);
+  PyObject* board_tuple = store_board_to_pytuple(new_board, 30);
+  free(new_board);
+
+  /* board_tuple is a new reference (or NULL on failure); returning it
+     directly avoids a DECREF on a possibly-NULL pointer. */
+  return board_tuple;
+}
+
+
+/*
+ * Method table of the module: maps each Python-visible name to its C
+ * implementation.  Every entry takes positional arguments only
+ * (METH_VARARGS); the all-NULL entry terminates the table.
+ */
+static PyMethodDef quack_methods[] = {
+  {
+    "is_move_valid", quack_is_move_valid, METH_VARARGS,
+    "Evaluates the validity of the proposed move."
+  },
+  {
+    "idxs_with_checkers_of_player", quack_idxs_with_checkers_of_player, METH_VARARGS,
+    "Returns a list of indexes with checkers of the specified player"
+  },
+  {
+    "do_move", quack_do_move, METH_VARARGS,
+    "Returns the board after doing the specified move"
+  },
+  {
+    "calc_moves", quack_calc_moves, METH_VARARGS,
+    "Calculates all legal moves from board with specified face value"
+  },
+  {
+    "board_features_quack_fat", quack_board_features_quack_fat, METH_VARARGS,
+    "Transforms a board to the quack-fat board representation"
+  },
+  {NULL, NULL, 0, NULL}
+};
+
+/* Module definition passed to PyModule_Create() in PyInit_quack(). */
+static struct PyModuleDef quack_definition = {
+  PyModuleDef_HEAD_INIT,
+  "quack",       /* module name */
+  "A Python module that provides various useful Backgammon-related functions.",
+  -1,            /* m_size field */
+  quack_methods  /* method table */
+};
+
+/*
+ * Module initialisation: create the `quack` module and publish the
+ * `quack.error` exception as its `error` attribute.  Returns the module,
+ * or NULL with an exception set when any step fails.
+ */
+PyMODINIT_FUNC PyInit_quack(void) {
+  PyObject* module;
+
+  module = PyModule_Create(&quack_definition);
+  if (module == NULL)
+    return NULL;
+
+  QuackError = PyErr_NewException("quack.error", NULL, NULL);
+  /* PyModule_AddObject steals one reference on success; the extra one keeps
+     the static QuackError pointer valid for the lifetime of the module. */
+  Py_XINCREF(QuackError);
+  if (PyModule_AddObject(module, "error", QuackError) < 0) {
+    /* On failure no reference was stolen: drop both of ours. */
+    Py_XDECREF(QuackError);
+    Py_CLEAR(QuackError);
+    Py_DECREF(module);
+    return NULL;
+  }
+
+  return module;
+}
--- a/quack/setup.py
+++ b/quack/setup.py
@ -0,0 +1,9 @@
+from distutils.core import setup, Extension
+
+# Build description of the `quack` C extension, compiled from quack.c.
+quack = Extension(
+    'quack',
+    sources=['quack.c'],
+)
+
+# Register the extension with distutils under the package name `quack`.
+setup(
+    name='quack',
+    version='0.1',
+    description='Quack Backgammon Tools',
+    ext_modules=[quack],
+)
--- a/report_docs.txt
+++ b/report_docs.txt
@ -0,0 +1,28 @@
+<christoffer> Alexander og jeg skrev noget af vores bachelorprojekt om til C her i fredags.
+<christoffer> Man skal virkelig passe på sine hukommelsesallokeringer.
+<Jmaa> Ja, helt klart.
+<christoffer> Jeg fandt et memory leak, der lækkede 100 MiB hukommelse i sekundet.
+<Jmaa> Hvilken del blev C-ificeret?
+<Jmaa> Damned
+<christoffer> Årsagen var at vi gav et objekt med tilbage til Python uden at dekrementere dets ref-count, så fortolkeren stadig troede at nogen havde brug for det.
+<christoffer> Den del af spillogikken, der tjekker om træk er gyldige.
+<christoffer> Det bliver kaldt ret mange tusinde gange pr. spil, så vi tænkte at der måske kunne være lidt optimering at hente i at omskrive det til C.
+<Jmaa> Ok, så I har ikke selv brugt alloc og free. Det er alligevel noget.
+<christoffer> Metoden selv blev 7 gange hurtigere!
+<Jmaa> Wow!
+<christoffer> Jo. Det endte vi også med at gøre.
+<christoffer> Vi havde brug for lister af variabel størrelse. Det endte med en struct med et "size" felt og et "list" felt.
+<Jmaa> Inkluderer det speedup, frem og tilbagen mellem C og python?
+<christoffer> Det burde det gøre, ja!
+<Jmaa> Gjorde det nogen stor effekt for hvor hurtigt I kan evaluere?
+<christoffer> Jeg tror ikke at der er særligt meget "frem og tilbage"-stads. Det ser ud til at det kode man skriver bliver kastet ret direkte ind i fortolkeren.
+<christoffer> Det gjorde en stor forskel for når vi laver 1-ply.
+<christoffer> "ply" er hvor mange træk man kigger fremad.
+<christoffer> Så kun at kigge på det umiddelbart næste træk er 0-ply, hvilket er det vi har gjort indtil nu
+<christoffer> 1-ply var for langsomt. Det tog ca. 6-7 sekunder at evaluere ét træk.
+<christoffer> Alexander lavede lidt omskrivninger, så TensorFlow udregnede det hurtigere og fik det ned på ca. 3-4 sekunder *pr. spil*.
+<christoffer> Så skrev vi noget af det om til C, og nu er vi så på ca. 2 sekunder pr. spil med 1-ply, hvilket er ret vildt.
+<christoffer> Det er så godt at Python-fortolkeren kan udvides med C!
+<christoffer> caspervk, kan I optimere jeres bachelorprojekt med et par C-moduler?
+<Jmaa> Det er en hel lille sektion til rapporten det der.
+<christoffer> Yeah. Kopierer bare det her verbatim ind.
--- a/requirements.txt
+++ b/requirements.txt
@ -16,8 +16,8 @@ pyparsing==2.2.0
 python-dateutil==2.7.2
 pytz==2018.3
 six==1.11.0
-tensorboard==1.6.0
-tensorflow==1.6.0
+tensorboard==1.8.0
+tensorflow==1.8.0
 termcolor==1.1.0
 Werkzeug==0.14.1
 pygame==1.9.3
--- a/tensorflow_impl_tests/eager_main.py
+++ b/tensorflow_impl_tests/eager_main.py
@ -0,0 +1,94 @@
+import time
+import numpy as np
+import tensorflow as tf
+from board import Board
+import tensorflow.contrib.eager as tfe
+
+
+# Experiment script: run a small two-layer sigmoid network in TensorFlow
+# eager mode, print its output, apply one hand-written gradient step towards
+# a target of 0.9 and print the output again.
+tf.enable_eager_execution()
+# NOTE(review): xavier_init and opt are not referenced by any live code in
+# this script.
+xavier_init = tf.contrib.layers.xavier_initializer()
+
+
+
+opt = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=1)
+
+# NOTE(review): these sizes are not used below; the model hard-codes 40 and 1
+# and only reads input_size.
+output_size = 1
+hidden_size = 40
+input_size = 30
+
+
+# Constant kernel initialisers give identical starting weights on every run.
+model = tf.keras.Sequential([
+    tf.keras.layers.Dense(40, activation="sigmoid", kernel_initializer=tf.constant_initializer(-2), input_shape=(1,input_size)),
+    tf.keras.layers.Dense(1, activation="sigmoid", kernel_initializer=tf.constant_initializer(0.2))
+])
+
+
+# tfe.Saver(model.variables).restore(tf.train.latest_checkpoint("./"))
+
+# A 26-entry board.  NOTE(review): the name shadows the builtin input().
+input = [0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0]
+
+
+
+# Batch of 20 copies of the same feature vector.
+all_input = np.array([Board.board_features_quack_fat(input, 1) for _ in range(20)])
+
+
+single_in = Board.board_features_quack_fat(input, 1)
+
+
+# NOTE(review): start, all_predictions and learning_rate are assigned but
+# never read by the live code below.
+start = time.time()
+
+all_predictions = model.predict_on_batch(all_input)
+
+
+learning_rate = 0.1
+
+# Record the forward pass so the gradient of the output can be taken.
+with tf.GradientTape() as tape:
+    value = model(single_in)
+
+
+print("Before:", value)
+
+grads = tape.gradient(value, model.variables)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("/"*40,"grads","/"*40)
+print(grads)
+
+# Scalar error between the target 0.9 and the current network output.
+difference_in_values = tf.reshape(tf.subtract(0.9, value, name='difference_in_values'), [])
+
+# Manual update: add 0.1 * error * gradient to every variable.
+for grad, train_var in zip(grads, model.variables):
+    backprop_calc = 0.1 * difference_in_values * grad
+    train_var.assign_add(backprop_calc)
+
+value = model(single_in)
+print("/"*40,"model_variables","/"*40)
+print(model.variables)
+print("After:", value)
+
+
+# # grads = [0.1*val-np.random.uniform(-1,1)+grad for grad, trainable_var in zip(grads, model.variables)]
+#
+# # print(model.variables[0][0])
+# weights_before = model.weights[0]
+#
+# start = time.time()
+# #[trainable_var.assign_add(0.1*val-0.3+grad) for grad, trainable_var in zip(grads, model.variables)]
+#
+# start = time.time()
+# for gradient, trainable_var in zip(grads, model.variables):
+#     backprop_calc = 0.1 * (0.9 - val) * gradient
+#     trainable_var.assign_add(backprop_calc)
+#
+# # opt.apply_gradients(zip(grads, model.variables))
+#
+# print(time.time() - start)
+#
+# print(model(single_in))
+#
+# vals = model.predict_on_batch(all_input)
+# vals = list(vals)
+# vals[3] = 4
+# print(vals)
+# print(np.argmax(np.array(vals)))
+
+# tfe.Saver(model.variables).save("./tmp_ckpt")
--- a/tensorflow_impl_tests/normal_main.py
+++ b/tensorflow_impl_tests/normal_main.py
@ -0,0 +1,67 @@
+import tensorflow as tf
+import numpy as np
+import time
+
+class Everything:
+    """Graph-mode counterpart of the eager experiment.
+
+    Builds a network with one hidden sigmoid layer and a sigmoid output,
+    plus a training op that adds ``0.1 * (0.9 - value) * gradient`` to every
+    trainable variable.
+    """
+
+    def __init__(self):
+        """Construct the placeholder, the variables, the output tensor
+        ``self.value`` and the grouped update ``self.training_op``."""
+
+        self.output_size = 1
+        self.hidden_size = 40
+        self.input_size = 30
+
+        self.input = tf.placeholder('float', [1, self.input_size])
+
+        # Constant initialisers make every run start from the same weights.
+        W_1 = tf.get_variable("w_1", (self.input_size, self.hidden_size),
+                              initializer=tf.constant_initializer(-2))
+        W_2 = tf.get_variable("w_2", (self.hidden_size, self.output_size),
+                              initializer=tf.constant_initializer(0.2))
+
+        b_1 = tf.get_variable("b_1", (self.hidden_size,),
+                              initializer=tf.zeros_initializer)
+        b_2 = tf.get_variable("b_2", (self.output_size,),
+                              initializer=tf.zeros_initializer)
+
+        value_after_input = tf.sigmoid(tf.matmul(self.input, W_1) + b_1, name='hidden_layer')
+
+        self.value = tf.sigmoid(tf.matmul(value_after_input, W_2) + b_2, name='output_layer')
+
+        apply_gradients = []
+
+        trainable_vars = tf.trainable_variables()
+        gradients = tf.gradients(self.value, trainable_vars)
+
+        # Scalar error between the target 0.9 and the network output.
+        difference_in_values = tf.reshape(tf.subtract(0.9, self.value, name='difference_in_values'), [])
+
+        with tf.variable_scope('apply_gradients'):
+            for gradient, trainable_var in zip(gradients, trainable_vars):
+                backprop_calc = 0.1 * difference_in_values * gradient
+                grad_apply = trainable_var.assign_add(backprop_calc)
+                apply_gradients.append(grad_apply)
+
+        self.training_op = tf.group(*apply_gradients, name='training_op')
+
+    def eval(self):
+        """Time 20 forward passes, print the elapsed time and the output,
+        then run one training step and print the output again."""
+        features = np.array([0, 2, 0, 0, 0, 0, -5, 0, -3, 0, 0, 0, 5, -5, 0, 0, 0, 3, 0, 5, 0, 0, 0, 0, -2, 0, 0, 0, 1, 0])
+        feed = {self.input: features.reshape(1, -1)}
+        start = time.time()
+        # The context manager closes the session when the block exits, so
+        # its resources are released even if a run() call raises.
+        with tf.Session() as sess:
+            sess.run(tf.global_variables_initializer())
+            for _ in range(20):
+                val = sess.run(self.value, feed_dict=feed)
+            print(time.time() - start)
+            print(val)
+            sess.run(self.training_op, feed_dict=feed)
+            val = sess.run(self.value, feed_dict=feed)
+            print(val)
+
+everything = Everything()
+everything.eval()
+
+
Author	SHA1	Message	Date
Alexander Munch-Hansen	ea4efc5a2b	Updated server code.	2018-06-07 21:36:06 +02:00
Christoffer Müller Madsen	26c0b469eb	restore restore_model	2018-05-22 20:49:10 +02:00
Alexander Munch-Hansen	f170bad9b1	tesauro fat and diffs in values	2018-05-22 15:39:14 +02:00
Christoffer Müller Madsen	6e061171da	rm TODO	2018-05-22 15:38:04 +02:00
Christoffer Müller Madsen	40c228ef01	pubeval tests	2018-05-22 15:36:23 +02:00
Christoffer Müller Madsen	c2c6c89e9f	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!8	2018-05-22 13:16:10 +00:00
Christoffer Müller Madsen	b7708b3675	train-evaluate-save	2018-05-22 15:15:36 +02:00
Christoffer Müller Madsen	bad870c27a	update 0-ply-tests	2018-05-22 15:15:15 +02:00
Christoffer Müller Madsen	653d6e30a8	add missing comma	2018-05-22 15:12:47 +02:00
Christoffer Müller Madsen	7e51b44e33	Merge branch 'experimentation' into 'master' tesauro fat and diffs in values See merge request Pownie/backgammon!7	2018-05-22 13:12:10 +00:00
Christoffer Müller Madsen	1fd6c35baa	Merge branch 'master' into 'experimentation' # Conflicts: # main.py	2018-05-22 13:11:43 +00:00
Alexander Munch-Hansen	d426c1c3b5	tesauro fat and diffs in values	2018-05-22 15:10:41 +02:00
Christoffer Müller Madsen	5ab144cffc	add git commit status to all logs	2018-05-22 14:44:13 +02:00
Christoffer Müller Madsen	cef8e54709	Merge branch 'master' of gitfub.space:Pownie/backgammon	2018-05-22 14:37:46 +02:00
Christoffer Müller Madsen	2efbc446f2	log git commit status in evaluation logs	2018-05-22 14:37:27 +02:00
Christoffer Müller Madsen	c54f7aca24	Merge branch 'experimentation' into 'master' Experimentation See merge request Pownie/backgammon!6	2018-05-22 12:36:37 +00:00
Alexander Munch-Hansen	c31bc39780	More server	2018-05-22 00:26:32 +02:00
Alexander Munch-Hansen	6133cb439f	Merge remote-tracking branch 'origin/experimentation' into experimentation	2018-05-20 20:15:57 +02:00
Alexander Munch-Hansen	5acd79b6da	Slight modification to move calculation	2018-05-20 19:43:28 +02:00
=	b11e783b30	add 0-ply-tests	2018-05-20 18:50:28 +02:00
Christoffer Müller Madsen	f834b10e02	remove unnecessary print	2018-05-20 16:52:05 +02:00
Christoffer Müller Madsen	72f01a2a2d	remove dependency on yaml	2018-05-20 16:03:58 +02:00
Alexander Munch-Hansen	d14e6c5994	Everything might work, except for quad, that might be bugged.	2018-05-20 00:38:13 +02:00
Alexander Munch-Hansen	a266293ecd	Stuff is happening, moving is better!	2018-05-19 22:01:55 +02:00
Alexander Munch-Hansen	e9a46c79df	server and stuff	2018-05-19 14:12:13 +02:00
Alexander Munch-Hansen	816cdfae00	fix and clean	2018-05-18 14:55:10 +02:00
Christoffer Müller Madsen	ff9664eb38	Merge branch 'eager_eval' into 'master' Eager eval See merge request Pownie/backgammon!5	2018-05-18 12:06:12 +00:00
Alexander Munch-Hansen	3e379b40c4	Accidentally added a '5' in the middle of a variable.	2018-05-16 00:20:54 +02:00
Alexander Munch-Hansen	90fad334b9	More optimizations.	2018-05-15 23:37:35 +02:00
Alexander Munch-Hansen	a77c13a0a4	1-ply runs even faster.	2018-05-15 19:29:27 +02:00
Alexander Munch-Hansen	260c32d909	oiuhhiu	2018-05-15 18:16:44 +02:00
Alexander Munch-Hansen	00974b0f11	Added '--play' flag, so you can now play against the ai.	2018-05-14 13:07:48 +02:00
Alexander Munch-Hansen	2c02689577	Merge remote-tracking branch 'origin/eager_eval' into eager_eval	2018-05-13 23:55:02 +02:00
Alexander Munch-Hansen	926a331df0	Some flags from main.py is gone, rolls now allow a face_value of 0 yet again and it is possible to play against the ai. There is no flag for this yet, so this has to be added.	2018-05-13 23:54:13 +02:00
Christoffer Müller Madsen	d932663519	add explanation of ply speedup	2018-05-13 22:26:24 +02:00
Christoffer Müller Madsen	2312c9cb2a	Merge branch 'eager_eval' of gitfub.space:Pownie/backgammon into eager_eval	2018-05-12 15:19:12 +02:00
Christoffer Müller Madsen	9f1bd56c0a	fix bear_off bug; addtional tests and additional fixes	2018-05-12 15:18:52 +02:00
Alexander Munch-Hansen	ba4ef86bb5	Board rep can now be inferred from file after being given once. We can also evaluate multiple times by using the flag "--repeat-eval". The flag defaults to 1, if not provided.	2018-05-12 12:14:47 +02:00
Christoffer Müller Madsen	c3f5e909d6	flip is back	2018-05-11 21:47:48 +02:00
Christoffer Müller Madsen	1aa9cf705f	quack without leaks	2018-05-11 21:24:10 +02:00
Christoffer Müller Madsen	383dd7aa4b	code works again; quack gave ~3 times improvement for calc_moves	2018-05-11 20:13:43 +02:00
Christoffer Müller Madsen	93188fe06b	more quack for board	2018-05-11 20:07:27 +02:00
Christoffer Müller Madsen	ffbc98e1a2	quack kind of works	2018-05-11 19:00:39 +02:00
Christoffer Müller Madsen	03e61a59cf	quack	2018-05-11 17:29:22 +02:00
Alexander Munch-Hansen	93224864a4	More comments, backprop have been somewhat tested in the eager_main.py and normal_main.py.	2018-05-11 13:35:01 +02:00
Alexander Munch-Hansen	504308a9af	Yet another input argument, "--ply", 0 for no look-ahead, 1 for a single look-ahead.	2018-05-10 23:22:41 +02:00
Alexander Munch-Hansen	3b57c10b5a	Saves calling tf.reduce_mean on all values once.	2018-05-10 22:57:27 +02:00
Christoffer Müller Madsen	4fa10861bb	update TF dependency to 1.8.0	2018-05-10 19:27:51 +02:00
Alexander Munch-Hansen	6131d5b5f4	Added comments for Christoffer!	2018-05-10 19:25:28 +02:00
Alexander Munch-Hansen	1aedc23de1	1-ply now works again.	2018-05-10 19:13:18 +02:00
Alexander Munch-Hansen	2d84cd5a0b	1-ply now works again.	2018-05-10 19:06:53 +02:00
Alexander Munch-Hansen	396d5b036d	All values for boards and all rolls can now be calculated	2018-05-10 18:41:21 +02:00
Alexander Munch-Hansen	4efb229d34	Added a lot of comments	2018-05-10 15:28:33 +02:00
Alexander Munch-Hansen	f2a67ca92e	All board reps should now work as input.	2018-05-10 10:49:25 +02:00
Alexander Munch-Hansen	9cfdd7e2b2	Added a verbosity flag, --verbose, which allows for printing of variables and such.	2018-05-10 10:39:22 +02:00
Alexander Munch-Hansen	6429e0732c	We should now be able to both train and eval as per usual. I've added a file "global_step", which works as the new global_step counter, so we can use it for exp_decay.	2018-05-09 23:15:35 +02:00
Alexander Munch-Hansen	cb7e7b519c	Getting closer to functionality. We're capable of evaluating moves and a rework of global_step has begun, such that we now use episode_count as a way of calculating exp_decay, which have been implemented as a function.	2018-05-09 22:22:12 +02:00
Alexander Munch-Hansen	9a2d87516e	Ongoing rewrite of network to use an eager model. We're now capable of evaluating a list of states with network.py. We can also save and restore models.	2018-05-09 00:33:05 +02:00
Alexander Munch-Hansen	7b308be4e2	Different implementations of different speed	2018-05-07 22:24:47 +02:00